In [1]:
import os
import re

import pandas as pd    #pandas will be needed to work in a dataframe
import pg8000          #pg8000 access SQL databases

In [2]:
#code from Agata
#these are nice functions to open LIMS, make a query and then close LIMS after

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Open a pg8000 connection and return it together with a new cursor.

    Returns
    -------
    A (connection, cursor) tuple. Nothing is closed here; the caller is
    responsible for closing both objects.
    """
    connection = pg8000.connect(
        user=user,
        host=host,
        database=database,
        password=password,
        port=port,
    )
    cursor = connection.cursor()
    return connection, cursor

def _select(cursor, query):
    """Execute `query` on `cursor` and return the rows as a list of dicts.

    Each dict maps a column name (the first field of every entry in
    `cursor.description`) to the matching value of one fetched row.
    """
    cursor.execute(query)

    names = []
    for field in cursor.description:
        names.append(field[0])

    rows = []
    for record in cursor.fetchall():
        rows.append(dict(zip(names, record)))
    return rows

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Run a SQL query against the LIMS database and return the result.

    Parameters
    ----------
    query : string containing the SQL query
    user, host, database, password, port : connection settings passed on to _connect

    Returns
    -------
    Whatever _select returns for the query (a list with one dict per row).
    """
    conn, cursor = _connect(user, host, database, password, port)

    #THESE ARE IMPORTANT!!!!!!
    #Every query needs to be closed when done.
    #The nested try/finally makes sure the connection is still closed
    #when the query fails or when closing the cursor itself raises.
    try:
        try:
            return _select(cursor, query)
        finally:
            cursor.close()
    finally:
        conn.close()


#this last function will take our query results and put them in a dataframe so that they are easy to work with
def get_lims_dataframe(query):
    '''Run a LIMS query and return the rows as a pandas DataFrame.

    Parameters
    ----------
    query : string containing the SQL query, passed to limsquery

    Returns
    -------
    A DataFrame with one row per result and one column per key of the first
    result. If the query returns nothing, a message is printed and an empty
    DataFrame is returned.
    '''
    result = limsquery(query)
    if not result:
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print("Could not find results for your query.")
        return pd.DataFrame()
    return pd.DataFrame(data=result, columns=list(result[0].keys()))

# Practice

In [3]:
def find_slices(specimen, metadata_dir='Z:/Patch-Seq/all-metadata-files'):
    """Print the names of slices used from a particular mouse prep.

    Parameters
    ----------
    specimen : integer corresponding to mouse specimen name
    metadata_dir : string path of the directory whose file names are searched;
        defaults to the shared Patch-Seq metadata folder

    Returns
    -------
    None. One slice name is printed per matching file.
    """
    specimen_tag = str(specimen)

    # sorted() keeps the printed order stable however the filesystem lists files.
    for filename in sorted(os.listdir(metadata_dir)):
        if specimen_tag in filename:
            # Drops the last 8 characters and keeps the 12 before them.
            # NOTE(review): presumably a fixed-width suffix after the slice name -- confirm.
            print(filename[-20:-8])


In [4]:
# Example: print the slice names found for specimen 387688.
find_slices(387688)

387688.08.06
387688.09.06
387688.11.06


# Make user report

In [5]:
# Patched cell containers and recording dates for every specimen outside the
# 'mMPATCH' project that was recorded after 2017-10-01.
query = (
    "SELECT s.patched_cell_container, err.recording_date "
    "FROM specimens s "
    "JOIN projects proj ON s.project_id = proj.id "
    "JOIN ephys_roi_results err ON s.ephys_roi_result_id = err.id "
    "WHERE proj.code <> 'mMPATCH' AND "
    "s.patched_cell_container IS NOT NULL "
    "AND err.recording_date > '2017-10-01'"
)

# Run the query through get_lims_dataframe and keep the resulting DataFrame.
patched_cell_df = get_lims_dataframe(query)
# Preview the first rows as the cell output.
patched_cell_df.head()

Unnamed: 0,patched_cell_container,recording_date
0,P8S4_171002_351_A01,2017-10-02 17:55:51
1,P9S4_171002_401_A01,2017-10-02 18:00:47
2,P8S4_171002_352_A01,2017-10-02 18:17:44
3,P2S4_171002_051_A01,2017-10-02 18:29:21
4,P8S4_171002_353_A01,2017-10-02 18:47:06


In [6]:
# Same selection as `query`, joined to the RNA amplification tables and
# restricted to amplifications whose `failed` flag is 'False'.
query2 = (
    "SELECT s.patched_cell_container, err.recording_date, ra.failed "
    "FROM specimens s "
    "JOIN projects proj ON s.project_id = proj.id "
    "JOIN ephys_roi_results err ON s.ephys_roi_result_id = err.id "
    "JOIN rna_amplification_inputs rai on rai.sample_id = s.id "
    "JOIN rna_amplifications ra on ra.id = rai.rna_amplification_id "
    "WHERE proj.code <> 'mMPATCH' AND "
    "s.patched_cell_container IS NOT NULL "
    "AND ra.failed = 'False' "
    "AND err.recording_date > '2017-10-01'"
)

# Run query2 through get_lims_dataframe and keep the resulting DataFrame.
QC_passed_df = get_lims_dataframe(query2)
# Preview the first rows as the cell output.
QC_passed_df.head()

Unnamed: 0,failed,patched_cell_container,recording_date
0,False,P8S4_171002_351_A01,2017-10-02 17:55:51
1,False,P9S4_171002_401_A01,2017-10-02 18:00:47
2,False,P2S4_171002_051_A01,2017-10-02 18:29:21
3,False,P8S4_171002_353_A01,2017-10-02 18:47:06
4,False,P2S4_171002_052_A01,2017-10-02 19:03:11


In [7]:
def cell_count(P_number, df):
    """Return the number of cells in a specified dataframe for a specified user.

    A row is counted when its "patched_cell_container" value starts with
    P_number and the next character is not a digit, so 'P1' does not also
    count the cells of 'P10', 'P11', etc.

    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.
    df: a pandas dataframe with a "patched_cell_container" column

    Returns
    -------
    An integer cell number (0 for an empty dataframe)
    """
    # An empty dataframe (e.g. a query without results) may not even have the column.
    if df.empty:
        return 0

    # re.escape treats P_number as literal text; (?!\d) rejects a longer P number.
    pattern = re.escape(P_number) + r"(?!\d)"
    # str.match anchors at the start of the name; na=False skips missing names.
    is_user = df["patched_cell_container"].str.match(pattern, na=False)
    return int(is_user.sum())
    

In [8]:
# Summary for user P8. Each count is computed once and reused below.
p8_total = cell_count('P8', patched_cell_df)
p8_passed = cell_count('P8', QC_passed_df)

# print("...".format(...)) gives the same output on Python 2 and Python 3.
print("Total cells patched: {}".format(p8_total))
print("Cells that passed RNA seq QC: {}".format(p8_passed))
# The value is the passed/total fraction; "n/a" avoids dividing by zero.
print("QC pass percentage: {}".format(float(p8_passed) / float(p8_total) if p8_total else "n/a"))

Total cells patched: 700
Cells that passed RNA seq QC: 467
QC pass percentage: 0.667142857143


In [9]:
def per_user(P_number):
    """Prints a summary of patcher metrics for a specified user.

    Counts come from cell_count applied to the notebook-level dataframes
    patched_cell_df (all patched cells) and QC_passed_df (cells that passed QC).

    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.

    Returns
    -------
    Returns None
    Prints total cells patched, cells that passed RNA seq QC, and the QC pass
    "percentage" (shown as a passed/total fraction, or n/a when the user has no cells)
    """
    total = cell_count(P_number, patched_cell_df)
    passed = cell_count(P_number, QC_passed_df)

    # print("...".format(...)) gives the same output on Python 2 and Python 3.
    print("Total cells patched: {}".format(total))
    print("Cells that passed RNA seq QC: {}".format(passed))

    # Guard against a user without any patched cells (division by zero).
    if total:
        pass_fraction = float(passed) / float(total)
    else:
        pass_fraction = "n/a"
    print("QC pass percentage: {}".format(pass_fraction))
    

In [49]:
# Print the patching summary for user P2.
per_user('P2')

Total cells patched: 387
Cells that passed RNA seq QC: 267
QC pass percentage: 0.68992248062


In [11]:
# The do_63x CTE aggregates, per cell, the names of the two listed specimen
# tags; it is LEFT JOINed so cells without either tag are kept.
query3 = (
    "WITH do_63x(cell_id, go) AS (SELECT DISTINCT cell.id, array_to_string(array_agg(DISTINCT tag.name), ' _AND_ ') "
    "FROM specimens cell JOIN ephys_roi_results err ON err.id = cell.ephys_roi_result_id "
    "JOIN specimen_tags_specimens sptagsp ON sptagsp.specimen_id = cell.id "
    "JOIN specimen_tags tag ON tag.id = sptagsp.specimen_tag_id AND tag.id in (602120185,602122082) "
    "GROUP BY cell.id ORDER BY 1) "
    "SELECT s.patched_cell_container, err.recording_date, do_63x.go "
    "FROM specimens s "
    "JOIN projects proj ON s.project_id = proj.id "
    "LEFT JOIN do_63x ON do_63x.cell_id = s.id "
    "JOIN ephys_roi_results err ON s.ephys_roi_result_id = err.id "
    "WHERE proj.code <> 'mMPATCH' AND "
    "s.patched_cell_container IS NOT NULL "
    "AND err.recording_date > '2017-10-01'"
)

# Run query3 through get_lims_dataframe and keep the resulting DataFrame.
image_df = get_lims_dataframe(query3)
# Preview the first rows as the cell output.
image_df.head()

Unnamed: 0,go,patched_cell_container,recording_date
0,63x go,P8S4_171002_351_A01,2017-10-02 17:55:51
1,63x go,P9S4_171002_401_A01,2017-10-02 18:00:47
2,63x no go,P8S4_171002_352_A01,2017-10-02 18:17:44
3,63x go,P2S4_171002_051_A01,2017-10-02 18:29:21
4,63x no go,P8S4_171002_353_A01,2017-10-02 18:47:06


In [19]:
# Combines the 63x tag lookup (do_63x CTE) with the RNA amplification
# `failed` flag; both are LEFT JOINed, so cells lacking either are kept.
query4 = (
    "WITH do_63x(cell_id, go) AS (SELECT DISTINCT cell.id, array_to_string(array_agg(DISTINCT tag.name), ' _AND_ ') "
    "FROM specimens cell JOIN ephys_roi_results err ON err.id = cell.ephys_roi_result_id "
    "JOIN specimen_tags_specimens sptagsp ON sptagsp.specimen_id = cell.id "
    "JOIN specimen_tags tag ON tag.id = sptagsp.specimen_tag_id AND tag.id in (602120185,602122082) "
    "GROUP BY cell.id ORDER BY 1) "
    "SELECT s.patched_cell_container, err.recording_date, do_63x.go AS go_no_go_63x, ra.failed AS RNA_seq_QC_Failed "
    "FROM specimens s "
    "JOIN projects proj ON s.project_id = proj.id "
    "LEFT JOIN do_63x ON do_63x.cell_id = s.id "
    "JOIN ephys_roi_results err ON s.ephys_roi_result_id = err.id "
    "LEFT JOIN rna_amplification_inputs rai on rai.sample_id = s.id "
    "LEFT JOIN rna_amplifications ra on ra.id = rai.rna_amplification_id "
    "WHERE proj.code <> 'mMPATCH' AND "
    "s.patched_cell_container IS NOT NULL "
    "AND err.recording_date > '2017-10-01'"
)

# Run query4 through get_lims_dataframe and keep the resulting DataFrame.
all_df = get_lims_dataframe(query4)
# Preview the first rows as the cell output.
all_df.head()

Unnamed: 0,rna_seq_qc_failed,patched_cell_container,go_no_go_63x,recording_date
0,False,P8S4_171002_351_A01,63x go,2017-10-02 17:55:51
1,False,P9S4_171002_401_A01,63x go,2017-10-02 18:00:47
2,True,P8S4_171002_352_A01,63x no go,2017-10-02 18:17:44
3,False,P2S4_171002_051_A01,63x go,2017-10-02 18:29:21
4,False,P8S4_171002_353_A01,63x no go,2017-10-02 18:47:06


In [30]:
def gen_filter(df, col, cond):
    """Return the rows of a dataframe whose value in one column equals a given value.

    Parameters
    ----------
    df : a pandas dataframe
    col : name of the column to compare
    cond : value the column must equal for a row to be kept

    Returns
    -------
    A dataframe holding only the matching rows
    """
    matches = df[col] == cond
    return df.loc[matches]

In [31]:
# Keep the rows of all_df whose rna_seq_qc_failed value equals False.
qcpass = gen_filter(all_df,'rna_seq_qc_failed', False)

In [41]:
# Number of rows in qcpass counted for user P8.
cell_count('P8', qcpass)

467

In [33]:
# Keep the rows of all_df whose go_no_go_63x value equals '63x go'.
imagego = gen_filter(all_df,'go_no_go_63x', '63x go')

In [46]:
# Number of rows in imagego counted for user P8.
cell_count('P8', imagego)

168

In [47]:
def per_user2(P_number):
    """Prints a summary of patcher metrics for a specified user.

    Counts come from cell_count applied to the notebook-level dataframes
    all_df (all patched cells) and qcpass (rows of all_df that passed QC).

    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.

    Returns
    -------
    Returns None
    Prints total cells patched, cells that passed RNA seq QC, and the QC pass
    "percentage" (shown as a passed/total fraction, or n/a when the user has no cells)
    """
    total = cell_count(P_number, all_df)
    passed = cell_count(P_number, qcpass)

    # print("...".format(...)) gives the same output on Python 2 and Python 3.
    print("Total cells patched: {}".format(total))
    print("Cells that passed RNA seq QC: {}".format(passed))

    # Guard against a user without any patched cells (division by zero).
    if total:
        pass_fraction = float(passed) / float(total)
    else:
        pass_fraction = "n/a"
    print("QC pass percentage: {}".format(pass_fraction))
    

In [50]:
# Print the patching summary for user P2 using all_df and qcpass.
per_user2('P2')

Total cells patched: 387
Cells that passed RNA seq QC: 267
QC pass percentage: 0.68992248062
