# Lindsay's Script

In [1]:
import os
import pg8000          #pg8000 access SQL databases
import pandas as pd    #paandas will be needed to work in a dataframe
from pandas import ExcelWriter

In [2]:
#code from Agata
#these are nice functions to open LIMS, make a query and then close LIMS after

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    conn = pg8000.connect(user=user, host=host, database=database, password=password, port=port)
    return conn, conn.cursor()

def _select(cursor, query):
    cursor.execute(query)
    columns = [ d[0] for d in cursor.description ]
    return [ dict(zip(columns, c)) for c in cursor.fetchall() ]

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """A function that takes a string containing a SQL query, connects to the LIMS database and outputs the result."""
    conn, cursor = _connect(user, host, database, password, port)
    try:
        results = _select(cursor, query)
    finally:
        
        #THESE ARE IMPORTANT!!!!!!
        #Every query needs to be closed when done
        cursor.close()             
        conn.close()
    return results


#this last function will take our query results and put them in a dataframe so that they are easy to work with
def get_lims_dataframe(query):
    '''Return a dataframe with lims query'''
    result = limsquery(query)
    try:
        data_df = pd.DataFrame(data=result, columns=result[0].keys())
    except IndexError:
        print "Could not find results for your query."
        data_df = pd.DataFrame()
    return data_df

### User Report

In [3]:
def user_df(P_number):
    """Saves an excel dataframe with all patched cells for a specified user in current directory.
     
    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.
       
    Returns
    -------
    None
    """
    user = master_df[master_df["patched_cell_container"].str.contains(P_number)]
    user.sort_values("patched_cell_container", axis = 0, inplace = True)
    user = user.loc[:,['name', 'patched_cell_container','recording_date', 'rna_qc', 'go_no_go_63x', 'link_20x']]
    writer = pd.ExcelWriter('user_dataframe.xlsx')
    user.to_excel(writer,'Sheet1')
    return None

def cell_count(P_number, df):
    """Return the number of cells in a specified dataframe for a specified user.
     
    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.
    df: a pandas dataframe
    
    Returns
    -------
    An integer cell number
    """
    user = df[df["patched_cell_container"].str.contains(P_number)]
    return user["patched_cell_container"].count()

def per_user(P_number):
    """Returns a summary of patcher metrics for a specified user.
     
    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.
        
    Returns
    -------
    Returns None
    Prints a summary of patcher metrics, including total cells patched, cells that passed RNA seq QC, and QC pass percentage
    """
    print "Total cells patched:", cell_count(P_number,master_df)
    print "Cells that passed RNA seq QC:", cell_count(P_number,qcpass)
    print "QC pass percentage:", float((cell_count(P_number,qcpass)))/float((cell_count(P_number,master_df)))
    print "Cells imaged at 63x:", cell_count(P_number,imagego)

def gen_filter(df, col, cond):
    filtered = df[df[col] == cond]
    return filtered

In [4]:
query = """
WITH do_63x(cell_id, go) AS (SELECT DISTINCT cell.id, array_to_string(array_agg(DISTINCT tag.name), ' _AND_ ') 
FROM specimens cell JOIN ephys_roi_results err ON err.id = cell.ephys_roi_result_id 
JOIN specimen_tags_specimens sptagsp ON sptagsp.specimen_id = cell.id 
JOIN specimen_tags tag ON tag.id = sptagsp.specimen_tag_id AND tag.id in (602120185,602122082) 
GROUP BY cell.id ORDER BY 1), 
imgs20 AS (WITH tag_list AS (SELECT DISTINCT ims.id AS ims_id, imst.name AS tag FROM image_series ims 
JOIN image_series_image_series_tags ims2imst ON ims2imst.image_series_id = ims.id 
JOIN image_series_tags imst ON imst.id = ims2imst.image_series_tag_id 
ORDER BY 1,2) 
SELECT DISTINCT ims.id AS image_series_id, ims.workflow_state AS workflow_state, ims.specimen_id, 
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT tag_list.tag), '_AND_') AS tags FROM image_series ims 
LEFT JOIN tag_list ON tag_list.ims_id = ims.id 
WHERE ims.type = 'FocalPlaneImageSeries' AND is_stack = false 
GROUP BY ims.id, ims.workflow_state, ims.specimen_id)
SELECT DISTINCT s.patched_cell_container, err.recording_date, do_63x.go AS go_no_go_63x, CASE WHEN ra.id IS NOT NULL THEN (CASE WHEN ra.failed = 't' THEN 'failed' ELSE 'passed' END) ELSE '' END AS rna_qc, s.name,
'=HYPERLINK("http://lims2/focal_plane_image_series?id=' || imgs20.image_series_id || '")' as link_20x
FROM specimens s 
LEFT JOIN projects proj ON s.project_id = proj.id 
LEFT JOIN do_63x ON do_63x.cell_id = s.id 
JOIN specimens slice ON s.parent_id = slice.id
LEFT JOIN ephys_roi_results err ON s.ephys_roi_result_id = err.id 
LEFT JOIN rna_amplification_inputs rai on rai.sample_id = s.id 
LEFT JOIN imgs20 ON slice.id = imgs20.specimen_id 
LEFT JOIN rna_amplifications ra on ra.id = rai.rna_amplification_id 
WHERE proj.code <> 'mMPATCH' AND 
s.patched_cell_container IS NOT NULL 
AND err.recording_date > '2017-10-01'
"""

#AND (err.recording_date > '2017-10-01' OR err.recording_date IS NULL)
#AND err.recording_date > '2017-10-01'

master_df = get_lims_dataframe(query)
master_df

Unnamed: 0,go_no_go_63x,name,recording_date,rna_qc,patched_cell_container,link_20x
0,,Ndnf-IRES2-dgCre;Slc32a1-IRES2-FlpO;Ai65-35889...,2017-11-16 22:21:12,passed,P9S4_171116_404_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
1,63x go,Chat-IRES-Cre-neo;Ai14-367633.05.01.01,2018-01-12 19:14:41,passed,PAS4_180112_453_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
2,,Ntsr1-Cre_GN220;Ai14-357985.04.01.02,2017-11-07 21:35:38,failed,P9S4_171107_402_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
3,,Npr3-IRES2-Cre;Sst-IRES-FlpO;Ai65-379882.04.02.01,2018-03-13 17:02:13,failed,P8S4_180313_351_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
4,,H18.03.008.11.02.01,2018-04-18 02:37:13,failed,PAS4_180417_454_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
5,,Vip-IRES-Cre;Ai14-381891.05.01.01,2018-03-30 18:59:58,failed,PAS4_180330_451_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
6,,H18.03.010.11.10.01,2018-05-09 01:23:22,,P9S4_180508_405_A01,
7,,Crh-IRES-Cre_ZJH;Sst-IRES-FlpO;Ai65-360064.05....,2017-12-07 22:19:44,failed,P9S4_171207_404_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
8,63x go,Slc17a8-IRES2-Cre;Slc32a1-IRES2-FlpO;Ai65-3547...,2017-10-30 20:25:59,passed,P2S4_171030_051_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
9,,Oxtr-T2A-Cre;Ai14-381904.03.02.02,2018-03-28 21:45:07,passed,P2S4_180328_055_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."


In [6]:
qcpass = gen_filter(master_df,'rna_qc', 'passed')
qcpass

Unnamed: 0,go_no_go_63x,name,recording_date,rna_qc,patched_cell_container,link_20x
0,,Ndnf-IRES2-dgCre;Slc32a1-IRES2-FlpO;Ai65-35889...,2017-11-16 22:21:12,passed,P9S4_171116_404_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
1,63x go,Chat-IRES-Cre-neo;Ai14-367633.05.01.01,2018-01-12 19:14:41,passed,PAS4_180112_453_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
8,63x go,Slc17a8-IRES2-Cre;Slc32a1-IRES2-FlpO;Ai65-3547...,2017-10-30 20:25:59,passed,P2S4_171030_051_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
9,,Oxtr-T2A-Cre;Ai14-381904.03.02.02,2018-03-28 21:45:07,passed,P2S4_180328_055_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
10,,Crh-IRES-Cre_ZJH;Sst-IRES-FlpO;Ai65-374738.02....,2018-02-06 19:35:00,passed,P2S4_180206_052_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
11,,H18.03.009.11.13.01,2018-04-24 22:29:18,passed,PAS4_180424_455_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
12,63x no go,Ndnf-IRES2-dgCre;Slc32a1-IRES2-FlpO;Ai65-37811...,2018-03-05 20:49:49,passed,P2S4_180305_051_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
14,63x go,Pvalb-IRES-Cre;Ai14-380351.02.01.01,2018-03-19 17:50:24,passed,P9S4_180319_403_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
15,63x go,Rbp4-Cre_KL100;Ai14-362155.05.02.04,2017-12-01 19:55:40,passed,PAS4_171201_454_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."
16,63x go,Ctgf-T2A-dgCre;Ai14-374412.06.02.03,2018-02-09 22:12:49,passed,P8S4_180209_356_A01,"=HYPERLINK(""http://lims2/focal_plane_image_ser..."


In [7]:
imagego = gen_filter(master_df,'go_no_go_63x', '63x go')

In [8]:
user_df('PA') #Outputs an excel sheet based on 'User'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


In [9]:
per_user('PA') #Outputs print commands with results based on 'User'

Total cells patched: 286
Cells that passed RNA seq QC: 184
QC pass percentage: 0.643356643357
Cells imaged at 63x: 67
