# Imports and Functions

In [2]:
import pg8000
import pandas as pd    
import os, re
import numpy as np
import matplotlib.pyplot as plt
from allensdk.core.nwb_data_set import NwbDataSet
import allensdk.ephys.ephys_features as aef
import allensdk.ephys.extract_cell_features as ecf
import seaborn as sns
#from research_ops_tools.lims2_funcs import get_lims_dataframe
#from allensdk.internal.core.lims_utilities import linux_to_windows

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/allensdk.
import warnings
warnings.filterwarnings('ignore')

# Apply seaborn's default plot styling to subsequent matplotlib figures.
sns.set()

In [3]:
# Code from Agata.
# Helper functions that open a LIMS connection, run a query, and then close the connection.

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    conn = pg8000.connect(user=user, host=host, database=database, password=password, port=port)
    return conn, conn.cursor()

def _select(cursor, query):
    cursor.execute(query)
    columns = [ d[0] for d in cursor.description ]
    return [ dict(zip(columns, c)) for c in cursor.fetchall() ]

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Connect to the LIMS database, run a SQL query, and return the result.

    Args:
        query: String containing the SQL query to execute.
        user, host, database, password, port: Connection settings forwarded
            to ``_connect``.

    Returns:
        A list of dicts, one per row, keyed by column name (see ``_select``).

    Raises:
        Whatever ``_connect`` or ``_select`` raise; the cursor and connection
        are closed before the exception propagates.
    """
    conn, cursor = _connect(user, host, database, password, port)
    try:
        return _select(cursor, query)
    finally:
        # IMPORTANT: every query must release its cursor and connection when done.
        # The nested finally guarantees the connection is closed even if
        # closing the cursor itself raises.
        try:
            cursor.close()
        finally:
            conn.close()


#this last function will take our query results and put them in a dataframe so that they are easy to work with
def get_lims_dataframe(query):
    """Run ``query`` through ``limsquery`` and return the rows as a DataFrame.

    Args:
        query: String containing the SQL query to execute.

    Returns:
        A pandas DataFrame with one row per result row and one column per
        key of the first result dict. If the query returns no rows, a message
        is printed and an empty DataFrame is returned.
    """
    result = limsquery(query)
    if not result:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("Could not find results for your query.")
        return pd.DataFrame()
    return pd.DataFrame(data=result, columns=list(result[0].keys()))

# Obtain ephys data from LIMS

In [25]:
# Ephys features for cells in the 'mIVSCC-MET' and 'T301' projects whose
# patched-cell container date (characters 6-11 of patched_cell_container)
# falls between 180101 and 190830.
# cell.id is aliased to cell_id: ef.* also contains a column named `id`, and
# the query helpers build each row as a dict keyed by column name, so without
# the alias the specimen id is overwritten by ef.id (NULL when a cell has no
# ephys_features row).
q = """
SELECT cell.name,
cell.id AS cell_id,
SUBSTRING(cell.patched_cell_container FROM 6 FOR 6) AS container_date,
ef.*
FROM specimens cell 
JOIN ephys_roi_results err ON cell.ephys_roi_result_id = err.id
JOIN projects proj ON cell.project_id = proj.id
LEFT JOIN ephys_features ef on ef.specimen_id = cell.id
WHERE SUBSTRING(cell.patched_cell_container FROM 6 FOR 6) BETWEEN '180101' AND '190830'
AND proj.code in ('mIVSCC-MET', 'T301')"""

lims_df = get_lims_dataframe(q)
# Preview only the first rows instead of rendering the whole frame.
lims_df.head(10)


Unnamed: 0,tau,upstroke_downstroke_ratio_short_square,thumbnail_sweep_id,has_delay,threshold_v_ramp,peak_v_short_square,upstroke_downstroke_ratio_ramp,sag,updated_at,threshold_t_ramp,...,trough_v_short_square,f_i_curve_slope,created_at,peak_t_long_square,latency,fast_trough_v_long_square,upstroke_downstroke_ratio_long_square,trough_v_ramp,peak_v_long_square,adaptation
0,20.155605,2.161662,861091259.0,,-41.937500,32.906250,2.154621,0.003525,2019-05-02 01:16:36.071838,4.135120,...,-73.750000,0.062500,2019-05-02 01:16:36.071838,0.60518,0.02238,-57.843750,2.197645,-52.156250,26.500000,0.082358
1,,,,,,,,,NaT,,...,,,NaT,,,,,,,
2,9.199781,3.582439,861090559.0,,-37.781250,22.385417,3.575613,0.033580,2019-05-02 01:15:17.698290,2.571320,...,-69.104164,0.550000,2019-05-02 01:15:17.698290,0.54306,0.04264,-42.156250,3.670867,-47.343750,27.562500,-0.081838
3,,,,,,,,,NaT,,...,,,NaT,,,,,,,
4,9.319132,1.592834,861091394.0,,-33.890625,31.687500,1.172872,0.055818,2019-05-02 01:16:48.807617,13.129790,...,-66.583336,0.037500,2019-05-02 01:16:48.807617,0.54284,0.01220,-55.000000,1.611259,-49.859375,30.593750,
5,8.374705,3.763035,861091396.0,,-25.984375,37.293751,2.409095,0.059685,2019-05-02 01:16:46.338036,7.647620,...,-72.775000,0.062500,2019-05-02 01:16:46.338036,0.54206,0.01574,-41.937500,3.531583,-37.828125,31.062500,0.207067
6,12.350084,1.887376,861090329.0,,-41.656250,8.468750,1.575830,0.032335,2019-05-02 01:15:01.537859,6.769120,...,-68.312500,0.025000,2019-05-02 01:15:01.537859,0.51350,0.00654,-62.312500,1.869017,-58.781250,11.437500,
7,17.099053,1.769058,861090429.0,,-61.343750,25.000000,1.867376,0.003167,2019-05-02 01:15:54.452867,2.497820,...,-76.406250,0.025000,2019-05-02 01:15:54.452867,0.55670,0.01744,-57.500000,2.070813,-61.687500,24.750000,
8,,,,,,,,,NaT,,...,,,NaT,,,,,,,
9,25.138305,1.871368,861090527.0,,-30.656250,39.687500,1.642519,0.029762,2019-05-02 01:16:04.194879,3.346520,...,-55.687500,0.350000,2019-05-02 01:16:04.194879,0.67946,0.02370,-51.375000,1.811722,-50.343750,27.187500,0.051346


In [24]:
# Exploratory variant: ephys features for every cell in the 'mIVSCC-MET'
# project, with no date restriction.
# - The project filter belongs in WHERE. As part of the LEFT JOIN's ON clause
#   it only limited which ephys_features rows were attached and did not
#   restrict the cells returned.
# - The result gets its own name so that running the notebook top to bottom
#   does not overwrite the `lims_df` that the merge step uses.
# - cell.id is aliased to cell_id so it is not overwritten by ef.id when rows
#   are converted to dicts keyed by column name.
q_met = """
SELECT cell.name,
cell.id AS cell_id,
ef.*
FROM specimens cell 
JOIN ephys_roi_results err ON cell.ephys_roi_result_id = err.id
JOIN projects proj ON cell.project_id = proj.id
LEFT JOIN ephys_features ef on ef.specimen_id = cell.id
WHERE proj.code in ('mIVSCC-MET')"""

lims_met_df = get_lims_dataframe(q_met)
# Preview only the first rows instead of rendering the whole frame.
lims_met_df.head(10)


Unnamed: 0,tau,upstroke_downstroke_ratio_short_square,thumbnail_sweep_id,has_delay,threshold_v_ramp,peak_v_short_square,upstroke_downstroke_ratio_ramp,sag,updated_at,threshold_t_ramp,...,trough_v_short_square,f_i_curve_slope,created_at,peak_t_long_square,latency,fast_trough_v_long_square,upstroke_downstroke_ratio_long_square,trough_v_ramp,peak_v_long_square,adaptation
0,46.068612,1.629215,809630410.0,,,16.793180,,0.047297,2019-01-17 19:12:40.217597,,...,-58.533524,6.723404e-01,2019-01-17 19:12:40.217597,0.62088,0.04230,-48.656250,1.531230,,14.718750,0.007492
1,6.309486,1.411009,809638950.0,,,25.327499,,0.035610,2019-01-17 19:32:04.970144,,...,-55.353752,6.100000e-01,2019-01-17 19:32:04.970144,0.51534,0.00944,-54.418751,1.421988,,27.456249,0.023410
2,6.390954,1.622022,818810664.0,,,19.479687,,0.055444,2019-02-06 00:26:25.252915,,...,-54.535934,5.750000e-01,2019-02-06 00:26:25.252915,0.61070,0.07508,-47.081249,1.612776,,16.262499,0.053562
3,87.041986,1.576874,869520640.0,,,3.412500,,0.075305,2019-05-16 20:06:55.409414,,...,-68.378128,9.099119e-19,2019-05-16 20:06:55.409414,0.50710,0.00520,-57.899998,1.550681,,5.356250,
4,7.247627,1.458550,821532548.0,,,21.600000,,0.011772,2019-02-12 18:57:44.583239,,...,-59.375000,6.057692e-01,2019-02-12 18:57:44.583239,0.52204,0.01200,-51.931248,1.480023,,20.287500,0.002700
5,20.677347,2.976289,696994967.0,False,-33.656253,41.085940,2.797883,0.155472,2018-05-11 18:58:07.446023,4.950100,...,-76.312504,1.889286e-01,2018-05-11 18:58:07.446023,1.11784,0.03632,-46.937504,2.917276,-51.604168,39.125004,0.037970
6,33.214118,4.251538,696995208.0,False,-44.520836,48.117189,4.369519,0.170575,2018-05-11 18:57:59.839707,1.658880,...,-67.515629,3.405457e-01,2018-05-11 18:57:59.839707,1.39190,0.02916,-51.281250,4.262800,-51.531251,48.343750,0.043399
7,12.288935,1.435377,830394112.0,,,21.559376,,0.077684,2019-02-28 19:53:20.046030,,...,-75.956253,1.769231e+00,2019-02-28 19:53:20.046030,0.51590,0.00878,-59.899998,1.383077,,19.531250,0.001766
8,25.324462,1.792097,804848289.0,,-29.112497,12.025000,1.630815,0.087595,2019-01-07 19:54:43.737155,3.277633,...,-69.012497,4.000000e-01,2019-01-07 19:54:43.737155,0.56440,0.02922,-50.612499,1.911140,-48.020832,17.231249,0.023762
9,16.676097,3.478696,697012899.0,False,-35.989585,34.806252,3.959953,0.017999,2018-05-11 19:07:43.425874,5.518340,...,-76.987503,1.388889e-01,2018-05-11 19:07:43.425874,1.07450,0.02670,-47.781250,3.939144,-52.322918,32.000000,


In [19]:
# List the column labels of lims_df.
lims_df.columns

Index([u'tau', u'upstroke_downstroke_ratio_short_square',
       u'thumbnail_sweep_id', u'has_delay', u'threshold_v_ramp',
       u'peak_v_short_square', u'upstroke_downstroke_ratio_ramp', u'sag',
       u'updated_at', u'threshold_t_ramp', u'slow_trough_v_ramp', u'vrest',
       u'has_pause', u'trough_t_ramp', u'trough_v_long_square',
       u'threshold_t_short_square', u'id', u'trough_t_short_square',
       u'fast_trough_v_ramp', u'trough_t_long_square',
       u'slow_trough_v_long_square', u'rheobase_sweep_id', u'peak_t_ramp',
       u'has_burst', u'slow_trough_t_long_square', u'threshold_v_long_square',
       u'fast_trough_t_long_square', u'ri', u'threshold_t_long_square',
       u'threshold_v_short_square', u'avg_isi', u'vm_for_sag', u'specimen_id',
       u'threshold_i_long_square', u'threshold_i_short_square',
       u'slow_trough_t_ramp', u'peak_v_ramp', u'fast_trough_v_short_square',
       u'fast_trough_t_short_square', u'fast_trough_t_ramp',
       u'slow_trough_t_short_squ

# Obtain mapping data from shiny, stored on the network

In [26]:
# Network (UNC) location of the shiny/LIMS export. A raw string keeps the
# backslashes literal; the previous literal contained an invalid "\s" escape
# that produced a doubled separator before the file name.
SHINY_CSV_PATH = r"\\allen\programs\celltypes\workgroups\279\IVSCC_KPIs\shiny_LIMS.csv"

shiny_df = pd.read_csv(SHINY_CSV_PATH)
# Preview only the first rows instead of rendering the whole frame.
shiny_df.head(10)

Unnamed: 0,specimen_id,cell_name,patched_cell_container,rig_operator,project_code,image_series_20x_id,case_number,recording_date,creation_date,donor_name,...,batch_vendor_name,broad_class_label,cluster_detail,cluster_label,marker_sum_norm_label,percent_cdna_longer_than_400bp,res_index,roi,seurat_cluster_label,subclass_label
0,639568687,Oxtr-T2A-Cre;Ai14-351467.05.02.01,P8S4_171002_351_A01,P8,mIVSCC-MET,643497668.0,351467,2017-10-02,2017-10-02-19-20-00,Oxtr-T2A-Cre;Ai14-351467,...,RSC-125,GABAergic,n_Sst,n91,0.571161,0.314,0.714428,VISp5,,Sst
1,639568972,Oxtr-T2A-Cre;Ai14-351467.03.01.01,P9S4_171002_401_A01,P9,mIVSCC-MET,643497439.0,351467,2017-10-02,2017-10-02-19-20-13,Oxtr-T2A-Cre;Ai14-351467,...,RSC-125,GABAergic,n_Sst,n91,0.797319,0.623,0.714428,VISp2-3,,Sst
2,639604957,Oxtr-T2A-Cre;Ai14-351467.05.02.02,P8S4_171002_352_A01,P8,mIVSCC-MET,643497668.0,351467,2017-10-02,2017-10-02-19-49-34,Oxtr-T2A-Cre;Ai14-351467,...,RSC-125,Glutamatergic,n3,n3,0.205138,0.295,0.000000,VISp5,,L6b
3,639615709,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-350934.04.01.01,P2S4_171002_051_A01,P2,mIVSCC-MET,643497697.0,350934,2017-10-02,2017-10-02-20-04-24,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-350934,...,RSC-125,Glutamatergic,Excitatory,n6,0.598208,0.418,0.604422,VISp4,,L4
4,639622744,Oxtr-T2A-Cre;Ai14-351467.05.02.03,P8S4_171002_353_A01,P8,mIVSCC-MET,643497668.0,351467,2017-10-02,2017-10-02-20-14-30,Oxtr-T2A-Cre;Ai14-351467,...,RSC-125,Glutamatergic,Excitatory,n5,0.261427,0.609,0.542001,VISp5,,L6 CT
5,639636584,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-350934.04.01.02,P2S4_171002_052_A01,P2,mIVSCC-MET,643497697.0,350934,2017-10-02,2017-10-02-20-39-54,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-350934,...,RSC-125,Glutamatergic,L4 IT VISp Rspo1,L4 IT VISp Rspo1,1.011053,0.702,1.000000,VISp2-3,,L4
6,639671955,Gad2-IRES-Cre;Ai14-350672.06.02.01,PAS4_171002_451_A01,PA,mIVSCC-MET,643497953.0,350672,2017-10-02,2017-10-02-22-04-50,Gad2-IRES-Cre;Ai14-350672,...,RSC-125,GABAergic,Inhibitory,n59,0.543551,0.442,0.434669,VISp4,,Serpinf1
7,639674270,Chrna2-Cre_OE25;Ai14-351067.04.02.01,P8S4_171002_354_A01,P8,mIVSCC-MET,643498140.0,351067,2017-10-02,2017-10-02-22-14-43,Chrna2-Cre_OE25;Ai14-351067,...,RSC-125,GABAergic,Vip Lmo1 Fam159b,Vip Lmo1 Fam159b,1.062120,0.430,1.000000,VISp5,,Vip
8,639689311,Chrna2-Cre_OE25;Ai14-351067.04.02.02,P8S4_171002_355_A01,P8,mIVSCC-MET,643498140.0,351067,2017-10-02,2017-10-02-22-49-37,Chrna2-Cre_OE25;Ai14-351067,...,RSC-125,GABAergic,n_Sst,n100,0.788812,0.433,0.860419,VISp5,,Sst
9,639689999,Gad2-IRES-Cre;Ai14-350672.03.01.01,P9S4_171002_402_A01,P9,mIVSCC-MET,643498169.0,350672,2017-10-02,2017-10-02-22-49-50,Gad2-IRES-Cre;Ai14-350672,...,RSC-125,GABAergic,Lamp5 Lsp1,Lamp5 Lsp1,0.974841,0.751,1.000000,VISp2-3,,Lamp5


# Merge LIMS and Shiny dataframes, save to csv

In [27]:
# Left-join the shiny table onto the LIMS ephys table by cell name, keeping
# every LIMS row even when shiny has no matching cell_name.
ephys_shiny_df = pd.merge(
    left=lims_df,
    right=shiny_df,
    left_on='name',        # ephys (LIMS) cell name
    right_on='cell_name',  # shiny cell name
    how='left',
)

# Spot check: project_code for one specific cell.
# regex=False matches the name literally (otherwise each "." matches any
# character); na=False treats missing names as non-matching.
name_matches = ephys_shiny_df['name'].str.contains(
    'H19.06.356.21.06.04.01', regex=False, na=False
)
ephys_shiny_df.loc[name_matches, 'project_code']

Series([], Name: project_code, dtype: object)

In [28]:
# Write the merged table to MET_data_191101.csv; the relative path resolves
# against the current working directory.
ephys_shiny_df.to_csv("MET_data_191101.csv")

In [12]:
# Show the distinct values in the subclass_label column (NaN included).
ephys_shiny_df['subclass_label'].unique()

array(['Vip', 'Lamp5', 'Sst', 'L5 IT', 'Pvalb', 'Serpinf1', 'Sncg',
       'L6 CT', 'L5 PT', nan, 'L6 IT', 'L2/3 IT', 'L4', 'Meis2', 'NP',
       'L6b'], dtype=object)