# Imports and Functions

In [1]:
import pg8000
import pandas as pd    
import os, re
import numpy as np
import matplotlib.pyplot as plt
from allensdk.core.nwb_data_set import NwbDataSet
import allensdk.ephys.ephys_features as aef
import allensdk.ephys.extract_cell_features as ecf
import seaborn as sns
#from research_ops_tools.lims2_funcs import get_lims_dataframe
#from allensdk.internal.core.lims_utilities import linux_to_windows

import warnings
# Suppress every warning for the rest of the session. This keeps cell output
# short but also hides deprecation/runtime warnings from the libraries above.
warnings.filterwarnings('ignore')

# Switch matplotlib over to seaborn's default plot styling.
sns.set()

In [2]:
#code from Agata
#these are nice functions to open LIMS, make a query and then close LIMS after

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Open a pg8000 connection and hand it back together with a new cursor.

    All arguments are forwarded unchanged to ``pg8000.connect``; the defaults
    are the LIMS settings used by the rest of this notebook.

    Returns:
        A ``(connection, cursor)`` tuple. Closing both is the caller's job.
    """
    connection = pg8000.connect(
        user=user,
        host=host,
        database=database,
        password=password,
        port=port,
    )
    cursor = connection.cursor()
    return connection, cursor

def _select(cursor, query):
    cursor.execute(query)
    columns = [ d[0] for d in cursor.description ]
    return [ dict(zip(columns, c)) for c in cursor.fetchall() ]

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Run one SQL query against the LIMS database and return its rows.

    A new connection is opened for this call via ``_connect`` and the query is
    executed through ``_select``, so the result is a list of dicts (one per
    row, keyed by column name).

    The cursor and the connection are closed before returning, whether or not
    the query succeeded; an exception raised by the query still propagates.
    """
    connection, cur = _connect(user, host, database, password, port)
    try:
        return _select(cur, query)
    finally:
        # IMPORTANT: every query must release its cursor and connection when
        # it is done, even if the query itself failed.
        cur.close()
        connection.close()


#this last function will take our query results and put them in a dataframe so that they are easy to work with
def get_lims_dataframe(query):
    """Run ``query`` against LIMS and return the result as a pandas DataFrame.

    Args:
        query: SQL text, passed unchanged to ``limsquery``.

    Returns:
        A DataFrame with one row per result row and one column per key of the
        first result dict. If the query returns no rows, a message is printed
        and an empty DataFrame (no rows, no columns) is returned instead.
    """
    result = limsquery(query)
    if not result:
        # The parenthesised single-argument form prints the same text under
        # both the Python 2 print statement and the Python 3 print function;
        # the bare `print "..."` form is a SyntaxError on Python 3.
        print("Could not find results for your query.")
        return pd.DataFrame()
    # list(...) so the column labels are a concrete sequence on Python 3,
    # where dict.keys() returns a view rather than a list.
    return pd.DataFrame(data=result, columns=list(result[0].keys()))

# Obtain ephys data from LIMS

In [3]:
# Query LIMS for specimens in projects 'hIVSCC-MET' / 'hIVSCC-METc' whose
# container-date substring (6 characters of patched_cell_container starting at
# position 6) lies between '180101' and '999999' (string comparison).
# ephys_features is LEFT JOINed, so specimens without a feature row are still
# returned, with empty feature columns. The inner join on ephys_roi_results
# restricts the result to specimens that have a matching ephys_roi_result.
# NOTE(review): `cell.name`/`cell.id` and `ef.*` may share column names (e.g.
# `id`); _select builds one dict per row, so a later duplicate column would
# overwrite the earlier one - confirm which `id` ends up in the frame.
q = """
SELECT cell.name,
cell.id,
SUBSTRING(cell.patched_cell_container FROM 6 FOR 6) AS container_date,
ef.*
FROM specimens cell 
JOIN ephys_roi_results err ON cell.ephys_roi_result_id = err.id
JOIN projects proj ON cell.project_id = proj.id
LEFT JOIN ephys_features ef on ef.specimen_id = cell.id
WHERE SUBSTRING(cell.patched_cell_container FROM 6 FOR 6) BETWEEN '180101' AND '999999'
AND proj.code in ('hIVSCC-MET', 'hIVSCC-METc')"""

# Run the query and display the resulting DataFrame as the cell output.
lims_df = get_lims_dataframe(q)
lims_df


Unnamed: 0,tau,upstroke_downstroke_ratio_short_square,thumbnail_sweep_id,has_delay,threshold_v_ramp,peak_v_short_square,upstroke_downstroke_ratio_ramp,sag,updated_at,threshold_t_ramp,...,trough_v_short_square,f_i_curve_slope,created_at,peak_t_long_square,latency,fast_trough_v_long_square,upstroke_downstroke_ratio_long_square,trough_v_ramp,peak_v_long_square,adaptation
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,22.641362,1.777143,1.040521e+09,,-26.156250,15.515626,2.390769,0.145818,2020-08-17 18:31:42.853652,2.721780,...,-64.203125,0.198661,2020-08-16 00:50:31.473692,0.75468,0.02196,-44.937504,2.497027,-46.781254,29.218752,-0.014374
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,12.217778,1.806691,1.040518e+09,,,27.885420,,0.371542,2020-08-17 18:31:30.220258,,...,-68.010414,0.353554,2020-08-16 00:50:53.256168,0.51856,0.01228,-65.875000,1.750159,,27.468752,-0.096957
8,,,,,,,,,,,...,,,,,,,,,,
9,18.535130,2.228835,1.040521e+09,,-50.468750,11.869792,2.456655,0.396388,2020-08-17 18:29:08.933770,0.959700,...,-63.083332,0.041943,2020-08-16 00:55:56.738257,0.66842,0.01632,-50.937504,2.349560,-56.531250,19.125002,0.034384


In [39]:
# Same specimen/feature query shape as the cell above, but filtered to project
# code 'H301' only and returning the project code instead of a container date.
# This reassigns both `q` and `lims_df`, so cells run afterwards see the H301
# result rather than the hIVSCC-MET one.
# NOTE(review): `cell.id` and `ef.*` may share column names; _select keeps only
# the last value for a duplicated column name - confirm.
q = """
SELECT cell.name,
cell.id,
proj.code,
ef.*
FROM specimens cell 
JOIN ephys_roi_results err ON cell.ephys_roi_result_id = err.id
JOIN projects proj ON cell.project_id = proj.id
LEFT JOIN ephys_features ef on ef.specimen_id = cell.id
WHERE proj.code = 'H301'"""

# Run the query and display the resulting DataFrame as the cell output.
lims_df = get_lims_dataframe(q)
lims_df


Unnamed: 0,tau,code,thumbnail_sweep_id,has_delay,threshold_v_ramp,peak_v_short_square,upstroke_downstroke_ratio_ramp,sag,updated_at,threshold_t_ramp,...,f_i_curve_slope,created_at,peak_t_long_square,latency,fast_trough_v_long_square,slow_trough_v_ramp,upstroke_downstroke_ratio_long_square,trough_v_ramp,peak_v_long_square,adaptation
0,17.695707,H301,485504770.0,False,-37.093753,41.421876,2.987071,0.069023,2017-09-06 19:40:25.758759,12.466328,...,0.027053,2015-08-14 23:50:46.355352,1.102680,0.051065,-50.968750,-58.958337,3.558154,-58.958337,38.156254,
1,14.570731,H301,485513440.0,False,-45.666668,51.906252,4.312684,0.114468,2017-06-05 18:17:56.089882,22.377955,...,0.012500,2015-08-15 00:34:39.493106,1.167655,0.078930,-51.531250,-54.697920,4.091862,-55.197919,54.843754,
2,31.469414,H301,485537710.0,False,-44.218753,45.025000,3.692418,0.045291,2017-06-05 18:18:45.184823,1.812767,...,0.301339,2015-08-15 19:04:13.020431,1.216160,0.023565,-51.718754,,3.213940,-50.468751,47.750004,0.015221
3,27.435178,H301,486760778.0,False,-37.604170,42.739585,3.303472,0.074271,2017-08-09 00:38:16.950453,4.600962,...,0.080120,2015-09-18 23:36:39.621489,1.196935,0.079985,-52.593754,-58.197919,3.397767,-58.197919,43.062500,0.741967
4,32.363539,H301,486760617.0,False,-38.072919,37.742189,3.548556,0.173008,2017-06-05 18:16:28.622000,7.137367,...,0.057753,2015-09-18 23:34:43.394541,1.332170,0.112790,-51.750004,-56.072919,3.776687,-56.072919,41.781250,
5,14.007319,H301,486760452.0,False,-35.000003,27.681252,2.554582,0.072103,2017-06-05 18:17:57.950842,8.591112,...,0.124482,2015-09-18 23:35:40.601527,1.165290,0.073400,-45.093754,-49.510419,2.858349,-49.510419,28.062502,0.053199
6,19.418438,H301,486941688.0,False,-42.958333,37.681252,3.651213,0.154392,2018-03-01 00:52:22.751811,8.604432,...,0.153409,2015-09-21 23:56:52.286175,1.098960,0.048625,-55.000004,,3.788291,-56.312504,37.906250,0.187289
7,21.922694,H301,488385680.0,False,-45.875004,36.875002,3.392293,0.214776,2017-06-05 18:19:36.143466,2.618160,...,0.208838,2015-10-06 21:00:38.532683,1.099940,0.028020,-52.906254,-58.354168,3.234728,-58.354168,31.468752,0.035458
8,15.479129,H301,488386783.0,False,-38.479169,40.333335,2.268135,0.289759,2017-06-05 18:17:28.494362,5.146210,...,0.194643,2015-10-06 21:38:23.460196,1.108240,0.035120,-53.281250,-54.031251,2.373951,-55.489587,41.031254,0.110072
9,15.025867,H301,488388018.0,False,-47.708337,34.864583,2.869977,0.117780,2018-08-28 03:24:18.245549,12.061787,...,0.072876,2015-10-06 21:47:37.352513,1.149300,0.053460,-55.218754,,3.029907,-59.895836,35.781254,


In [27]:
# Write whatever `lims_df` currently holds (the result of the most recently
# run query cell) to a CSV file in the working directory.
lims_df.to_csv('ME_200501.csv')

# Obtain mapping data from Shiny, stored on the network

In [5]:
# Load the Shiny mapping exports from CSV files in the working directory.
# Commented-out lines are earlier or alternative inputs kept for reference.
#sncg1= pd.read_csv('soltesz_sncg_shiny_data.csv')
sncg1= pd.read_csv('soltesz_sncg_shiny_data_200406.csv')
sncg2= pd.read_csv('soltesz_slc_sncg_shiny_data.csv')
#mouse_hip = pd.concat([sncg1,sncg2])
mouse_hip = pd.read_csv('visp_sncg_200413.csv')
culture = pd.read_csv('culture_200415.csv')
# `mouse` used to be read from 'mouse_200416.csv' and then immediately
# overwritten by the next line; the dead read is disabled so only the file
# that actually ends up in `mouse` is loaded.
#mouse = pd.read_csv('mouse_200416.csv')
mouse = pd.read_csv('all_mouse_regions.csv')
human_lockdown = pd.read_csv('MET_METc_lockdown_data.csv')

In [6]:
# Choose which Shiny mapping table the merge cells below will use. Earlier
# sources are left commented out for reference; the active choice is the
# `human_lockdown` frame loaded above (same object, not a copy).
#shiny_df = pd.read_csv("\\\\allen\\programs\\celltypes\\workgroups\\279\\IVSCC_KPIs\\\shiny_LIMS.csv")
#shiny_df = pd.read_csv('human_shiny_200127.csv')
#shiny_df = pd.read_csv('ps_no_exc23 (metx).csv')
shiny_df = human_lockdown
shiny_df

Unnamed: 0.1,Unnamed: 0,spec_id_label,subclass_label,seurat_cluster_label,Tree_first_cl_label,Norm_Marker_Sum.0.4_label,Tree_call_label,donor_label,ac_label,sample_id,cell_name_label,DIV,Virus,date
0,577,923121427,LAMP5/PAX6/Other,Inh L5-6 PVALB LGR5,Inh L1-2 SST BAGE2 (ADARB2+),False,PoorQ,H19.03.319,Acute,PES4_190813_659_A01,H19.03.319.11.11.01.04,,,190813
1,211,911022014,LAMP5/PAX6/Other,Inh L1-2 PAX6 CDH12,Inh L1-2 PAX6 CDH12,False,PoorQ,H19.03.317,Acute,PAS4_190723_451_A01,H19.03.317.11.06.01.02,,,190723
2,244,910987373,LAMP5/PAX6/Other,Inh L1-4 LAMP5 LCP2 (rosehip),Inh L1-4 LAMP5 LCP2 (rosehip),False,PoorQ,H19.03.317,Acute,PES4_190723_651_A01,H19.03.317.11.10.01.01,,,190723
3,1,884889997,LAMP5/PAX6/Other,Inh L2-4 PVALB WFDC2,Inh L1-2 PAX6 TNFAIP8L3,False,PoorQ,H19.26.403,Acute,P1S4_190611_005_A01,H19.26.403.11.12.01.01,,,190611
4,223,880209696,LAMP5/PAX6/Other,Inh L1-3 SST CALB1,Inh L2-6 LAMP5 CA1 (Igtp),False,PoorQ,H19.06.356,Acute,P1S4_190603_002_A01,H19.06.356.11.08.01.02,,,190603
5,224,857542109,LAMP5/PAX6/Other,Inh L1-3 VIP ADAMTSL1,Inh L1-2 PAX6 CDH12,False,PoorQ,H19.03.310,Acute,PES4_190425_651_A01,H19.03.310.11.06.01.01,,,190425
6,4,854597364,LAMP5/PAX6/Other,Exc L4-6 RORB C1R,Inh L1-2 PAX6 CDH12,False,PoorQ,H19.03.309,Acute,P1S4_190419_004_A01,H19.03.309.11.08.01.04,,,190419
7,502,840131814,LAMP5/PAX6/Other,Inh L1-3 SST CALB1,Inh L1-4 LAMP5 LCP2 (rosehip),False,PoorQ,H19.06.351,Acute,PES4_190321_659_A01,H19.06.351.11.10.01.10,,,190321
8,536,787132331,LAMP5/PAX6/Other,Exc L2-3 LINC00507 FREM3,Inh L1 SST NMBR (ADARB2+),False,PoorQ,H18.03.322,Acute,PBS4_181128_502_A01,H18.03.322.11.16.01.02,,,181128
9,225,766787898,LAMP5/PAX6/Other,Exc L2-3 LINC00507 FREM3,Inh L1-2 PAX6 CDH12,False,PoorQ,H18.06.366,Acute,PES4_181022_655_A01,H18.06.366.15.05.01.01,,,181022


In [7]:
# Display the `spec_id_label` column of the selected Shiny table.
shiny_df['spec_id_label']

0      923121427
1      911022014
2      910987373
3      884889997
4      880209696
5      857542109
6      854597364
7      840131814
8      787132331
9      766787898
10     750841036
11     721577859
12     720804117
13     720777847
14     720836999
15     712874974
16     712901159
17     700825191
18     696099874
19     696105256
20     689331715
21     674650891
22     672659031
23     672652822
24     672324078
25     665695769
26     653796765
27     653818084
28     653890370
29     653791870
         ...    
820    960843637
821    696108575
822    689319155
823    685766680
824    819074590
825    819106998
826    685771025
827    685780246
828    685806972
829    685785495
830    677063468
831    672646211
832    819116654
833    819133824
834    672655512
835    666068044
836    665718254
837    819166048
838    819177874
839    819185246
840    819191383
841    665700287
842    665695931
843    819197541
844    653882762
845    653038959
846    652069520
847    6501300

# Merge LIMS and Shiny dataframes, save to csv

In [40]:
# LIMS-left merge: keep every row of `lims_df` and attach Shiny columns where
# lims_df['name'] equals shiny_df['cell_name_label'].
# NOTE(review): the following cell reassigns `ephys_shiny_df` with the
# Shiny-left merge, so on a top-to-bottom run this result is overwritten.
ephys_shiny_df = pd.merge(left = lims_df, 
                    right = shiny_df, 
                    left_on = 'name', #ephys
                    right_on = 'cell_name_label', #shiny
                    how = 'left')

In [8]:
# Shiny-left merge: keep every row of `shiny_df` and attach LIMS ephys columns
# where shiny_df['cell_name_label'] equals lims_df['name']. Shiny rows with no
# matching LIMS name are kept with empty ephys columns.
# NOTE(review): this reuses the name `ephys_shiny_df` from the LIMS-left merge
# in the previous cell, replacing that result.
ephys_shiny_df = pd.merge(left = shiny_df, 
                    right = lims_df, 
                    left_on = 'cell_name_label', #shiny
                    right_on = 'name', #ephys
                    how = 'left')

In [27]:
# Export the specimen id and subclass label of every merged row.
# NOTE(review): `specimen_id_x` is a suffixed name that pandas only creates
# when both merged frames contain a `specimen_id` column. The human_lockdown
# table displayed above has no such column, so this selection may raise
# KeyError with the current inputs - confirm.
# NOTE(review): the variable is called `mouse_ids` but the file is named
# 'human_ids...' - confirm which dataset this cell is meant for.
mouse_ids = ephys_shiny_df[['specimen_id_x','subclass_label']]
mouse_ids.to_csv('human_ids191211.csv')

In [56]:
# Write the current merged frame to CSV.
# NOTE(review): `ephys_shiny_df` is whatever merge was run last; the file name
# refers to mouse data - confirm the intended inputs were selected before
# running this cell.
ephys_shiny_df.to_csv("all_mouse_regions_ephys_200417.csv")

In [28]:
# Write the current merged frame to CSV.
# NOTE(review): several cells in this notebook save the same `ephys_shiny_df`
# variable under different dataset names; confirm the merge inputs match this
# file name before running.
ephys_shiny_df.to_csv("MET_data_191101.csv")

In [41]:
# Write the current merged frame to CSV.
# NOTE(review): the file name refers to culture patch-seq data, but
# `ephys_shiny_df` holds whatever merge was run last - confirm the inputs.
ephys_shiny_df.to_csv('culture_patchseq_data_200219.csv')

In [9]:
# List the distinct subclass labels present in the merged frame.
ephys_shiny_df['subclass_label'].unique()

array(['LAMP5/PAX6/Other', 'SST', 'PVALB', 'VIP'], dtype=object)

In [10]:
# Write the current merged frame to CSV.
# NOTE(review): `ephys_shiny_df` holds whatever merge was run last - confirm
# it was built from the human inputs before running this cell.
ephys_shiny_df.to_csv('human_200326.csv')

In [28]:
# Write the current merged frame to CSV.
# NOTE(review): the file name refers to the `mouse_hip` data, but the active
# `shiny_df` assignment in this notebook is `human_lockdown` - confirm the
# inputs before running this cell.
ephys_shiny_df.to_csv('mouse_hip_200406.csv')

In [36]:
# Write the current merged frame to CSV.
# NOTE(review): the file name refers to VISp SNCG data, but `ephys_shiny_df`
# holds whatever merge was run last - confirm the inputs.
ephys_shiny_df.to_csv('visp_sncg_200414.csv')

In [None]:
# LIMS-left merge keyed on a Shiny column called `cell_name`.
# NOTE(review): this cell has no execution count, and the human_lockdown table
# displayed above exposes `cell_name_label`, not `cell_name`; with the current
# `shiny_df` this merge would presumably raise KeyError - confirm which Shiny
# export this cell was written for.
ephys_shiny_df = pd.merge(left = lims_df, 
                    right = shiny_df, 
                    left_on = 'name', #ephys
                    right_on = 'cell_name', #shiny
                    how = 'left')

In [38]:
# Spot check: pull the LIMS row(s) for one cell name and write them to CSV.
# The result is empty if that name is not in the current `lims_df`.
test = lims_df[lims_df['name'] == 'H17.06.007.11.05.04']
test.to_csv("test200508.csv")

In [11]:
# Write the current merged frame to CSV. With `shiny_df = human_lockdown` and
# the Shiny-left merge above, this is the lockdown table joined to LIMS ephys
# features.
# NOTE(review): the content depends on which query cell last assigned
# `lims_df` - confirm before running.
ephys_shiny_df.to_csv('human_lockdown_shiny_ephys.csv')