# SQL Module 1: Copied from Aaron on single_cell_ephys Github

### Import

In [1]:
import pg8000          #pg8000 access SQL databases
import pandas as pd    #pandas will be needed to work in a dataframe

### Functions created by Agata to access LIMS

In [2]:
#Code from Agata
#Opens LIMS, makes a query and then closes LIMS after

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Open a pg8000 connection using the given credentials.

    Returns a ``(connection, cursor)`` pair. Nothing is closed here; the
    caller has to close both objects when it is done with them.
    """
    connection = pg8000.connect(
        user=user,
        host=host,
        database=database,
        password=password,
        port=port
    )
    cursor = connection.cursor()
    return connection, cursor

def _select(cursor, query):
    """Execute ``query`` on ``cursor`` and return all rows as dicts.

    Each row becomes a dict keyed by column name (the first item of every
    entry in ``cursor.description``). When two result columns share a name,
    the dict keeps a single entry for that name (the later column's value).
    """
    cursor.execute(query)

    # Column names first, then the rows, so each row can be zipped with them.
    column_names = []
    for column_info in cursor.description:
        column_names.append(column_info[0])

    rows = []
    for row in cursor.fetchall():
        rows.append(dict(zip(column_names, row)))
    return rows

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Connect to the LIMS database, run a SQL query and return the result.

    query is a string containing the SQL to run; the remaining arguments are
    the connection settings passed on to _connect.

    Returns a list with one dict per result row, keyed by column name
    (see _select). Any error raised while connecting or querying propagates
    to the caller.
    """
    conn, cursor = _connect(user, host, database, password, port)
    #THESE ARE IMPORTANT!!!!!! (Every query needs to be closed when done)
    #The nested try/finally makes sure the connection is closed even when
    #closing the cursor itself raises an error.
    try:
        try:
            return _select(cursor, query)
        finally:
            #This closes query
            cursor.close()
    finally:
        conn.close()


#This last function will take our query results and put them in a dataframe so that they are easy to work with
def get_lims_dataframe(query):
    '''Return a dataframe with the result of a LIMS query.

    query is a string containing the SQL to run through limsquery.

    Returns a pandas DataFrame with one row per result row; the columns are
    taken from the keys of the first result row. When the query returns no
    rows, a message is printed and an empty DataFrame is returned.
    '''
    result = limsquery(query)
    if not result:
        #print(...) with a single argument behaves the same on Python 2 and 3,
        #whereas the old print statement is a syntax error on Python 3
        print("Could not find results for your query.")
        return pd.DataFrame()
    return pd.DataFrame(data=result, columns=list(result[0]))

### Example: Accessing LIMS and looking at data tables

##### specimens table: shows every column for a single row

In [3]:
my_query = "SELECT * FROM specimens LIMIT 10"
#asterisk(*) = all columns
#specimens = table name
#LIMIT 10 = limits search to first 10 rows

my_result = limsquery(my_query)
#my_result is a list with one dict per row; first_element keeps only the first row
first_element = my_result[0]

#print(...) with a single argument works the same on Python 2 and Python 3
print(first_element)
#Shows every column name with its value for that one row

{'cell_depth': None, 'ephys_roi_result_id': None, 'parent_y_coord': 0, 'reference_space_id': None, 'updated_at': datetime.datetime(2016, 12, 16, 4, 54, 44, 477335), 'cell_label': None, 'preparation_method_id': None, 'parent_x_coord': 2, 'location_id': None, 'id': 556516441, 'cortex_layer_id': None, 'plane_of_section_id': 11, 'frozen_at': None, 'flipped_specimen_id': 561557765, 'data': None, 'pinned_radius': None, 'rna_integrity_number': None, 'histology_well_name': None, 'created_by': None, 'priority': None, 'parent_id': 556516212, 'ephys_start_time_sec': None, 'project_id': 305094322, 'alignment3d_id': None, 'carousel_well_name': u'T301_122_161107_01_12', 'patched_cell_container': None, 'updated_by': None, 'cell_prep_id': None, 'biophysical_model_state': u'review_required', 'barcode': u'0556516441', 'storage_directory': None, 'x_coord': None, 'tissue_ph': None, 'specimen_preparation_method_id': None, 'donor_id': 555257198, 'operation_id': None, 'ephys_neural_tissue_plan_id': 555257244

##### specimens table: shows all column names

In [4]:
my_query = "SELECT * FROM specimens LIMIT 1"
#asterisk(*) = all columns
#specimens = table name
#LIMIT 1 = limits search to first row

my_result = limsquery(my_query)
#my_result is a list with one dict per row; first_element keeps only the first row
first_element = my_result[0]

#print(...) with a single argument works the same on Python 2 and Python 3
print(first_element.keys())
#keys = column names
#Shows all column names of the specimens table

['cell_depth', 'ephys_roi_result_id', 'parent_y_coord', 'reference_space_id', 'updated_at', 'cell_label', 'preparation_method_id', 'parent_x_coord', 'location_id', 'id', 'cortex_layer_id', 'plane_of_section_id', 'frozen_at', 'flipped_specimen_id', 'data', 'pinned_radius', 'rna_integrity_number', 'histology_well_name', 'created_by', 'priority', 'parent_id', 'ephys_start_time_sec', 'project_id', 'alignment3d_id', 'carousel_well_name', 'patched_cell_container', 'updated_by', 'cell_prep_id', 'biophysical_model_state', 'barcode', 'storage_directory', 'x_coord', 'tissue_ph', 'specimen_preparation_method_id', 'donor_id', 'operation_id', 'ephys_neural_tissue_plan_id', 'structure_id', 'parent_z_coord', 'facs_well_id', 'name', 'y_coord', 'normalization_group_id', 'postmortem_interval_id', 'specimen_set_id', 'created_at', 'tissue_processing_id', 'ephys_cell_plan_id', 'hemisphere_id', 'ephys_qc_result', 'cell_reporter_id', 'task_flow_id', 'external_specimen_name']


In [5]:
my_query = "SELECT * FROM donors LIMIT 1"
#asterisk(*) = all columns
#donors = table name
#LIMIT 1 = limits search to first row

my_result = limsquery(my_query)
#my_result is a list with one dict per row; first_element keeps only the first row
first_element = my_result[0]

#print(...) with a single argument works the same on Python 2 and Python 3
print(first_element.keys())
#keys = column names
#Shows all column names of the donors table

['weight', 'education_level_id', 'updated_at', 'height', 'transgenic_induction_method_id', 'id', 'death_on', 'primary_tissue_source_id', 'full_genotype', 'induction_method', 'age_id', 'created_by', 'occupation_id', 'date_of_birth', 'external_donor_name', 'race_id', 'death_manner_id', 'updated_by', 'organism_id', 'data', 'name', 'created_at', 'gender_id', 'baseline_weight_g', 'death_cause_id', 'handedness_id']


### How to display one table as a dataframe

In [29]:
specimens_query = "SELECT * FROM specimens LIMIT 5"
lims_df = get_lims_dataframe(specimens_query)
lims_df

Unnamed: 0,cell_depth,ephys_roi_result_id,parent_y_coord,reference_space_id,updated_at,cell_label,preparation_method_id,parent_x_coord,location_id,id,...,postmortem_interval_id,specimen_set_id,created_at,tissue_processing_id,ephys_cell_plan_id,hemisphere_id,ephys_qc_result,cell_reporter_id,task_flow_id,external_specimen_name
0,,,0.0,,2016-12-16 04:54:44.477335,,,2.0,,556516441,...,,,2016-11-07 16:03:11.459275,555257241,,,,,,
1,,,,,2017-04-19 14:52:52.593310,,,,,581535046,...,,,2017-04-14 16:19:20.011713,581535068,,,,,,
2,,,,,2017-03-01 16:37:56.349162,,,,,571099320,...,,,2017-02-24 16:19:14.340373,571099382,,,,,,
3,,,,,2016-12-20 15:59:41.497432,,,,,561463630,...,,,2016-12-14 17:16:20.787949,561463640,,,,,,
4,,,,,2017-04-21 14:53:40.845724,,,,,581535066,...,,,2017-04-14 16:19:20.491006,581535089,,,,,,


In [30]:
donors_query = "SELECT * FROM donors LIMIT 5"
lims_df = get_lims_dataframe(donors_query)
lims_df

Unnamed: 0,weight,education_level_id,updated_at,height,transgenic_induction_method_id,id,death_on,primary_tissue_source_id,full_genotype,induction_method,...,death_manner_id,updated_by,organism_id,data,name,created_at,gender_id,baseline_weight_g,death_cause_id,handedness_id
0,27.0,,2014-08-21 18:04:29.171400,,2.0,304264883,2014-05-30 19:00:00,,,,...,,,2,,Dlg3-Cre_KG118-140704,2014-06-02 18:34:25.590938,1,,,
1,,12.0,2015-10-12 18:15:08.604873,,,487607512,2015-10-12 00:00:00,,,,...,,,2,,Scnn1a-Tg3-Cre;Ai14-212221,2015-10-01 20:40:52.965928,1,,,
2,,12.0,2015-10-14 18:56:24.462132,,,488675880,NaT,,,,...,,,2,,Nr5a1-Cre;Ai14-213390,2015-10-14 18:56:24.462132,2,,,
3,,,2014-07-31 15:32:44.664385,,,305963622,NaT,,,,...,,,2,,Sim1-Cre_KJ18-147763,2014-07-31 15:32:44.664385,3,,,
4,,,2015-11-04 16:27:15.636271,,2.0,490646938,2015-11-03 00:00:00,,,,...,,,2,,Sim1-Cre_KJ18;Ai14(HZ)-215642,2015-11-04 16:27:15.636271,2,,,


In [67]:
ephys_query = "SELECT * FROM ephys_roi_results LIMIT 5"
lims_df = get_lims_dataframe(ephys_query)
lims_df

Unnamed: 0,rig_name,ephys_qc_criteria_id,failed_bad_rs,updated_at,storage_directory,electrode_0_pa,input_resistance_mohm,id,stage2_reviewer_id,blowout_mv,...,initial_access_resistance_mohm,qc_notes,recording_date,created_at,seal_gohm,published_at,failed_clogged_pipette,stage1_reviewer_id,failed_electrode_0,notes
0,,324256702,False,2017-06-02 16:59:38.494775,/allen/programs/celltypes/production/mousecell...,15.115,739.51104,591545243,485591509.0,3.382243,...,17.023904,,2017-05-25 20:11:34,2017-05-25 20:49:23.191938,1.606623,,False,525757437.0,False,
1,,324256702,False,2017-08-11 19:28:43.535002,/allen/programs/celltypes/production/mousecell...,-0.845,172.504224,605846847,485591509.0,1.479923,...,16.37042,,2017-08-01 17:34:10,2017-08-01 18:04:39.483043,1.055227,,False,305127608.0,False,
2,,324256702,False,2017-08-02 23:25:07.483748,/allen/programs/celltypes/production/mousecell...,-16.4775,440.122624,606152814,,,...,13.762438,,2017-08-02 21:08:18,2017-08-02 21:49:25.474869,1.892042,,True,,False,
3,,324256702,,2017-09-21 17:39:46.596585,/allen/programs/celltypes/production/mousecell...,,,637823738,,,...,,,NaT,2017-09-21 17:39:46.495650,,,,,,
4,,324256702,,2016-08-11 17:10:42.079461,/allen/programs/celltypes/production/mousecell...,,,305384219,,,...,,,NaT,2014-07-14 22:11:51.196915,,,,,,


In [69]:
#To show the columns within the dataframe
lims_df.columns

Index([u'rig_name', u'ephys_qc_criteria_id', u'failed_bad_rs', u'updated_at',
       u'storage_directory', u'electrode_0_pa', u'input_resistance_mohm',
       u'id', u'stage2_reviewer_id', u'blowout_mv', u'failed_other',
       u'sampling_rate', u'input_access_resistance_ratio', u'failed_no_seal',
       u'workflow_state', u'ephys_specimen_roi_plan_id',
       u'initial_access_resistance_mohm', u'qc_notes', u'recording_date',
       u'created_at', u'seal_gohm', u'published_at', u'failed_clogged_pipette',
       u'stage1_reviewer_id', u'failed_electrode_0', u'notes'],
      dtype='object')

### How to JOIN tables together with a same column to create a dataframe

In [18]:
#This is the basic shape of a SQL query that joins tables
#SELECT TableA.*, TableB.*, TableC.*, TableD.* FROM TableA JOIN TableB ON TableB.aID = TableA.aID 
#JOIN TableC ON TableC.cID = TableB.cID WHERE DATE(TableC.date)=date(now())
#(TableD would need its own JOIN ... ON ... clause as well before its columns can be selected)
#This allows us to pull in info from 4 different tables since everything we want is not 
#stored in one place, but there are overlaps to link things up. 10 digit IDs generally are your link

In [None]:
#lims_query = "SELECT table_name1.col_name1, table_name2.col_name2 \
#              FROM table_name1 JOIN table_name2 ON col_name1 = col_name2"
#lims_df = get_lims_dataframe(lims_query)
#lims_df.tail()

##### Example: INNER JOIN

In [64]:
#Note both specimens and donors have an ID column
#\ = lets you continue in new line below
#NOTE(review): the ON clause compares specimens.id with donors.id, so a row is returned
#only when a specimen and a donor happen to carry the same id value. The specimens table
#also has a donor_id column, which looks like the intended link to donors.id - confirm
#before reusing this pattern.
#Both specimens.id and donors.id come back under the column name "id"; _select builds one
#dict per row, so only one "id" entry is kept and the output shows a single id column.

lims_query = "SELECT specimens.cell_depth, donors.weight, specimens.id, donors.id \
              FROM specimens INNER JOIN donors ON specimens.id = donors.id"
lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,cell_depth,id,weight
6727,,1276,
6728,,1277,2.3
6729,,1278,2.6
6730,,2886,
6731,,4436,


In [62]:
#Same INNER JOIN as the previous cell, but with the two tables swapped around the JOIN
#(FROM donors INNER JOIN specimens); the last rows shown in the output are the same
lims_query = "SELECT specimens.cell_depth, donors.weight, specimens.id, donors.id \
              FROM donors INNER JOIN specimens ON donors.id = specimens.id"
lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,cell_depth,id,weight
6727,,1276,
6728,,1277,2.3
6729,,1278,2.6
6730,,2886,
6731,,4436,


##### Aaron's Example

In [32]:
# note both specimens and ephys roi results have an ID column, so we renamed the specimens column 
# as cell_id in the output

lims_query = "SELECT ephys_roi_results.id, specimens.id AS cell_id, specimens.name, specimens.ephys_roi_result_id \
FROM ephys_roi_results JOIN specimens ON specimens.ephys_roi_result_id = ephys_roi_results.id"
lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,ephys_roi_result_id,cell_id,id,name
13998,670707505,670707523,670707505,Vip-IRES-Cre;Ai14-377304.05.01.01
13999,670709191,670709216,670709191,Vip-IRES-Cre;Ai14-377304.03.01.01
14000,670761564,670761569,670761564,Nr5a1-Cre;Ai14-378059.08.06.01
14001,670771758,670771764,670771758,Tlx3-Cre_PL56;Ai140;Vip-IRES-FlpO;Ai65F-378314...
14002,670776759,670776794,670776759,Nr5a1-Cre;Ai14-378060.09.06.02


### How to access the whole list of LIMS data tables

##### Choose from table name from list to create dataframe

In [6]:
my_query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES"
my_result = limsquery(my_query)
my_result

[{'table_name': u'pg_type'},
 {'table_name': u'pg_roles'},
 {'table_name': u'pg_group'},
 {'table_name': u'pg_user'},
 {'table_name': u'pg_rules'},
 {'table_name': u'pg_views'},
 {'table_name': u'pg_tables'},
 {'table_name': u'pg_matviews'},
 {'table_name': u'pg_indexes'},
 {'table_name': u'pg_stats'},
 {'table_name': u'pg_settings'},
 {'table_name': u'pg_locks'},
 {'table_name': u'pg_cursors'},
 {'table_name': u'pg_available_extensions'},
 {'table_name': u'pg_available_extension_versions'},
 {'table_name': u'pg_prepared_xacts'},
 {'table_name': u'pg_prepared_statements'},
 {'table_name': u'pg_seclabels'},
 {'table_name': u'pg_timezone_abbrevs'},
 {'table_name': u'pg_timezone_names'},
 {'table_name': u'pg_stat_all_tables'},
 {'table_name': u'pg_stat_xact_all_tables'},
 {'table_name': u'pg_stat_sys_tables'},
 {'table_name': u'pg_stat_xact_sys_tables'},
 {'table_name': u'pg_stat_user_tables'},
 {'table_name': u'pg_stat_xact_user_tables'},
 {'table_name': u'pg_statio_all_tables'},
 {'tabl

### How to JOIN tables together with a same column to create a dataframe

In [8]:
#This is the basic shape of a SQL query that joins tables
#SELECT TableA.*, TableB.*, TableC.*, TableD.* FROM TableA JOIN TableB ON TableB.aID = TableA.aID 
#JOIN TableC ON TableC.cID = TableB.cID WHERE DATE(TableC.date)=date(now())
#(TableD would need its own JOIN ... ON ... clause as well before its columns can be selected)
#This allows us to pull in info from 4 different tables since everything we want is not 
#stored in one place, but there are overlaps to link things up. 10 digit IDs generally are your link

In [9]:
#Note both specimens and ephys_roi_results have an id column, so we rename specimens.id
#to cell_id in the output (using AS)

lims_query = "SELECT ephys_roi_results.id, specimens.id AS cell_id, specimens.name, specimens.ephys_roi_result_id \
FROM ephys_roi_results JOIN specimens ON specimens.ephys_roi_result_id = ephys_roi_results.id"
lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,ephys_roi_result_id,cell_id,id,name
13998,670707505,670707523,670707505,Vip-IRES-Cre;Ai14-377304.05.01.01
13999,670709191,670709216,670709191,Vip-IRES-Cre;Ai14-377304.03.01.01
14000,670761564,670761569,670761564,Nr5a1-Cre;Ai14-378059.08.06.01
14001,670771758,670771764,670771758,Tlx3-Cre_PL56;Ai140;Vip-IRES-FlpO;Ai65F-378314...
14002,670776759,670776794,670776759,Nr5a1-Cre;Ai14-378060.09.06.02


In [10]:
#This is the structure we want to follow for SQL:
#SELECT the table.column entries you want returned
#FROM the table those columns come from (the table you start with)
#JOIN another table ON the pair of columns that links it to the first (left = right)
#WHERE gives you the ability to filter data
#AND and OR combine several filter conditions
#" opens and closes your query string
#\ lets you continue the query string on the next line
#We can shorten ephys_roi_results to err and specimens to s, select all of the columns we want and use
#JOIN to combine ephys_roi_results and specimens following the outline from above

In [11]:
# call out the abbreviations (aliases) in the FROM / JOIN section:
# "ephys_roi_results err" lets err stand for ephys_roi_results,
# "specimens s" lets s stand for specimens

lims_query = "SELECT err.id, s.id AS cell_id, s.name \
FROM ephys_roi_results err \
JOIN specimens s ON s.ephys_roi_result_id = err.id"

lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,cell_id,id,name
13998,670707523,670707505,Vip-IRES-Cre;Ai14-377304.05.01.01
13999,670709216,670709191,Vip-IRES-Cre;Ai14-377304.03.01.01
14000,670761569,670761564,Nr5a1-Cre;Ai14-378059.08.06.01
14001,670771764,670771758,Tlx3-Cre_PL56;Ai140;Vip-IRES-FlpO;Ai65F-378314...
14002,670776794,670776759,Nr5a1-Cre;Ai14-378060.09.06.02


In [12]:
# call out the abbreviations (aliases) in the FROM / JOIN section:
# ephys_roi_results err, specimens s, projects proj
# WHERE keeps only the rows whose project code is 'H301'
# NOTE(review): err.id and s.id are both returned under the name "id" (s.id is not
# renamed with AS here). _select keeps one value per column name - the later one,
# s.id - so the output has a single id column and err.id is not visible.

lims_query = "SELECT err.id, s.id, s.name, proj.code \
FROM ephys_roi_results err \
JOIN specimens s ON s.ephys_roi_result_id = err.id \
JOIN projects proj ON s.project_id = proj.id \
WHERE proj.code = 'H301'"

lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,code,id,name
908,H301,643584484,H17.06.013.12.04.01
909,H301,643589588,H17.06.013.12.04.02
910,H301,643601251,H17.06.013.12.13.01
911,H301,643619982,H17.06.013.12.13.02
912,H301,643625553,H17.06.013.12.13.03


In [13]:
#filters for cells that only failed due to access resistance over 20 MOhm
#(failed_bad_rs is TRUE, every other failed_* flag is FALSE, and the cell was not manually passed)
#grabs the ephys roi result id, project code, recording date, cell name, cell id, and qc results
#only projects with code T301 or T301x are kept
#NOTE(review): err.id and proj.id are both returned under the name "id"; _select keeps the
#later one, so the id column in the output is the project id (it equals project_id).
#The ephys roi result id is still available through s.ephys_roi_result_id, which the JOIN
#condition makes equal to err.id.
#NOTE(review): failed_bad_rs in the WHERE clause is the only column written without the
#err. prefix used for the other flags.

lims_query = "SELECT err.id, err.recording_date, err.failed_bad_rs, \
err.failed_electrode_0, err.failed_clogged_pipette, err.failed_no_seal, \
err.failed_other, err.workflow_state, s.name, s.ephys_roi_result_id, s.project_id, proj.id, proj.code \
FROM ephys_roi_results err JOIN specimens s ON s.ephys_roi_result_id = err.id \
JOIN projects proj ON s.project_id = proj.id \
WHERE (failed_bad_rs = 'TRUE' \
AND err.failed_electrode_0 = 'FALSE' \
AND err.failed_clogged_pipette = 'FALSE' \
AND err.failed_no_seal = 'FALSE' \
AND err.failed_other = 'FALSE' \
AND err.workflow_state != 'manual_passed') \
AND (proj.code = 'T301' OR proj.code = 'T301x')"

lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,ephys_roi_result_id,code,recording_date,failed_bad_rs,workflow_state,name,failed_clogged_pipette,failed_other,project_id,failed_electrode_0,id,failed_no_seal
67,601831626,T301,2017-07-10 21:03:11,True,manual_failed,Nos1-CreERT2;Ai14-333287.03.01.01,False,False,305094322,False,305094322,False
68,602660919,T301,2017-07-13 22:16:27,True,manual_failed,Scnn1a-Tg2-Cre;Ai14-333165.04.02.01,False,False,305094322,False,305094322,False
69,604693290,T301x,2017-07-26 22:50:46,True,manual_failed,Rbp4-Cre_KL100;Ai14-337193.04.02.01,False,False,300080300,False,300080300,False
70,605537141,T301x,2017-07-28 20:37:50,True,manual_failed,Gad2-IRES-Cre;Ai14-336420.04.02.01,False,False,300080300,False,300080300,False
71,605538385,T301x,2017-07-28 21:32:11,True,manual_failed,Gad2-IRES-Cre;Ai14-336420.02.02.01,False,False,300080300,False,300080300,False


In [14]:
#Pulls every column (SELECT *) from scheduled_procedures joined to users,
#scheduled_procedures_subjects (shortened to sps) and specimens, keeping only the
#active procedures assigned to the user with login 'nadiad', ordered by creation time
#NOTE(review): with SELECT * several of the joined tables return columns with the same
#name (id, for example); _select keeps only one value per column name in each row
query = "SELECT * \
FROM scheduled_procedures \
JOIN users ON users.id = scheduled_procedures.assigned_to_id \
JOIN scheduled_procedures_subjects sps ON scheduled_procedures.id = sps.scheduled_procedure_id \
JOIN specimens ON sps.subject_id = specimens.id \
WHERE users.login = 'nadiad' and scheduled_procedures.lab_request_status ='active' \
ORDER BY scheduled_procedures.created_at"
df = get_lims_dataframe(query)
df.head()

Unnamed: 0,cell_depth,ephys_roi_result_id,siv_default_ontology_id,scheduled_date,parent_y_coord,reference_space_id,task_flow_id,updated_at,requested_date_range_start,carousel_well_name,...,tissue_processing_id,ephys_qc_result,requested_by_id,y_coord,specimen_set_id,cell_reporter_id,login,pinned_radius,external_specimen_name,parent_x_coord
0,,,,2018-03-05 08:00:00,,9.0,,2018-03-06 21:02:02.145452,2018-02-20 08:00:00,,...,,,649368583,,,,nadiad,,356797,
1,,,,NaT,,,,2018-02-22 21:14:37.351421,2018-02-22 08:00:00,,...,,,649368583,,,,nadiad,,367347,
2,,,,NaT,,,,2018-02-22 21:16:04.779125,2018-02-22 08:00:00,,...,,,649368583,,,,nadiad,,368296,
3,,,,NaT,,,,2018-02-22 21:15:16.544898,2018-02-22 08:00:00,,...,,,649368583,,,,nadiad,,368294,
4,,,,2018-03-06 08:00:00,,9.0,,2018-03-05 20:19:43.029910,2018-02-26 08:00:00,,...,,,553513823,,,,nadiad,,365251,


In [15]:
query = "SELECT scheduled_procedures.*, sps.subject_id, specimens.name AS mouse_name \
FROM scheduled_procedures \
JOIN users ON users.id = scheduled_procedures.assigned_to_id \
JOIN scheduled_procedures_subjects sps ON scheduled_procedures.id = sps.scheduled_procedure_id \
JOIN specimens ON sps.subject_id = specimens.id \
WHERE users.login = 'nadiad' and scheduled_procedures.lab_request_status ='active'"
df = get_lims_dataframe(query)
df.head()

Unnamed: 0,task_type,lab_request_status,task_id,department_id,created_at,mouse_name,updated_at,requested_date_range_start,subject_id,batch_name,assigned_to_id,requested_by_id,comments,scheduled_date,methods_versions_map,task_flow_step_id,project_id,location_id,id,requested_date_range_end
0,Task,active,665597226,,2018-02-20 20:13:38.828488,Slc17a7-IRES2-Cre;Camk2a-tTA;Ai93-356797,2018-03-05 23:34:11.840064,2018-02-20 08:00:00,646498173,dillanb_C600_Brain Observatory Visual Coding_2...,186,649368583,,2018-03-05 08:00:00,{},563649249,305493902,,665597231,2018-02-20 08:00:00
1,Task,active,667196096,,2018-02-26 18:56:38.688319,Nr5a1-Cre;Camk2a-tTA;Ai93-361002,2018-02-26 18:56:38.688319,2018-02-26 08:00:00,650072699,dillanb_C600_Brain Observatory Visual Coding_2...,186,553513823,,NaT,{},563649249,305493902,,667196099,2018-02-26 08:00:00
2,Task,active,667196240,,2018-02-26 18:56:41.773087,Cux2-CreERT2;Camk2a-tTA;Ai93-360565,2018-02-26 18:56:41.773087,2018-02-26 08:00:00,649496259,dillanb_C600_Brain Observatory Visual Coding_2...,186,553513823,,NaT,{},563649249,305493902,,667196257,2018-02-26 08:00:00
3,Task,active,667196077,,2018-02-26 18:56:35.046602,Slc17a7-IRES2-Cre;Camk2a-tTA;Ai93-365251,2018-03-05 23:34:22.265162,2018-02-26 08:00:00,654638707,carriea_C600_Brain Observatory Visual Coding_2...,186,553513823,,2018-03-06 08:00:00,"{u'task_flow_steps': [{u'id': 563649243, u'met...",563649249,305493902,,667196080,2018-02-26 08:00:00
4,Task,active,666487715,,2018-02-22 21:17:08.356640,Pvalb-IRES-Cre-367347,2018-02-22 21:17:08.356640,2018-02-22 08:00:00,657861412,carriea_MouseBrainCellAtlasTranssynaptic_BRAIN...,186,649368583,,NaT,"{u'task_flow_steps': [{u'id': None, u'methods_...",656499942,654688276,,666487718,2018-02-22 08:00:00


In [16]:
#Jim asked to generate a csv with the rows being each patch-seq cell and the columns being the number 
#of sweeps for each of those cells that are of a certain stim.description (name)
#COUNT(CASE WHEN ... THEN 1 ELSE NULL END) counts only the sweeps whose stimulus description
#matches the LIKE pattern, because COUNT skips NULL values
#GROUP BY cell.name gives one output row per cell name
#Only specimens with a patched_cell_container value are included (NOTNULL)
#NOTE(review): the LIKE wildcard is written as %% instead of % - presumably because the
#pg8000 driver treats a single % as a parameter marker; confirm against the pg8000 docs

query = "SELECT cell.name AS cell_name, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%subthreshold%%' THEN 1 ELSE NULL END) AS subthreshold \
FROM specimens cell JOIN ephys_sweeps ess ON cell.id = ess.specimen_id \
JOIN ephys_stimuli ON ess.ephys_stimulus_id = ephys_stimuli.id \
WHERE cell.patched_cell_container NOTNULL \
GROUP BY cell.name"

df2 = get_lims_dataframe(query)
df2.head()

Unnamed: 0,subthreshold,cell_name
0,3,Oxtr-T2A-Cre;Ai14-351471.04.01.01
1,0,Slc32a1-IRES-Cre;Ai14-326812.04.02.05
2,0,Slc32a1-IRES-Cre;Ai14-305535.06.02.01
3,0,Gad2-IRES-Cre;Ai14-267341.07.02.02
4,0,Slc17a6-IRES-Cre;Ai14-309388.03.01.01


In [17]:
#Extends the sweep count to several stimulus types: one COUNT(CASE ...) column per
#description pattern, each counting the sweeps of a cell whose stimulus description matches
#One output row per cell name (GROUP BY cell.name), restricted to specimens that have a
#patched_cell_container value
#NOTE(review): %% stands for the LIKE wildcard % - presumably doubled because the pg8000
#driver treats a single % as a parameter marker; confirm against the pg8000 docs
query = "SELECT cell.name AS cell_name, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%subthreshold%%' THEN 1 ELSE NULL END) AS subthreshold, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C1LSFINEST150112%%' THEN 1 ELSE NULL END) AS longsquare, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C1SSFINEST150112%%' THEN 1 ELSE NULL END) AS shortsquare, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C1RP25PR1S141203%%' THEN 1 ELSE NULL END) AS ramp, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%Blip%%' THEN 1 ELSE NULL END) AS cap_check, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C2CHIRP171103%%' THEN 1 ELSE NULL END) AS chirp, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C1NSD1SHORT17110%%' THEN 1 ELSE NULL END) AS noise_1, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C1NSD2SHORT17110%%' THEN 1 ELSE NULL END) AS noise_2, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C2SSTRIPLE171103%%' THEN 1 ELSE NULL END) AS triple \
FROM specimens cell JOIN ephys_sweeps ess ON cell.id = ess.specimen_id \
JOIN ephys_stimuli ON ess.ephys_stimulus_id = ephys_stimuli.id \
WHERE cell.patched_cell_container NOTNULL \
GROUP BY cell.name"

df2 = get_lims_dataframe(query)
df2.head()

Unnamed: 0,noise_1,ramp,shortsquare,chirp,noise_2,subthreshold,cap_check,longsquare,triple,cell_name
0,0,2,5,0,0,3,1,4,0,Oxtr-T2A-Cre;Ai14-351471.04.01.01
1,0,0,0,0,0,0,0,4,0,Slc32a1-IRES-Cre;Ai14-326812.04.02.05
2,0,1,3,0,0,0,0,1,0,Slc32a1-IRES-Cre;Ai14-305535.06.02.01
3,0,3,7,0,0,0,0,0,0,Gad2-IRES-Cre;Ai14-267341.07.02.02
4,0,1,3,0,0,0,0,0,0,Slc17a6-IRES-Cre;Ai14-309388.03.01.01
