In [3]:
import pg8000          #pg8000 access SQL databases
import pandas as pd    #pandas will be needed to work in a dataframe

In [4]:
#code from Agata
#these are nice functions to open LIMS, make a query and then close LIMS after

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    conn = pg8000.connect(user=user, host=host, database=database, password=password, port=port)
    return conn, conn.cursor()

def _select(cursor, query):
    cursor.execute(query)
    columns = [ d[0] for d in cursor.description ]
    return [ dict(zip(columns, c)) for c in cursor.fetchall() ]

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """A function that takes a string containing a SQL query, connects to the LIMS database and outputs the result."""
    conn, cursor = _connect(user, host, database, password, port)
    try:
        results = _select(cursor, query)
    finally:
        
        #THESE ARE IMPORTANT!!!!!!
        #Every query needs to be closed when done
        cursor.close()             
        conn.close()
    return results


#this last function will take our query results and put them in a dataframe so that they are easy to work with
def get_lims_dataframe(query):
    '''Return a dataframe with lims query'''
    result = limsquery(query)
    try:
        data_df = pd.DataFrame(data=result, columns=result[0].keys())
    except IndexError:
        print "Could not find results for your query."
        data_df = pd.DataFrame()
    return data_df

## Find how many cells took longer to find instanteous threshold than they did to find rheobase (ie more SSFINEST than LSFINEST sweeps).

In [5]:
query = "SELECT cell.name AS cell_name, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C1LSFINEST150112%%' THEN 1 ELSE NULL END) AS longsquare, \
COUNT(CASE WHEN ephys_stimuli.description LIKE '%%C1SSFINEST150112%%' THEN 1 ELSE NULL END) AS shortsquare \
FROM specimens cell JOIN ephys_sweeps ess ON cell.id = ess.specimen_id \
JOIN ephys_stimuli ON ess.ephys_stimulus_id = ephys_stimuli.id \
WHERE cell.patched_cell_container NOTNULL \
GROUP BY cell.name"

df2 = get_lims_dataframe(query)
df2.head()

Unnamed: 0,shortsquare,longsquare,cell_name
0,5,4,Oxtr-T2A-Cre;Ai14-351471.04.01.01
1,0,4,Slc32a1-IRES-Cre;Ai14-326812.04.02.05
2,3,1,Slc32a1-IRES-Cre;Ai14-305535.06.02.01
3,7,0,Gad2-IRES-Cre;Ai14-267341.07.02.02
4,3,0,Slc17a6-IRES-Cre;Ai14-309388.03.01.01


In [6]:
df2[df2['shortsquare'] > df2['longsquare']].count()

shortsquare    3353
longsquare     3353
cell_name      3353
dtype: int64

## Show the distribution of hemispheres all the cells patched in 2017 with the age and sex of the mice used.

In [29]:
query = "SELECT * FROM ephys_roi_results"
query2= "SELECT * FROM specimens"
query3= "SELECT * FROM donors"

df3 = limsquery(query)
df4 = limsquery(query2)
df5 = limsquery(query3)

print df3[0].keys()
print " "
print df4[0].keys()
print " "
print df5[0].keys()

['rig_name', 'ephys_qc_criteria_id', 'failed_bad_rs', 'updated_at', 'storage_directory', 'electrode_0_pa', 'input_resistance_mohm', 'id', 'stage2_reviewer_id', 'blowout_mv', 'failed_other', 'sampling_rate', 'input_access_resistance_ratio', 'failed_no_seal', 'workflow_state', 'ephys_specimen_roi_plan_id', 'initial_access_resistance_mohm', 'qc_notes', 'recording_date', 'created_at', 'seal_gohm', 'published_at', 'failed_clogged_pipette', 'stage1_reviewer_id', 'failed_electrode_0', 'notes']
 
['cell_depth', 'ephys_roi_result_id', 'parent_y_coord', 'reference_space_id', 'updated_at', 'cell_label', 'preparation_method_id', 'parent_x_coord', 'location_id', 'id', 'cortex_layer_id', 'plane_of_section_id', 'frozen_at', 'flipped_specimen_id', 'data', 'pinned_radius', 'rna_integrity_number', 'histology_well_name', 'created_by', 'priority', 'parent_id', 'ephys_start_time_sec', 'project_id', 'alignment3d_id', 'carousel_well_name', 'patched_cell_container', 'updated_by', 'cell_prep_id', 'biophysical_

In [38]:
query = "SELECT err.recording_date, s.donor_id, s.hemisphere_id, d.age_id, d.gender_id \
FROM ephys_roi_results err \
JOIN specimens s ON s.ephys_roi_result_id = err.id \
JOIN donors d ON s.donor_id = d.id \
WHERE s.hemisphere_id = 1.0 OR s.hemisphere_id = 2.0 \
AND (err.recording_date > '2017-01-01' AND err.recording_date < '2017-12-31')"

df = get_lims_dataframe(query)
df.tail()

Unnamed: 0,gender_id,donor_id,hemisphere_id,recording_date,age_id
3094,2,642979453,2,2017-10-19 21:57:10,20
3095,1,643830470,2,2017-10-25 18:01:20,20
3096,1,643830470,2,2017-10-25 20:12:41,20
3097,1,643830482,1,2017-10-26 18:00:08,20
3098,2,643830492,2,2017-10-27 21:09:28,20


## Look for cells that do not have a qc reviewer and failed qc for project T301. What are the cell level qc failures? (access, gohm seal, blowout, input resistance)

In [39]:
query= "SELECT err.stage1_reviewer_id, err.workflow_state, err.stage2_reviewer_id, proj.code, \
err.failed_bad_rs, err.failed_electrode_0, err.failed_no_seal, err.blowout_mv, \
err.initial_access_resistance_mohm \
FROM ephys_roi_results err JOIN specimens s ON s.ephys_roi_result_id = err.id \
JOIN projects proj ON s.project_id = proj.id \
WHERE proj.code = 'T301' \
AND workflow_state = 'auto_failed'"

df = get_lims_dataframe(query)
df


Unnamed: 0,code,failed_bad_rs,workflow_state,stage2_reviewer_id,blowout_mv,stage1_reviewer_id,failed_electrode_0,initial_access_resistance_mohm,failed_no_seal
0,T301,False,auto_failed,,,,True,,
1,T301,False,auto_failed,,,,False,,
2,T301,False,auto_failed,,,,True,,
3,T301,False,auto_failed,,,,True,,
4,T301,False,auto_failed,,,,False,,
5,T301,False,auto_failed,,,,True,,
6,T301,False,auto_failed,,,,True,,
7,T301,False,auto_failed,,,,True,,
8,T301,False,auto_failed,,,,True,,
9,T301,False,auto_failed,,,,True,,
