# Import Section

In [1]:
import pg8000          #pg8000 access SQL databases
import pandas as pd    #pandas will be needed to work in a dataframe

In [2]:
#Import necessary functions
import pandas as pd
import numpy as np
import seaborn as sns

#Makes a white backround with gray horizontal gridmarks at y-values
sns.set_style("whitegrid")

# Definitions created by Agata to access LIMS to pull database

In [3]:
#code from Agata
#these are nice functions to open LIMS, make a query and then close LIMS after

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    conn = pg8000.connect(user=user, host=host, database=database, password=password, port=port)
    return conn, conn.cursor()

def _select(cursor, query):
    cursor.execute(query)
    columns = [ d[0] for d in cursor.description ]
    return [ dict(zip(columns, c)) for c in cursor.fetchall() ]

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """A function that takes a string containing a SQL query, connects to the LIMS database and outputs the result."""
    conn, cursor = _connect(user, host, database, password, port)
    try:
        results = _select(cursor, query)
    finally:
        
        #THESE ARE IMPORTANT!!!!!!
        #Every query needs to be closed when done
        cursor.close()             
        conn.close()
    return results


#this last function will take our query results and put them in a dataframe so that they are easy to work with
def get_lims_dataframe(query):
    '''Return a dataframe with lims query'''
    result = limsquery(query)
    try:
        data_df = pd.DataFrame(data=result, columns=result[0].keys())
    except IndexError:
        print "Could not find results for your query."
        data_df = pd.DataFrame()
    return data_df

# Rules in SQL

### Practice 

In [35]:
my_query = "SELECT * FROM specimens limit 10"
#we are going to select all columns in the specimens table and limit our search to the first 10

my_result = limsquery(my_query)

first_element = my_result
#now we only want to look at the first element of our result
#df = get_lims_dataframe(my_query)
print first_element
#df

[{'cell_depth': None, 'ephys_roi_result_id': None, 'parent_y_coord': 0, 'reference_space_id': None, 'updated_at': datetime.datetime(2016, 12, 16, 4, 54, 44, 477335), 'cell_label': None, 'preparation_method_id': None, 'parent_x_coord': 2, 'location_id': None, 'id': 556516441, 'cortex_layer_id': None, 'plane_of_section_id': 11, 'frozen_at': None, 'flipped_specimen_id': 561557765, 'data': None, 'pinned_radius': None, 'rna_integrity_number': None, 'histology_well_name': None, 'created_by': None, 'priority': None, 'parent_id': 556516212, 'ephys_start_time_sec': None, 'project_id': 305094322, 'alignment3d_id': None, 'carousel_well_name': u'T301_122_161107_01_12', 'patched_cell_container': None, 'updated_by': None, 'cell_prep_id': None, 'biophysical_model_state': u'review_required', 'barcode': u'0556516441', 'storage_directory': None, 'x_coord': None, 'tissue_ph': None, 'specimen_preparation_method_id': None, 'donor_id': 555257198, 'operation_id': None, 'ephys_neural_tissue_plan_id': 55525724

In [12]:
my_query = "SELECT * FROM users LIMIT 10"
#we are going to select all columns in the specimens table and limit our search to the first 10

my_result = limsquery(my_query)

first_element = my_result[0]
#now we only want to look at the first element of our result

print first_element.keys()

['siv_default_ontology_id', 'created_at', 'siv_default_graph_id', 'updated_at', 'id', 'siv_default_cortex_layer_ontology_id', 'use_custom_header', 'siv_default_cortex_layer_graph_id', 'login', 'group_override']


In [15]:
my_query = "SELECT ra.amplified_quantity_ng, ra.name, ra.run_date, ra.percent_cdna_longer_than_400bp, ra.failed, \
ra.cycles, ra.id, rai.sample_id, cell.id, cell.name, cell.created_at, cell.patched_cell_container \
FROM specimens cell \
LEFT JOIN rna_amplification_inputs rai ON rai.sample_id = cell.id \
LEFT JOIN rna_amplifications ra ON ra.id = rai.rna_amplification_id \
WHERE run_date > '2017-01-01'"

my_result = limsquery(my_query)

first_element = my_result[0]
#now we only want to look at the first element of our result

print first_element

{'amplified_quantity_ng': 0.0, 'name': u'H16.06.012.11.03.01', 'run_date': datetime.datetime(2017, 1, 4, 8, 0), 'percent_cdna_longer_than_400bp': 0.0, 'created_at': datetime.datetime(2016, 9, 29, 21, 13, 38, 114713), 'failed': True, 'patched_cell_container': u'P1S4_160929_003_A01', 'sample_id': 548408304, 'cycles': 19, 'id': 548408304}


In [16]:
#The Query above has access to cycles which is hopefully PCR cycles.
#Access then restrict to only 21 PCR cycles

In [17]:
my_query = "SELECT cell.name AS cell_name, donors.full_genotype \
FROM specimens cell JOIN donors ON cell.donor_id = donors.id \
WHERE cell.ephys_roi_result_id IS NOT NULL"

my_result = limsquery(my_query)

first_element = my_result[0]
#now we only want to look at the first element of our result

print first_element.keys

{'full_genotype': None, 'cell_name': u'148989.01.01'}


## Try doing Picogreen vs User

In [10]:
#You need "Date", "User", "Rig #", "Picogreen conc. (pg/uL)", "PCR cycles", "SM_QC_PF", "Bad dates"