In [1]:
import csv
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime

In [2]:
pd.options.display.max_columns = None #Displays all columns

In [3]:
def gen_fil(df, col, cond):
    '''
    Filters the column with a certain condition
    df: dataframe
    col: column
    cond: condition
    '''
    filtered = df[df[col] == cond]
    return filtered

def gen_cat(df, col):
    '''
    Assigns astype to the column
    df: dataframe
    col: column
    '''    
    category = df[col].astype('category')
    return category

def cell_count(P_num):
    '''
    Counts number of Lims tube id based on P# as number of cells
    df = dataframe
    count() = counts number
    '''
    user = df1[df1['Lims tube id'].str.contains(P_num)]
    return user['Lims tube id'].count()

def P_user(P_num):
    '''
    Prints cell_count 
    '''
    print("Total cells:", cell_count(P_num))

In [4]:
def nucleated(x):
    nuc_high_seal = x[(x['Post patch?'] == 'Nucleated') | 
                      (((x['Post patch?'] == 'nucleus_visible') | 
                      (x['Post patch?'] == 'nucleus_present')) & 
                      (x['Post patch pipette R'] >= 500))]
    return nuc_high_seal

def partial_nucleated(y):
    nuc_low_seal = y[(y['Post patch?'] == 'Partial-Nucleus') | 
                     (((y['Post patch?'] == 'nucleus_present') | 
                     (y['Post patch?'] == 'nucleus_visible')) & 
                     (y['Post patch pipette R'] <= 499))]
    return nuc_low_seal

def outside_out(z):
    no_high_seal = z[(z['Post patch?'] == 'Outside-Out') | 
                     (((z['Post patch?'] == 'nucleus_absent') | 
                     (z['Post patch?'] == 'no_nucleus_visible')) & 
                     (z['Post patch pipette R'] >= 500))]
    return no_high_seal

def no_seal(w): 
    no_low_seal = w[(w['Post patch?'] == 'No-Seal') | 
                    (((w['Post patch?'] == 'nucleus_absent') | 
                    (w['Post patch?'] == 'no_nucleus_visible')) & 
                    (w['Post patch pipette R'] <= 499))]
    return no_low_seal
    
def entire_cell(v):
    entire = v[(v['Post patch?'] == 'Entire-Cell') | 
               (v['Post patch?'] == 'entire_cell')]
    return entire

#variable['post_patch'] = 'Term'
#Term is an output displayed in the Post_Patch column

def reclassify(df):
    nu = nucleated(df)
    nu['post_patch'] = 'Nuc-high seal' 
    oo = outside_out(df)
    oo['post_patch'] = 'No-high seal'
    pn = partial_nucleated(df)
    pn['post_patch'] = 'Nuc-low seal'
    ns = no_seal(df)
    ns['post_patch'] = 'No-low seal'
    ec = entire_cell(df)
    ec['post_patch'] = 'Entire cell'
    return  nu, oo, pn, ns, ec

def concat_df(a, b, c, d, e):
    frames = (a, b, c, d, e)
    df = pd.concat(frames)
    return df

def postpatch_reclass(df):
    return concat_df(*reclassify(df))

#df = postpatch_reclass(df)

In [5]:
df = pd.read_csv('C:/users/kumar/allen-institute/personal-projects/csv/patch_seq_log_mouse.csv')
#parse_dates=['Date'])

list_initial = ['Date',
                'File',
                'User',
                'cell type',
                'Post patch?',
                'Post patch pipette R',
#                'Nucleus sucked in?',
                'Time spent extracting cytosol',
                'Time spent retracting pipette',
                'patch duration',
                'Lims tube id',
#                'Well ID #',
                'PCR cycles',
                'SM_QC_PF']
df = df[list_initial]
df.head()

Unnamed: 0,Date,File,User,cell type,Post patch?,Post patch pipette R,Time spent extracting cytosol,Time spent retracting pipette,patch duration,Lims tube id,PCR cycles,SM_QC_PF
0,170104,Gad2-IRES-Cre;Ai14-292660.07.01.01,P1,tdt+,Outside-Out,,2.3,,11.78333333,P1S4_170104_001_A01,18.0,pass
1,170104,Gad2-IRES-Cre;Ai14-292660.07.01.02,P1,tdt+,No-Seal,,2.316666667,,12.21666667,P1S4_170104_002_A01,18.0,pass
2,170104,Gad2-IRES-Cre;Ai14-292660.07.01.03,P1,tdt+,No-Seal,,2.45,,11.78333333,P1S4_170104_003_A01,18.0,pass
3,170104,Gad2-IRES-Cre;Ai14-292660.08.01.01,P1,tdt+,No-Seal,,2.316666667,,12.45,P1S4_170104_004_A01,18.0,pass
4,170104,Gad2-IRES-Cre;Ai14-292660.08.01.02,P1,tdt+,Outside-Out,,2.35,,12.36666667,P1S4_170104_005_A01,18.0,pass


In [6]:
#Changing to string to add 20 in front of strings within date column
df['Date'] = df['Date'].astype('str')
df['Date'] = '20' + df['Date']
df['Date'] = pd.to_datetime(df['Date']) #Converting date to YYYY-MM-DD

df = df.set_index('Date') #Set date column as index column
df.sort_index(inplace = True) #Sort the date column just in case

start_date = input('Enter the start date for your dataframe (YYYY-MM-DD): ')
#Production Date Start = '2017-10-01' 
df = df[start_date:] #Choosing start & end date

Enter the start date for your dataframe (YYYY-MM-DD): 2017-10-01


In [7]:
df.head()

Unnamed: 0_level_0,File,User,cell type,Post patch?,Post patch pipette R,Time spent extracting cytosol,Time spent retracting pipette,patch duration,Lims tube id,PCR cycles,SM_QC_PF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-10-02,Chrna2-Cre_OE25;Ai14-351067.03.02.02,P8,tdt+,nucleus_absent,30.0,0.733333333,5.4,13.76666667,P8S4_171002_357_A01,21.0,fail
2017-10-02,Oxtr-T2A-Cre;Ai14-351467.03.01.01,P9,tdt+,Nucleated,,0.61666667,,5.33333333,P9S4_171002_401_A01,21.0,pass
2017-10-02,Gad2-IRES-Cre;Ai14-350672.03.01.01,P9,tdt+,nucleus_present,1000.0,0.283333333,1.766666667,7.283333333,P9S4_171002_402_A01,21.0,pass
2017-10-02,Gad2-IRES-Cre;Ai14-350672.04.01.01,P9,tdt+,nucleus_absent,1000.0,0.45,1.316666667,6.416666667,P9S4_171002_403_A01,21.0,fail
2017-10-02,Gad2-IRES-Cre;Ai14-350672.04.01.03,P9,tdt+,nucleus_absent,4.7,0.516666667,1.766666667,5.416666667,P9S4_171002_405_A01,21.0,fail


In [8]:
#Categories
df['User'] = gen_cat(df, 'User')
df['PCR cycles'] = gen_cat(df, 'PCR cycles')
df['SM_QC_PF'] = gen_cat(df, 'SM_QC_PF')

#Filtering 
df = gen_fil(df, 'cell type', 'Cre+')
df = gen_fil(df, 'PCR cycles', 21)
df = gen_fil(df, 'SM_QC_PF', 'pass')

df = postpatch_reclass(df) #Adding post_patch column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

In [9]:
df.head()

Unnamed: 0_level_0,File,User,cell type,Post patch?,Post patch pipette R,Time spent extracting cytosol,Time spent retracting pipette,patch duration,Lims tube id,PCR cycles,SM_QC_PF,post_patch
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.01.02,PA,Cre+,nucleus_present,1000.0,0.766666667,2.483333333,8.566666667,PAS4_171207_454_A01,21.0,pass,Nuc-high seal
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.01.01,PA,Cre+,nucleus_present,2000.0,1.016666667,2.316666667,8.75,PAS4_171207_453_A01,21.0,pass,Nuc-high seal
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.02.02,PA,Cre+,nucleus_present,1000.0,0.433333333,3.116666667,18.55,PAS4_171207_452_A01,21.0,pass,Nuc-high seal
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.02.01,PA,Cre+,nucleus_present,2000.0,0.95,2.366666667,9.283333333,PAS4_171207_451_A01,21.0,pass,Nuc-high seal
2017-12-07,Cux2-CreERT2;Ai14-362878.04.01.02,P9,Cre+,nucleus_present,2000.0,0.466666667,2.75,7.083333333,P9S4_171207_402_A01,21.0,pass,Nuc-high seal


In [10]:
df.tail()

Unnamed: 0_level_0,File,User,cell type,Post patch?,Post patch pipette R,Time spent extracting cytosol,Time spent retracting pipette,patch duration,Lims tube id,PCR cycles,SM_QC_PF,post_patch
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-04-10,Vip-IRES-Cre;Ai14-383085.05.01.01,P1,Cre+,entire_cell,4000.0,0.4,1.25,13.31666667,P1S4_180410_001_A01,21.0,pass,Entire cell
2018-04-11,Ndnf-IRES2-dgCre;Slc17a6-IRES2-FlpO;Ai65-38393...,P9,Cre+,entire_cell,6.9,0.583333333,7.066666667,14.43333333,P9S4_180411_407_A01,21.0,pass,Entire cell
2018-04-13,Gad2-IRES-Cre;Ai14-383745.03.01.02,P9,Cre+,entire_cell,0.0,0.583333333,5.6,12.05,P9S4_180413_403_A01,21.0,pass,Entire cell
2018-04-25,Th-P2A-FlpO;Ai65F-385999.04.01.02,PB,Cre+,entire_cell,2.0,1.716666667,0.15,16.26666667,PBS4_180425_502_A01,21.0,pass,Entire cell
2018-04-27,Gad2-IRES-Cre;Ai14-386058.02.01.03,P9,Cre+,entire_cell,7.0,0.566666667,0.666666667,7.85,P9S4_180427_405_A01,21.0,pass,Entire cell


In [11]:
df1 = df[['File', 'User', 'post_patch', 'Lims tube id']] #Creating a smaller dataframe

In [12]:
df1.head()

Unnamed: 0_level_0,File,User,post_patch,Lims tube id
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.01.02,PA,Nuc-high seal,PAS4_171207_454_A01
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.01.01,PA,Nuc-high seal,PAS4_171207_453_A01
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.02.02,PA,Nuc-high seal,PAS4_171207_452_A01
2017-12-07,Chrna2-Cre_OE25;Ai14-362810.05.02.01,PA,Nuc-high seal,PAS4_171207_451_A01
2017-12-07,Cux2-CreERT2;Ai14-362878.04.01.02,P9,Nuc-high seal,P9S4_171207_402_A01


In [13]:
#Outputs total number of cells
P_user('PA')

Total cells: 116
