##### Imports

In [1]:
import pandas as pd
import numpy as np
import csv
import os
from temp_funcs import read_file, merge_dataframes, drop_cols, drop_nans, filter_date_range, filter_df, create_container_col, create_cond_df

import seaborn as sns
import matplotlib.pyplot as plt
# Apply one seaborn theme to every figure in this notebook.
# font_scale is deliberately not passed (a value of 1.35 was previously commented out).
sns.set(
    context="notebook",
    style="white",
    font="verdana",
)

##### Pandas Display Settings

In [2]:
# Show up to 150 characters per cell before pandas truncates the text.
pd.options.display.max_colwidth = 150
# Do not cap the number of columns shown when a wide DataFrame is displayed.
pd.options.display.max_columns = None

##### Lists/Paths

In [3]:
# --- Local working directory (machine-specific absolute path) ---
csv_path = "C:/Users/ramr/Documents/Github/analysis_projects/csv/ivscc_data_blitz"
# Ephys feature table stored alongside the other local CSVs.
path_ephys = os.path.join(csv_path, "ephys_features.csv")

# --- Compiled JEM metadata on the network share ---
path_main_jem = "//allen/programs/celltypes/workgroups/279/Patch-Seq/compiled-jem-data"
path_jem = os.path.join(path_main_jem, "jem_metadata.csv")
path_jem_na = os.path.join(path_main_jem, "jem_metadata_tubeNA.csv")
path_jem_fail = os.path.join(path_main_jem, "jem_metadata_wFAILURE.csv")

# --- Mouse patch-seq mapping table (shiny directory on the network share) ---
path_shiny_mouse = "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/star/mouse_patchseq_VISp_current/mapping.df.with.bp.40.lastmap.csv"

In [4]:
# Columns requested from the JEM metadata file.
fields_jem = [
    "name",
    "date",
    "organism_name",
    "container",
    "rigOperator",
    "status",
    "roi_major",
    "roi_minor",
]

# Columns requested from the ephys features file.
# The header names literally include the b'...' wrapper, so it is kept verbatim.
fields_ephys = [
    "b'name'",
    "b'vrest'",
    "b'ri'",
    "b'sag'",
    "b'tau'",
    "b'upstroke_downstroke_ratio_long_square'",
    "b'latency'",
    "b'f_i_curve_slope'",
]

# Columns requested from the shiny mapping file.
fields_shiny = [
    "sample_id",
    "cell_name",
    "percent_cdna_longer_than_400bp",
    "rna_amplification_pass_fail",
    "amplified_quantity_ng",
    "Genes.With.CPM",
    "marker_sum_norm_label",
    "Norm_Marker_Sum.0.4_label",
]

In [5]:
# Load the three source tables with the project helper read_file, passing each
# file path together with its fields_* column list.
# NOTE(review): presumably read_file keeps only the listed columns — verify in temp_funcs.
# The trailing row counts are the author's recorded values; nothing here re-checks them.
jem = read_file(path_jem, fields_jem) #20843 rows
ephys = read_file(path_ephys, fields_ephys) #8541 rows
shiny_mouse = read_file(path_shiny_mouse, fields_shiny) #10674 rows

INFO:temp_funcs:Read file in as a pandas dataframe
INFO:temp_funcs:Read file in as a pandas dataframe
INFO:temp_funcs:Read file in as a pandas dataframe


In [6]:
# Preview the loaded JEM metadata table (rich display of the cell's last expression).
jem

Unnamed: 0,organism_name,name,date,rigOperator,status,roi_major,roi_minor,container
0,Mouse,Oxtr-T2A-Cre;Ai14-351467.03.01.01,2017-10-02 10:46:00 -0700,lisak,SUCCESS (high confidence),VISp,layer 2/3,P9S4_171002_401_A01
1,Mouse,Oxtr-T2A-Cre;Ai14-351467.05.02.02,2017-10-02 10:54:00 -0700,lindsayn,SUCCESS (high confidence),VISp,layer 5,P8S4_171002_352_A01
2,Mouse,Oxtr-T2A-Cre;Ai14-351467.05.02.03,2017-10-02 10:54:00 -0700,lindsayn,SUCCESS (high confidence),VISp,layer 5,P8S4_171002_353_A01
3,Mouse,Oxtr-T2A-Cre;Ai14-351467.05.02.01,2017-10-02 10:54:00 -0700,lindsayn,SUCCESS (high confidence),VISp,layer 5,P8S4_171002_351_A01
4,Mouse,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-350934.04.01.01,2017-10-02 11:30:00 -0700,rustym,SUCCESS (high confidence),VISp,layer 4,P2S4_171002_051_A01
...,...,...,...,...,...,...,...,...
15704,Human,H20.03.302.11.07.01.01.06,,,,,,PMS4_200121_236_A01
15705,Human,H18.29.138.11.32.04,,,,,,PHS4_181212_304_A01
15706,Human,H19.29.164.11.11.01,,,,,,PHS4_191113_101_A01
15707,Macaca nemestrina,Q20.26.028.1A.22.04.02,,,,,,PQS4_201216_328_A01


In [7]:
# Step 1: join the shiny mapping table to the JEM metadata, matching
#         shiny "cell_name" against JEM "name".
# Step 2: join that result to the ephys features, matching "cell_name"
#         against the ephys "b'name'" column.
# Both calls pass "inner" as the last argument — presumably the join type; verify in
# temp_funcs.merge_dataframes.
# NOTE(review): the 2635 rows recorded below is smaller than the 2849 rows recorded for the
# later date-filter cell, so at least one of the row-count comments is stale — confirm.
merge_sj = merge_dataframes(shiny_mouse, jem, "cell_name", "name", "inner") #2850 rows (even if how=left)
merge_all = merge_dataframes(merge_sj, ephys, "cell_name", "b'name'", "inner") #2635 rows

INFO:temp_funcs:Merged two pandas dataframe into one dataframe
INFO:temp_funcs:Merged two pandas dataframe into one dataframe


In [9]:
# Restrict merge_all by its "date" column. No explicit bounds are passed; the helper's
# log message reports the range 2017-01-01 - 2021-12-31.
# NOTE(review): merge_all is overwritten in place, so the unfiltered merge is only
# recoverable by re-running the merge cell. The recorded 2849 rows also exceeds the
# 2635 rows noted for the preceding merge — one of the counts is stale; confirm.
merge_all = filter_date_range(merge_all, "date") #2849 rows

INFO:temp_funcs:Filtered dataframe: 2017-01-01 - 2021-12-31


In [11]:
# List the distinct roi_major values present in merge_all.
# Series.unique() returns them in order of first appearance as a NumPy array.
# (Fixes the misspelled method name "uniqueue", which raises AttributeError.)
merge_all["roi_major"].unique()

array(['VISp', 'MOp', 'TEa', 'HIP', 'RSPd', 'SSp', 'RSPv', 'CTXsp', 'ORB',
       'MOs', 'HY', 'RSP', 'PAL', 'STRd', 'EPI', 'MB'], dtype=object)

In [12]:
# Keep only the rows where roi_major == "EPI" (as reported by the filter_df log message),
# then display the filtered frame.
# NOTE(review): merge_all is overwritten here, so earlier cells must be re-run to get the
# unfiltered data back.
merge_all = filter_df(merge_all, "roi_major", "EPI")
merge_all

INFO:temp_funcs:Filtered dataframe based on roi_major == EPI


Unnamed: 0,sample_id,cell_name,percent_cdna_longer_than_400bp,rna_amplification_pass_fail,amplified_quantity_ng,Genes.With.CPM,marker_sum_norm_label,Norm_Marker_Sum.0.4_label,organism_name,name,date,rigOperator,status,roi_major,roi_minor,container,b'name',b'vrest',b'ri',b'sag',b'tau',b'upstroke_downstroke_ratio_long_square',b'latency',b'f_i_curve_slope'
7560,PAS4_201109_452_A01,Rorb-IRES2-Cre-neo;Ai14-553330.10.09.02,0.469,Pass,5.722739,5229,0.468011,True,Mouse,Rorb-IRES2-Cre-neo;Ai14-553330.10.09.02,2020-11-09 09:51:16 -0800,ramr,SUCCESS,EPI,MH,PAS4_201109_452_A01,Rorb-IRES2-Cre-neo;Ai14-553330.10.09.02,,,,,,,
8774,P8S4_201112_354_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.09.09.02,0.546,Pass,5.657634,4764,0.054154,False,Mouse,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.09.09.02,2020-11-12 13:26:51 -0800,lindsayn,SUCCESS,EPI,MH,P8S4_201112_354_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.09.09.02,,,,,,,
8568,P8S4_201112_353_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.09.09.01,0.647,Pass,12.890235,8266,0.408199,True,Mouse,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.09.09.01,2020-11-12 13:26:51 -0800,lindsayn,SUCCESS,EPI,MH,P8S4_201112_353_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.09.09.01,,,,,,,
8524,PJS4_201112_803_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.08.09.01,0.535,Pass,8.309376,7628,0.394313,False,Mouse,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.08.09.01,2020-11-12 14:36:17 -0800,sarav,SUCCESS,EPI,LH,PJS4_201112_803_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.08.09.01,,,,,,,
8380,P8S4_201112_355_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.10.09.01,0.585,Pass,9.861348,7517,0.324647,False,Mouse,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.10.09.01,2020-11-12 15:49:45 -0800,lindsayn,SUCCESS,EPI,MH,P8S4_201112_355_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.10.09.01,,,,,,,
7023,PJS4_201112_805_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.07.09.02,0.557,Pass,11.078461,8834,0.429342,True,Mouse,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.07.09.02,2020-11-12 16:07:10 -0800,sarav,SUCCESS,EPI,LH,PJS4_201112_805_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.07.09.02,,,,,,,
7877,PJS4_201112_804_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.07.09.01,0.584,Pass,7.581815,7403,0.44071,True,Mouse,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.07.09.01,2020-11-12 16:07:10 -0800,sarav,SUCCESS,EPI,LH,PJS4_201112_804_A01,Nos1-CreERT2;Sst-IRES-FlpO;Ai65-553471.07.09.01,,,,,,,
9002,PJS4_201113_801_A01,Ctgf-T2A-dgCre;Ai14-553532.09.09.01,0.561,Pass,8.361602,7726,0.272719,False,Mouse,Ctgf-T2A-dgCre;Ai14-553532.09.09.01,2020-11-13 11:50:34 -0800,sarav,SUCCESS,EPI,MH,PJS4_201113_801_A01,Ctgf-T2A-dgCre;Ai14-553532.09.09.01,,,,,,,
8071,PES4_201113_651_A01,Ctgf-T2A-dgCre;Ai14-553532.10.09.01,0.704,Pass,17.204155,10699,0.331183,False,Mouse,Ctgf-T2A-dgCre;Ai14-553532.10.09.01,2020-11-13 12:45:37 -0800,katherineb,SUCCESS,EPI,MH,PES4_201113_651_A01,Ctgf-T2A-dgCre;Ai14-553532.10.09.01,,,,,,,
8576,PJS4_201116_803_A01,Pvalb-IRES-Cre;Ai14-553659.09.09.03,0.352,Fail,8.825953,7987,0.283086,False,Mouse,Pvalb-IRES-Cre;Ai14-553659.09.09.03,2020-11-16 14:42:42 -0800,sarav,SUCCESS,EPI,MH,PJS4_201116_803_A01,Pvalb-IRES-Cre;Ai14-553659.09.09.03,,,,,,,
