##### Imports

In [1]:
import pandas as pd
import numpy as np
import csv
import logging
import os
from scipy.stats import pearsonr

from temp_funcs import read_file, merge_dataframes, drop_cols, drop_nans, filter_date_range, \
filter_df, create_container_col, create_cond_df, create_container_df

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

sns.set(context = "notebook", style = "ticks", font="verdana", font_scale = 1.5)
pd.set_option("display.max_colwidth",150) #Expands the number of characters shown in the columns
pd.set_option('display.max_columns', None)

##### Paths/Fields

In [2]:
path = "C:/Users/kumar/Documents/Github/analysis_projects/"
csv_path = os.path.join(path, "csv/")
excel_path = os.path.join(path, "excel/")
plot_path = os.path.join(path, "plot/hl4_plot/")

path_shiny_human = os.path.join(csv_path, "current_shiny_human.csv")
path_ephys = os.path.join(csv_path, "ephys_hIVSCC_MET.csv")

In [3]:
fields_shiny = ["cell_name", "roi", "cell_specimen_project", "Norm_Marker_Sum.0.4_label",
                "marker_sum_norm_label", "rna_amplification_pass_fail",
                "percent_cdna_longer_than_400bp", "amplified_quantity_ng", "sample_id", 
                "Tree_call", "seurat_prediction_score_label", "seurat_cluster_label",
                "broad_class_label", "subclass_label", "go_no_go_63x", "image_series_63x_qc", "Genes.Detected.CPM"]

shiny_dict = {"Norm_Marker_Sum.0.4_label" : "NMS Pass/Fail",
              "rna_amplification_pass_fail" : "RNA Amp Pass/Fail",
              "marker_sum_norm_label": "Normalized marker sum",
              "percent_cdna_longer_than_400bp" : "cDNA quality (%>400 bp)",
              "amplified_quantity_ng" : "Amplified content (ng)",
              "seurat_prediction_score_label": "Seurat prediction score",
              "go_no_go_63x": "Morpho call(63x)",
              "image_series_63x_qc": "Imaging status",
              "Genes.Detected.CPM": "Genes detected"}

#Replacing string in specific columns
nms_dict = {True: "Pass", False: "Fail"}
morpho_dict = {"63x go": "Go", "63x no go": "No Go"}
imaging_dict = {"ready_for_dendrite_trace": "Ready for dendrite trace", "image_qc_passed": "Image QC passed", 
                "qc": "QC", "deferred": "Deferred", "autotracing": "Autotracing", "rescanning": "Rescanning",
                "processing": "Processing", "failed": "Failed"}

##### Filtering shiny

In [4]:
shiny_human = read_file(path_shiny_human, fields_shiny) #3592 rows
shiny_human.rename(columns=shiny_dict, inplace=True)

INFO:temp_funcs:Read file in as a pandas dataframe


In [5]:
shiny_human["NMS Pass/Fail"] = shiny_human["NMS Pass/Fail"].replace(nms_dict)
shiny_human["Morpho call(63x)"] = shiny_human["Morpho call(63x)"].replace(morpho_dict)
shiny_human["Imaging status"] = shiny_human["Imaging status"].replace(imaging_dict)
shiny_human["cDNA quality (%>400 bp)"] = shiny_human["cDNA quality (%>400 bp)"] * 100

In [6]:
shiny_human["region"] = pd.np.where(shiny_human.roi.str.contains("FCx"), "FCx", 
                                    pd.np.where(shiny_human.roi.str.contains("OCx"), "OCx", 
                                    pd.np.where(shiny_human.roi.str.contains("PCx"), "PCx",
                                    pd.np.where(shiny_human.roi.str.contains("TCx"), "TCx", "Unknown/Misc"))))

shiny_human["layer"] = pd.np.where(shiny_human.roi.str.contains("L1"), "L1", 
                                   pd.np.where(shiny_human.roi.str.contains("L2"), "L2",
                                   pd.np.where(shiny_human.roi.str.contains("L3"), "L3",
                                   pd.np.where(shiny_human.roi.str.contains("L4"), "L4", 
                                   pd.np.where(shiny_human.roi.str.contains("L5"), "L5",
                                   pd.np.where(shiny_human.roi.str.contains("L6"), "L6",
                                   pd.np.where(shiny_human.roi.str.contains("L2-3"), "L2-3",
                                   pd.np.where(shiny_human.roi.str.contains("L3a"), "L3",
                                   pd.np.where(shiny_human.roi.str.contains("L3b"), "L3",
                                   pd.np.where(shiny_human.roi.str.contains("L3c"), "L3",
                                   pd.np.where(shiny_human.roi.str.contains("L6a"), "L6",
                                   pd.np.where(shiny_human.roi.str.contains("LL1"), "LL1", 
                                   pd.np.where(shiny_human.roi.str.contains("LL2"), "LL2", 
                                   pd.np.where(shiny_human.roi.str.contains("LL3"), "LL3",
                                   pd.np.where(shiny_human.roi.str.contains("LL4"), "LL4", 
                                   pd.np.where(shiny_human.roi.str.contains("LL5"), "LL5", "unknown_layer"))))))))))))))))

shiny_human["imaging_recon"] = pd.np.where(shiny_human["Imaging status"].str.contains("Ready for dendrite trace"), "Possible Reconstructions", 
                                           pd.np.where(shiny_human["Imaging status"].str.contains("Image QC passed"), "Possible Reconstructions", 
                                           pd.np.where(shiny_human["Imaging status"].str.contains("Autotracing"), "Possible Reconstructions", 
                                           pd.np.where(shiny_human["Imaging status"].str.contains("Deferred"), "Failed Reconstructions", 
                                           pd.np.where(shiny_human["Imaging status"].str.contains("Failed"), "Failed Reconstructions", "In Process")))))

##### h_l4

In [7]:
h_l4 = filter_df(shiny_human, "layer", "L4")
h_l4.count()

INFO:temp_funcs:Filtered dataframe based on layer == L4


sample_id                  474
cell_name                  474
roi                        474
cDNA quality (%>400 bp)    474
RNA Amp Pass/Fail          474
Amplified content (ng)     474
Genes detected             474
cell_specimen_project      473
Morpho call(63x)           350
Imaging status             138
Normalized marker sum      474
NMS Pass/Fail              474
seurat_cluster_label       474
Seurat prediction score    474
broad_class_label          474
subclass_label             474
Tree_call                  474
region                     474
layer                      474
imaging_recon              474
dtype: int64

In [10]:
h_l4 = filter_df(h_l4, "seurat_cluster_label", "Exc L3-5 RORB ESR1")

INFO:temp_funcs:Filtered dataframe based on seurat_cluster_label == Exc L3-5 RORB ESR1


In [11]:
h_l4["seurat_cluster_label"].unique()

array(['Exc L3-5 RORB ESR1'], dtype=object)

In [13]:
h_l4.count()

sample_id                  105
cell_name                  105
roi                        105
cDNA quality (%>400 bp)    105
RNA Amp Pass/Fail          105
Amplified content (ng)     105
Genes detected             105
cell_specimen_project      105
Morpho call(63x)            92
Imaging status              34
Normalized marker sum      105
NMS Pass/Fail              105
seurat_cluster_label       105
Seurat prediction score    105
broad_class_label          105
subclass_label             105
Tree_call                  105
region                     105
layer                      105
imaging_recon              105
dtype: int64

In [18]:
h_l4.to_csv(excel_path + "ESR1.csv")