##### Imports

In [1]:
import pandas as pd
import numpy as np
import csv
import logging
import os

from functions.temp_funcs import read_file, merge_dataframes, drop_cols, drop_nans, filter_date_range, \
filter_df, create_container_col, create_cond_df

import seaborn as sns
import matplotlib.pyplot as plt
# Notebook-wide seaborn theme.
# Optional, currently disabled: font_scale=1.35
sns.set(
    context="notebook",
    style="white",
    font="verdana",
)

In [2]:
# Pandas display tweaks applied for the whole notebook:
#   display.max_colwidth -> expands the number of characters shown in the columns
#   display.max_columns  -> None, i.e. no cap on the number of columns shown
display_options = {
    "display.max_colwidth": 150,
    "display.max_columns": None,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

##### Paths

In [3]:
# Base directory of the analysis scripts. Defaults to the original author's
# checkout; set the ANALYSIS_SCRIPTS_DIR environment variable to run the
# notebook on another machine without editing this cell.
path = os.environ.get(
    "ANALYSIS_SCRIPTS_DIR",
    "C:/Users/kumar/Documents/Github/analysis_projects/analysis_scripts/",
)

# Sub-folders for inputs/outputs. The trailing "/" is kept so that plain string
# concatenation (e.g. csv_path + "name.csv") keeps working.
csv_path = os.path.join(path, "csv/")
excel_path = os.path.join(path, "excel/")
plot_path = os.path.join(path, "plot/")

In [4]:
# Input CSV exports (mouse and human), both located under csv_path.
mouse_csv_name = "current_mouse_shiny.csv"
human_csv_name = "current_human_shiny.csv"

path_shiny_mouse = os.path.join(csv_path, mouse_csv_name)
path_shiny_human = os.path.join(csv_path, human_csv_name)

##### Project Goal/Info

In [5]:
"""
As of now, a cell must have both an RNA-Amp pass and an NMS pass to move on to reconstruction.
A percentage of cells fail at RNA-Amp yet have an NMS pass - these need to be evaluated to
determine quality and numbers.

Items to sort:
Project code (hIVSCC-MET, hIVSCC-METc, hIVSCC-METx, mIVSCC-MET)

Variables:
NMS pass or fail
NMS score
RNA_amp pass or fail
%>400 bp --- cDNA quality
Concentration pg/ul BA  ----Can't find it?
pg yield BA----amplified content same as cDNA quantity (picogreen yield)
"""

"\nAs of now a cell must have an RNA-Amp Pass and NMS pass to move on for a reconstruction.\nThere are a percentage of cells that fail at RNA-Amp yet have NMS pass - these need to evaluated to \ndetermine quality and numbers. \n\nItems to sort:\nProject code (hIVSCC-MET, hIVSCC-METc, hIVSCC-METx, mIVSCC-MET)\n\nVariables:\nNMS pass or fail\nNMS score\nRNA_amp pass or fail\n%>400 bp --- cDNA quality\nConcentration pg/ul BA  ----Can't find it?\npg yield BA----amplified content same as cDNA quantity (picogreen yield)\n"

In [6]:
# Reference note listing the shiny export columns relevant to this analysis.
# It is a bare string literal, so the cell simply echoes it as output.
# NOTE(review): the saved output of this cell still contains a "NOT SURE" entry
# that is no longer in the source below - re-run the cell to refresh it.
"""
shiny columns:

["cell_specimen_project", "Norm_Marker_Sum.0.4_label", "marker_sum_norm_label",
"rna_amplification_pass_fail", "percent_cdna_longer_than_400bp", "amplified_quantity_ng"]

"""

'\nshiny columns:\n\n["cell_specimen_project", "Norm_Marker_Sum.0.4_label", "marker_sum_norm_label",\n"rna_amplification_pass_fail", "percent_cdna_longer_than_400bp", "NOT SURE", "amplified_quantity_ng"]\n\n'

##### Start Code

In [18]:
# Columns requested from the mouse shiny export.
# Not requested for now: "percent_cdna_longer_than_400bp", "amplified_quantity_ng".
fields_shiny_mouse = [
    "cell_name_label",
    "cell_specimen_project_label",
    "Norm_Marker_Sum.0.4_label",
    "marker_sum_norm_label",
    "rna_amp_pass_fail_label",
]

# read_file logs that it returns a pandas DataFrame; the second argument is
# presumably the column selection - confirm in functions.temp_funcs.
shiny_mouse = read_file(path_shiny_mouse, fields_shiny_mouse)

INFO:functions.temp_funcs:Read file in as a pandas dataframe


In [19]:
# Columns requested from the human shiny export. Unlike the mouse list, no
# RNA-amp pass/fail column is requested here.
fields_shiny_human = [
    "cell_name_label",
    "cell_specimen_project_label",
    "Norm_Marker_Sum.0.4_label",
    "marker_sum_norm_label",
]

# read_file logs that it returns a pandas DataFrame; the second argument is
# presumably the column selection - confirm in functions.temp_funcs.
shiny_human = read_file(path_shiny_human, fields_shiny_human)

INFO:functions.temp_funcs:Read file in as a pandas dataframe


In [20]:
# Preview the mouse frame (the rendered output elides the middle rows).
shiny_mouse

Unnamed: 0,cell_name_label,cell_specimen_project_label,rna_amp_pass_fail_label,marker_sum_norm_label,Norm_Marker_Sum.0.4_label
0,Vipr2-IRES2-Cre;Slc32a1-T2A-FlpO;Ai65-338917.03.01.02,T301x,Pass,0.380674,False
1,Crh-IRES-Cre_ZJH;Sst-IRES-FlpO;Ai65-362357.04.01.02,mIVSCC-MET,Fail,0.717450,True
2,Slc32a1-IRES-Cre;Ai14-321584.04.01.01,T301x,Pass,0.506820,True
3,Slc32a1-IRES-Cre;Ai14-326815.04.02.05,T301x,Pass,0.904325,True
4,Sncg-IRES2-FlpO-neo;Ai65F-487894.09.01.01,mIVSCC-METx,Pass,0.853235,True
...,...,...,...,...,...
12284,ZZ_Missing,ZZ_Missing,ZZ_Missing,1.206065,True
12285,ZZ_Missing,ZZ_Missing,ZZ_Missing,0.895267,True
12286,ZZ_Missing,ZZ_Missing,ZZ_Missing,1.065042,True
12287,ZZ_Missing,ZZ_Missing,ZZ_Missing,0.883404,True


In [21]:
# Preview the human frame (the rendered output elides the middle rows).
shiny_human

Unnamed: 0,cell_name_label,cell_specimen_project_label,marker_sum_norm_label,Norm_Marker_Sum.0.4_label
0,H19.06.351.11.16.01.01,hIVSCC-MET,1.303453,True
1,H19.06.351.11.16.01.03,hIVSCC-MET,1.182129,True
2,H17.03.016.11.16.02,hIVSCC-MET,1.047215,True
3,H19.06.351.11.16.02.06,hIVSCC-MET,0.945765,True
4,H17.03.016.11.15.07,hIVSCC-MET,0.777364,True
...,...,...,...,...
3932,H200.1023.CX.09.013.519,ZZ_Missing,1.121556,True
3933,H200.1023.CX.09.010.364,ZZ_Missing,0.845011,True
3934,H200.1030.CX10.03.741,ZZ_Missing,0.721089,True
3935,H200.1030.CX10.03.761,ZZ_Missing,0.763140,True


In [None]:
# NOTE(review): `path_shiny` is not defined in any cell visible in this notebook
# (only path_shiny_mouse / path_shiny_human are), so on a fresh kernel this cell
# raises NameError unless the name is defined elsewhere. The column names below
# also lack the "_label" suffix used by the mouse/human frames - confirm which
# export this cell is meant to read before running it.
fields_shiny = [
    "cell_specimen_project",
    "Norm_Marker_Sum.0.4_label",
    "marker_sum_norm_label",
    "rna_amplification_pass_fail",
    "percent_cdna_longer_than_400bp",
    "amplified_quantity_ng",
]

shiny = read_file(path_shiny, fields_shiny)

In [None]:
# List the distinct project codes present in `shiny`.
shiny["cell_specimen_project"].unique() # 10674 rows (presumably the row count of `shiny` here, not the number of unique codes - confirm)

In [None]:
# Drop rows that have no project code. Rebinding the name instead of using
# inplace=True avoids mutating the loaded frame in place across cells.
shiny = shiny.dropna(subset=["cell_specimen_project"])  # 10669 rows

##### Filter project code

In [None]:
# Project codes of interest for this analysis.
project_list = [
    "hIVSCC-MET",
    "hIVSCC-METc",
    "hIVSCC-METx",
    "mIVSCC-MET",
]

# Presumably keeps only the rows whose project code is in project_list -
# confirm against create_cond_df in functions.temp_funcs.
fill_project_shiny = create_cond_df(
    shiny,
    "cell_specimen_project",
    project_list,
)  # 8623 rows

In [None]:
# Sanity check: show which project codes remain after the project filter
# (expected to be a subset of project_list - confirm).
fill_project_shiny["cell_specimen_project"].unique()

##### Filter RNA Amp pass_fail

In [None]:
# Inspect the distinct RNA-amp pass/fail values before filtering on them.
fill_project_shiny["rna_amplification_pass_fail"].unique()

In [None]:
# Inspect the distinct values of the NMS 0.4 label column before filtering on it.
fill_project_shiny["Norm_Marker_Sum.0.4_label"].unique()

In [None]:
# Rows of the project-filtered frame whose RNA amplification is "Fail"
# (presumably an equality filter - confirm against filter_df).
fail_rna = filter_df(
    fill_project_shiny,
    "rna_amplification_pass_fail",
    "Fail",
)  # 1401 rows

##### Filter on the NMS label (True when the normalized marker sum is greater than 0.4)

In [None]:
# Of the RNA-amp failures, keep the rows whose NMS 0.4 label is True
# (presumably an equality filter - confirm against filter_df).
true_nms_fail_rna = filter_df(
    fail_rna,
    "Norm_Marker_Sum.0.4_label",
    True,
)  # 615 rows

##### Write the DataFrames to CSV

In [None]:
#shiny.to_csv(csv_path + "set_rna_amp.csv")
#true_nms_fail_rna.to_csv(csv_path + "rna_amp.csv")