##### Imports

In [1]:
import pandas as pd
import numpy as np
import csv
import logging
import os

from functions.temp_funcs import read_file, merge_dataframes, drop_cols, drop_nans, filter_date_range, \
filter_df, create_container_col, create_cond_df

import seaborn as sns
import matplotlib.pyplot as plt
# Notebook-wide seaborn defaults (a font_scale override is left disabled below).
sns.set(
    context="notebook",
    style="white",
    font="verdana",
    # font_scale=1.35,
)

In [2]:
# Widen the per-cell character limit so long text values are shown in full.
pd.set_option("display.max_colwidth", 150)
# None removes the cap on how many columns are displayed.
pd.set_option("display.max_columns", None)

##### Paths

In [3]:
# Root of the analysis_scripts checkout.
# The original notebook hard-coded one user's absolute path; that value is
# kept as the default, but it can now be overridden on another machine by
# setting the ANALYSIS_SCRIPTS_PATH environment variable before starting
# the kernel.
path = os.environ.get(
    "ANALYSIS_SCRIPTS_PATH",
    "C:/Users/kumar/Documents/Github/analysis_projects/analysis_scripts/",
)

# Sub-directories keep their trailing "/" because later (commented-out)
# export lines build file names with `csv_path + "<file>"`.
csv_path = os.path.join(path, "csv/")
excel_path = os.path.join(path, "excel/")
plot_path = os.path.join(path, "plot/")

In [4]:
# Full paths of the three input CSVs, all located under csv_path.
path_jem, path_ephys, path_shiny = (
    os.path.join(csv_path, file_name)
    for file_name in (
        "jem_metadata_wFAILURE.csv",
        "ephys_mIVSCC_MET.csv",
        "Mouse_VISp_ctx_shiny.csv",
    )
)

##### Project Goal/Info

In [5]:
"""
As of now a cell must have an RNA-Amp Pass and NMS pass to move on for a reconstruction.
There is a percentage of cells that fail at RNA-Amp yet have NMS pass - these need to be evaluated to 
determine quality and numbers. 

Items to sort:
Project code (hIVSCC-MET, hIVSCC-METc, hIVSCC-METx, mIVSCC-MET)

Variables:
NMS pass or fail
NMS score
RNA_amp pass or fail
%>400 bp --- cDNA quality
Concentration pg/ul BA  ----Can't find it?
pg yield BA----amplified content same as cDNA quantity (picogreen yield)
"""

"\nAs of now a cell must have an RNA-Amp Pass and NMS pass to move on for a reconstruction.\nThere is a percentage of cells that fail at RNA-Amp yet have NMS pass - these need to be evaluated to \ndetermine quality and numbers. \n\nItems to sort:\nProject code (hIVSCC-MET, hIVSCC-METc, hIVSCC-METx, mIVSCC-MET)\n\nVariables:\nNMS pass or fail\nNMS score\nRNA_amp pass or fail\n%>400 bp --- cDNA quality\nConcentration pg/ul BA  ----Can't find it?\npg yield BA----amplified content same as cDNA quantity (picogreen yield)\n"

In [6]:
"""
shiny columns:

["cell_specimen_project", "Norm_Marker_Sum.0.4_label", "marker_sum_norm_label",
"rna_amplification_pass_fail", "percent_cdna_longer_than_400bp", "NOT SURE", "amplified_quantity_ng"]

"""

'\nshiny columns:\n\n["cell_specimen_project", "Norm_Marker_Sum.0.4_label", "marker_sum_norm_label",\n"rna_amplification_pass_fail", "percent_cdna_longer_than_400bp", "NOT SURE", "amplified_quantity_ng"]\n\n'

##### Start Code

In [7]:
# Columns of interest in the shiny CSV, passed to read_file below.
# NOTE(review): read_file presumably restricts the load to these columns -
# confirm against functions.temp_funcs.
fields_shiny = [
    "cell_specimen_project",
    "Norm_Marker_Sum.0.4_label",
    "marker_sum_norm_label",
    "rna_amplification_pass_fail",
    "percent_cdna_longer_than_400bp",
    "amplified_quantity_ng",
]
shiny = read_file(path_shiny, fields_shiny)

INFO:functions.temp_funcs:Read file in as a pandas dataframe


In [8]:
# Distinct project codes present in the raw frame (author's note: 10674 rows).
pd.unique(shiny["cell_specimen_project"])

array(['T301x', 'mIVSCC-MET', 'mIVSCC-METx', 'T301', 'hIVSCC-METx',
       'mMPATCHseq', 'mMPATCH', 'T301t', 'mMPATCHx', 'hIVSCC-MET', nan],
      dtype=object)

In [9]:
# Drop rows that have no project code. Reassigning instead of using
# inplace=True avoids mutating the frame behind any earlier displayed view
# and sidesteps SettingWithCopy issues; re-running the cell gives the same
# result. (Author's note: 10669 rows remain.)
shiny = shiny.dropna(subset=["cell_specimen_project"])

##### Filter project code

In [10]:
# Project codes to keep for this analysis.
project_list = [
    "hIVSCC-MET",
    "hIVSCC-METc",
    "hIVSCC-METx",
    "mIVSCC-MET",
]
# NOTE(review): the recorded result of this filter also contains
# 'mIVSCC-METx', which is not in project_list - create_cond_df presumably
# matches on substrings rather than exact values; confirm this is intended.
# (Author's note: 8623 rows.)
fill_project_shiny = create_cond_df(shiny, "cell_specimen_project", project_list)

INFO:functions.temp_funcs:Created a conditional dataframe based on cell_specimen_project containing ['hIVSCC-MET', 'hIVSCC-METc', 'hIVSCC-METx', 'mIVSCC-MET']


In [11]:
# Project codes that survived the project filter.
pd.unique(fill_project_shiny["cell_specimen_project"])

array(['mIVSCC-MET', 'mIVSCC-METx', 'hIVSCC-METx', 'hIVSCC-MET'],
      dtype=object)

##### Filter RNA Amp pass_fail

In [12]:
# Distinct RNA amplification outcomes in the project-filtered frame.
pd.unique(fill_project_shiny["rna_amplification_pass_fail"])

array(['Fail', 'Pass'], dtype=object)

In [13]:
# Distinct values of the NMS 0.4 label in the project-filtered frame.
pd.unique(fill_project_shiny["Norm_Marker_Sum.0.4_label"])

array([ True, False])

In [14]:
# Keep only the cells whose RNA amplification failed (author's note: 1401 rows).
fail_rna = filter_df(
    fill_project_shiny,
    "rna_amplification_pass_fail",
    "Fail",
)

INFO:functions.temp_funcs:Filtered dataframe based on rna_amplification_pass_fail == Fail


##### Filter on the NMS label (True/False: NMS greater than 0.4)

In [15]:
# Of the RNA-amp failures, keep those whose NMS 0.4 label is True
# (author's note: 615 rows).
true_nms_fail_rna = filter_df(
    fail_rna,
    "Norm_Marker_Sum.0.4_label",
    True,
)

INFO:functions.temp_funcs:Filtered dataframe based on Norm_Marker_Sum.0.4_label == True


##### Write DataFrames to CSV files

In [16]:
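# CSV exports are currently disabled; uncomment the lines below to write the frames under csv_path.
#shiny.to_csv(csv_path + "set_rna_amp.csv")
#true_nms_fail_rna.to_csv(csv_path + "rna_amp.csv")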