##### Imports

In [1]:
import pandas as pd
import numpy as np
import csv
import logging
import os

from functions.temp_funcs import read_file, merge_dataframes, drop_cols, drop_nans, filter_date_range, \
filter_df, create_container_col, create_cond_df, create_container_df

import seaborn as sns
import matplotlib.pyplot as plt

# Plot theme for every figure in this notebook (a font_scale of 1.35 was tried and left disabled).
sns.set(context="notebook", style="ticks", font="verdana")

# Show up to 150 characters per cell and never truncate the list of columns.
pd.options.display.max_colwidth = 150
pd.options.display.max_columns = None

ModuleNotFoundError: No module named 'functions'

##### Paths

In [None]:
# The project root is taken to be the parent of the current working directory.
path = os.path.dirname(os.getcwd())

# Each sub-directory keeps its trailing "/" because later cells build file
# names with plain string concatenation (e.g. plot_path + "figure.jpeg").
csv_path, excel_path, plot_path = (
    os.path.join(path, folder) for folder in ("csv/", "excel/", "plot/")
)

# Input CSV files for the mouse and human data sets.
path_shiny_mouse, path_shiny_human = (
    os.path.join(csv_path, file_name)
    for file_name in ("current_shiny_mouse.csv", "current_shiny_human.csv")
)

##### Start Code: columns to load, display names, and data loading

In [None]:
# Columns requested from the Shiny CSV exports (passed to read_file below).
# NOTE(review): "collaborator_label" is used further down but is not requested
# here - presumably it is added by create_container_df; confirm.
fields_shiny = [
    "cell_name",
    "cell_specimen_project",
    "Norm_Marker_Sum.0.4_label",
    "marker_sum_norm_label",
    "rna_amplification_pass_fail",
    "percent_cdna_longer_than_400bp",
    "amplified_quantity_ng",
    "sample_id",
    "Tree_call",
    "seurat_prediction_score_label",
]

# Raw column name -> human-readable name used for filtering and as plot labels.
shiny_dict = dict([
    ("Norm_Marker_Sum.0.4_label", "NMS Pass/Fail"),
    ("rna_amplification_pass_fail", "RNA amp Pass/Fail"),
    ("marker_sum_norm_label", "NMS score"),
    ("percent_cdna_longer_than_400bp", "cDNA quality (%>400 bp)"),
    ("amplified_quantity_ng", "Amplified content (ng)"),
])

In [None]:
# Load the mouse export, switch to the readable column names and drop rows
# that have no project code.
# (Row counts noted by the original author: 10839 loaded, 10834 after the drop.)
shiny_mouse = (
    read_file(path_shiny_mouse, fields_shiny)
    .rename(columns=shiny_dict)
    .dropna(subset=["cell_specimen_project"])
)

In [None]:
# Load the human export, switch to the readable column names and drop rows
# that have no project code.
# (Row counts noted by the original author: 3592 loaded, 3578 after the drop.)
shiny_human = (
    read_file(path_shiny_human, fields_shiny)
    .rename(columns=shiny_dict)
    .dropna(subset=["cell_specimen_project"])
)

##### Filter project code

In [None]:
# Mouse MET cells that pass the NMS check but fail RNA amplification.
# filter_df / create_container_df are project helpers; filter_df presumably
# keeps the rows whose column equals the given value - confirm in temp_funcs.
shinym = (
    filter_df(shiny_mouse, "cell_specimen_project", "mIVSCC-MET")
    .pipe(create_container_df, "sample_id")
    # All three QC metrics must be present.
    .dropna(subset=["cDNA quality (%>400 bp)", "NMS score", "Amplified content (ng)"])
    # Keep cDNA quality <= 1 and amplified content <= 50 ng.
    .loc[lambda df: (df["cDNA quality (%>400 bp)"] <= 1) & (df["Amplified content (ng)"] <= 50)]
    .pipe(filter_df, "NMS Pass/Fail", True)
    # Use "Pass" instead of "Fail" here to look at the RNA-amp passing cells.
    .pipe(filter_df, "RNA amp Pass/Fail", "Fail")
)

In [None]:
# Human MET cells that pass the NMS check but fail RNA amplification.
# filter_df / create_container_df are project helpers; filter_df presumably
# keeps the rows whose column equals the given value - confirm in temp_funcs.
shinyh = (
    filter_df(shiny_human, "cell_specimen_project", "hIVSCC-MET")
    .pipe(create_container_df, "sample_id")
    # All three QC metrics must be present.
    .dropna(subset=["cDNA quality (%>400 bp)", "NMS score", "Amplified content (ng)"])
    # Keep cDNA quality <= 1 and amplified content <= 50 ng.
    .loc[lambda df: (df["cDNA quality (%>400 bp)"] <= 1) & (df["Amplified content (ng)"] <= 50)]
    .pipe(filter_df, "NMS Pass/Fail", True)
    .pipe(filter_df, "RNA amp Pass/Fail", "Fail")
)

# Two-character collaborator codes -> display names; every code other than
# PG and PH in this list is labelled "AIBS".
collaborator_dict = {"PG": "Gabor", "PH": "Mansvelder"}
collaborator_dict.update(
    dict.fromkeys(["P1", "P2", "P6", "P7", "P8", "P9", "PA", "PB", "PE", "PX"], "AIBS")
)

# NOTE(review): "collaborator_label" is not among the loaded fields -
# presumably create_container_df adds it; confirm.
shinyh["collaborator_label"] = shinyh["collaborator_label"].replace(collaborator_dict)

##### Merging mouse and human shiny df

In [None]:
# Stack the mouse and human frames, then apply the same upper bounds on
# cDNA quality (<= 1) and amplified content (<= 50 ng) to the combined frame.
# NOTE(review): the original cell carried row counts 8593 / 1577 / 1550 as
# comments; they look like they come from an earlier run - verify.
shiny = (
    pd.concat([shinym, shinyh], ignore_index=True, sort=False)
    .loc[lambda df: (df["cDNA quality (%>400 bp)"] <= 1) & (df["Amplified content (ng)"] <= 50)]
)

##### Counts

In [None]:
# Split the human cells by collaborator label.
aibs, pg, ph = (
    filter_df(shinyh, "collaborator_label", group)
    for group in ("AIBS", "Gabor", "Mansvelder")
)

# Report the number of non-null sample_id values in each frame.
count_rows = [
    ("Mouse Human total count", shiny),
    ("Mouse total count", shinym),
    ("Human total count", shinyh),
    ("AIBS count", aibs),
    ("Gabor count", pg),
    ("Mansvelder count", ph),
]
for caption, frame in count_rows:
    print(f"{caption}: {frame.sample_id.count()}")

##### Plots

In [None]:
# Colour per specimen project (hue for the mouse / combined plots).
spec_colors = dict(zip(("mIVSCC-MET", "hIVSCC-MET"), ("#ef3b2c", "#4292c6")))

# Colour per human collaborator.
# Alternative blues tried previously: AIBS #c6dbef, Gabor #6baed6, Mansvelder #2171b5.
human_colors = dict(AIBS="#6baed6", Gabor="#2171b5", Mansvelder="#08306b")

In [None]:
# Pairwise regression plots of the three QC metrics for the mouse cells.
grid = sns.pairplot(
    data=shinym,
    vars=["cDNA quality (%>400 bp)", "NMS score", "Amplified content (ng)"],
    hue="cell_specimen_project",
    palette=spec_colors,
    kind="reg",
    diag_kind="kde",
    height=3,
    aspect=1,
)
# Only one project is plotted, so the legend is removed.
grid._legend.remove()

# Axis range per metric, in the same order as `vars`: x-limits are set on the
# top row (one per column), y-limits on the last column (one per row).
metric_limits = [(0, 1), (0, 1.5), (0, 50)]
for col_idx, limits in enumerate(metric_limits):
    grid.axes[0, col_idx].set_xlim(limits)
for row_idx, limits in enumerate(metric_limits):
    grid.axes[row_idx, 2].set_ylim(limits)

plt.savefig(plot_path + "mouse_pairplot_nms_pass_rna_fail.jpeg", bbox_inches="tight")

In [None]:
# Pairwise regression plots of the three QC metrics for the human cells,
# coloured by collaborator.
grid = sns.pairplot(
    data=shinyh,
    vars=["cDNA quality (%>400 bp)", "NMS score", "Amplified content (ng)"],
    hue="collaborator_label",
    palette=human_colors,
    kind="reg",
    diag_kind="kde",
    height=3,
)

# Axis range per metric, in the same order as `vars`: x-limits are set on the
# top row (one per column), y-limits on the last column (one per row).
metric_limits = [(0, 1), (0, 1.5), (0, 50)]
for col_idx, limits in enumerate(metric_limits):
    grid.axes[0, col_idx].set_xlim(limits)
for row_idx, limits in enumerate(metric_limits):
    grid.axes[row_idx, 2].set_ylim(limits)

plt.savefig(plot_path + "human_pairplot_nms_pass_rna_fail.jpeg", bbox_inches="tight")

In [None]:
# Same human pairplot with a green / blue / red palette.
# The palette gets its own name so the `human_colors` palette defined earlier
# is not rebound: re-running the previous pairplot cell after this one would
# otherwise pick up different colours.
human_colors_contrast = {"AIBS": "#41ab5d", "Gabor": "#4292c6", "Mansvelder": "#ef3b2c"}
grid = sns.pairplot(
    data=shinyh,
    vars=["cDNA quality (%>400 bp)", "NMS score", "Amplified content (ng)"],
    hue="collaborator_label",
    palette=human_colors_contrast,
    kind="reg",
    diag_kind="kde",
    height=3,
)

# Axis range per metric, in the same order as `vars`: x-limits are set on the
# top row (one per column), y-limits on the last column (one per row).
metric_limits = [(0, 1), (0, 1.5), (0, 50)]
for col_idx, limits in enumerate(metric_limits):
    grid.axes[0, col_idx].set_xlim(limits)
for row_idx, limits in enumerate(metric_limits):
    grid.axes[row_idx, 2].set_ylim(limits)

# Saved under a distinct file name: the previous cell already writes
# "human_pairplot_nms_pass_rna_fail.jpeg", and reusing that name silently
# overwrote its figure.
plt.savefig(plot_path + "human_pairplot_nms_pass_rna_fail_contrast.jpeg", bbox_inches="tight")

In [None]:
# Pairwise regression plots of the three QC metrics for mouse and human cells
# together, coloured by specimen project.
grid = sns.pairplot(
    data=shiny,
    vars=["cDNA quality (%>400 bp)", "NMS score", "Amplified content (ng)"],
    hue="cell_specimen_project",
    palette=spec_colors,
    kind="reg",
    diag_kind="kde",
    height=3,
)

# Axis range per metric, in the same order as `vars`: x-limits are set on the
# top row (one per column), y-limits on the last column (one per row).
metric_limits = [(0, 1), (0, 1.5), (0, 50)]
for col_idx, limits in enumerate(metric_limits):
    grid.axes[0, col_idx].set_xlim(limits)
for row_idx, limits in enumerate(metric_limits):
    grid.axes[row_idx, 2].set_ylim(limits)

plt.savefig(plot_path + "mouse_human_pairplot_nms_pass_rna_fail.jpeg", bbox_inches="tight")

In [None]:
# Three stacked violin plots (one per QC metric) comparing mouse and human cells.
fig_dims = (3, 13)
f, axes = plt.subplots(nrows=3, ncols=1, figsize=fig_dims)

# Left-to-right order of the violins; the sample-size labels below must follow it.
project_order = ["mIVSCC-MET", "hIVSCC-MET"]

# (metric column, upper bound): rows above the bound are left out of that panel.
panels = [
    ("cDNA quality (%>400 bp)", 1),
    ("NMS score", 1.5),
    ("Amplified content (ng)", 50),
]
for panel_ax, (metric, upper) in zip(axes, panels):
    sns.violinplot(x="cell_specimen_project", y=metric, data=shiny[shiny[metric] <= upper],
                   order=project_order, inner="box", scale="area",
                   palette=spec_colors, saturation=1, ax=panel_ax)

# Keep the original per-panel names available (top, middle, bottom).
ax, ax1, ax2 = axes

# Only the bottom panel carries tick labels and an x-axis title.
ax.set(xticklabels=[])
ax1.set(xticklabels=[])

ax.set_xlabel("")
ax1.set_xlabel("")
ax2.set_xlabel("Specimen Project Label")

# Sample size per project, drawn above each violin in the top panel.
# value_counts() sorts by descending frequency, so the counts are reindexed to
# the plotting order; otherwise the labels swap whenever the second project
# has more rows than the first.
project_counts = shiny["cell_specimen_project"].value_counts().reindex(project_order, fill_value=0)
nobs = [f"n: {count}" for count in project_counts]

# Categorical violins sit at x = 0, 1, ... in `project_order`.
for x_pos, text in enumerate(nobs):
    ax.text(x_pos, 1, text, horizontalalignment='center', size="large", color='black', weight='bold')

plt.savefig(plot_path + "violinplot_nms_pass_rna_fail.jpeg", bbox_inches="tight")

In [None]:
def single_vio_plt(col_y, lim_y, plot_name):
    """Draw and save one mouse-vs-human violin plot for a single metric.

    Reads the notebook-level `shiny`, `spec_colors` and `plot_path`.

    Parameters
    ----------
    col_y : str
        Column of `shiny` plotted on the y-axis.
    lim_y : number
        Upper bound; rows with `col_y` above it are left out.
    plot_name : str
        Suffix of the output file name
        ("violinplot_nms_pass_rna_fail_<plot_name>.jpeg" under `plot_path`).
    """
    fig, axis = plt.subplots(figsize=(3, 4))

    within_limit = shiny[shiny[col_y] <= lim_y]
    sns.violinplot(
        data=within_limit,
        x="cell_specimen_project",
        y=col_y,
        order=["mIVSCC-MET", "hIVSCC-MET"],
        inner="box",
        scale="area",
        palette=spec_colors,
        saturation=1,
        ax=axis,  # the axes just created, i.e. the current axes
    )

    out_file = "".join((plot_path, "violinplot_nms_pass_rna_fail_", plot_name, ".jpeg"))
    plt.savefig(out_file, bbox_inches="tight")

In [None]:
# One stand-alone violin plot per metric: (column, upper bound, file-name suffix).
single_plot_specs = [
    ("cDNA quality (%>400 bp)", 1, "cdna"),
    ("NMS score", 1.5, "nms"),
    ("Amplified content (ng)", 50, "amp"),
]
for metric, upper, suffix in single_plot_specs:
    single_vio_plt(metric, upper, suffix)