##### Imports 

In [1]:
import pandas as pd
import numpy as np
import csv
import logging
import os
from functions.temp_funcs import read_file, merge_dataframes, drop_cols, drop_nans, filter_date_range, \
filter_df, create_container_col, create_cond_df

import seaborn as sns
import matplotlib.pyplot as plt
# Notebook-wide seaborn theme: "notebook" context, white background, Verdana font.
sns.set(
    context="notebook",
    style="white",
    font="verdana",
)

##### Logging Set-up

In [2]:
# Logger owned by this notebook.
LOGGER = logging.getLogger(__name__)
# Root configuration: show INFO and above (the helper module's INFO records
# appear in the pipeline cell's output).
logging.basicConfig(level=logging.INFO)

##### Pandas Display Settings

In [3]:
# Show up to 150 characters per cell before pandas truncates the text.
pd.options.display.max_colwidth = 150
# None = no limit: always render every column of a frame.
pd.options.display.max_columns = None

##### Lists/Paths

In [4]:
# Root folder of the analysis project. The original machine-specific location is
# kept as the default; set the ANALYSIS_SCRIPTS_PATH environment variable to run
# the notebook from another checkout without editing this cell.
path = os.environ.get(
    "ANALYSIS_SCRIPTS_PATH",
    "C:/Users/kumar/Documents/Github/analysis_projects/analysis_scripts/",
)
# Sub-folders; the trailing "/" is kept so each value still ends with a separator.
csv_path = os.path.join(path, "csv/")
excel_path = os.path.join(path, "excel/")
plot_path = os.path.join(path, "plot/")

In [5]:
# Locations of the three input tables, all inside the shared csv folder.
path_jem, path_ephys, path_shiny = (
    os.path.join(csv_path, file_name)
    for file_name in (
        "jem_metadata_wFAILURE.csv",
        "ephys_mIVSCC_MET.csv",
        "Mouse_VISp_ctx_shiny.csv",
    )
)

In [6]:
# Columns requested from the JEM metadata file.
fields_jem = [
    "date",
    "organism_name",
    "container",
    "rigOperator",
    "status",
    "roi",
]

# Columns requested from the ephys feature file.
fields_ephys = [
    "name",
    "vrest",
    "ri",
    "sag",
    "tau",
    "upstroke_downstroke_ratio_long_square",
    "latency",
    "f_i_curve_slope",
]

# Columns requested from the shiny file.
fields_shiny = [
    "patch.date",
    "cell_name",
    "sample_id",
    "cell_specimen_project",
    "subclass_label",
    "broad_class_label",
    "marker_sum_norm_label",
    "Norm_Marker_Sum.0.4_label",
]

In [7]:
# Rows of the merged table with a missing value in any of these columns are dropped.
drop_nans_list = [
    "date",
]

# Columns removed from the merged table once the joins are done.
drop_cols_list = [
    "sample_id",
    "patch.date",
    "status",
    "name",
    "cell_specimen_project",
    "organism_name",
]

##### Main

In [8]:
def main():
    """Load, filter and merge the JEM, ephys and shiny tables, then build per-region subsets.

    The results are published as module-level globals (see the ``global``
    statement) because the later notebook cells read ``rsp_glu``, ``visp_glu``
    and ``rsp_visp_glu`` directly. Takes no arguments and returns ``None``.

    The trailing ``#N rows`` comments are the original author's row counts for
    one data snapshot and are not checked by the code.
    """
    global jem, shiny, ephys, merge_sj, merge_all, merge_all_glu, merge_all_gab, rsp_glu, visp_glu, rsp_visp_glu

    # Read each input table with its list of wanted columns.
    jem = read_file(path_jem, fields_jem) #20843 rows
    ephys = read_file(path_ephys, fields_ephys) #8541 rows
    shiny = read_file(path_shiny, fields_shiny) #10674 rows

    # Keep successful JEM entries inside the helper's date window
    # (logged by the helper as 2019-01-01 - 2020-12-31).
    jem = filter_df(jem, "status", "SUCCESS") #13325 rows
    jem = filter_date_range(jem, "date") #6335 rows

    # Keep shiny rows whose Norm_Marker_Sum.0.4_label is True, same date window.
    shiny = filter_df(shiny, "Norm_Marker_Sum.0.4_label", True) #9101 rows
    shiny = filter_date_range(shiny, "patch.date") #2849 rows

    # shiny.sample_id <-> jem.container, then cell_name <-> ephys.name (inner joins).
    merge_sj = merge_dataframes(shiny, jem, "sample_id", "container", "inner") #2850 rows (even if how=left)
    merge_all = merge_dataframes(merge_sj, ephys, "cell_name", "name", "inner") #2635 rows

    merge_all = drop_nans(merge_all, drop_nans_list)
    merge_all = drop_cols(merge_all, drop_cols_list)
    # NOTE(review): create_container_col is a project helper; its exact output is
    # not visible from this notebook.
    merge_all = create_container_col(merge_all, "rigOperator")
    merge_all.set_index("date", inplace=True)

    # (substring searched in ``roi``, region label) in priority order: the first
    # substring that matches wins, like the nested where-chain this replaces.
    roi_to_region = [
        ("RSP", "RSP"),
        ("VISp", "VISp"),
        ("TEa", "TEa"),
        ("CTXsp", "CLA"),
        ("MO", "MO"),
        ("ORB", "ORB"),
        ("SSp", "SSp"),
        ("HY", "HY"),
    ]
    roi = merge_all["roi"]
    # na=False: a missing roi matches nothing and falls through to "Unknown"
    # instead of leaving a NaN in the mask (NumPy does not treat NaN as False).
    # regex=False: the tokens are plain substrings, not patterns.
    region_masks = [roi.str.contains(token, na=False, regex=False) for token, _ in roi_to_region]
    region_labels = [label for _, label in roi_to_region]
    # np.select replaces the removed ``pd.np`` alias (gone since pandas 2.0).
    merge_all["region"] = np.select(region_masks, region_labels, default="Unknown")

    # Split by broad class, then pull the two regions compared in this notebook.
    merge_all_glu = create_cond_df(merge_all, "broad_class_label", ["Glutamatergic"])
    merge_all_gab = create_cond_df(merge_all, "broad_class_label", ["GABAergic"])

    rsp_glu = create_cond_df(merge_all_glu, "region", ["RSP"])
    visp_glu = create_cond_df(merge_all_glu, "region", ["VISp"])
    rsp_visp_glu = pd.concat([rsp_glu, visp_glu])

In [9]:
# Run the pipeline when this cell is executed; main() publishes its results as
# module-level globals that the cells below rely on.
if __name__ == "__main__":
    main()

INFO:functions.temp_funcs:Read file in as a pandas dataframe
INFO:functions.temp_funcs:Read file in as a pandas dataframe
INFO:functions.temp_funcs:Read file in as a pandas dataframe
INFO:functions.temp_funcs:Filtered dataframe based on status == SUCCESS
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values([date_col], inplace=True)
INFO:functions.temp_funcs:Filtered dataframe: 2019-01-01 - 2020-12-31
INFO:functions.temp_funcs:Filtered dataframe based on Norm_Marker_Sum.0.4_label == True
INFO:functions.temp_funcs:Filtered dataframe: 2019-01-01 - 2020-12-31
INFO:functions.temp_funcs:Merged two pandas dataframe into one dataframe
INFO:functions.temp_funcs:Merged two pandas dataframe into one dataframe
INFO:functions.temp_funcs:Dropped NaNs from these columns: ['date']
INFO:functions.temp_funcs:Dropped columns: ['sample_id'

##### HeatMap: Filtering dataframe for outliers

In [None]:
# NOTE: this cell used to hold a pasted template operating on an undefined ``df``
# with 'policyNo.' / 'writtenPremium' columns that do not exist in this dataset,
# so it raised NameError on Restart & Run All. The same
# groupby(...).diff().fillna(...) pattern is applied to this notebook's own data
# in the cell that fills rv_df["vrest_mean"].

In [12]:
# Select from a reset *copy*: rsp_visp_glu itself is left untouched, so this cell
# can be re-run safely and a later reset_index on rsp_visp_glu does not add a
# stray "index" column.
rv_df = rsp_visp_glu.reset_index()[["subclass_label", "region", "vrest"]]

In [14]:
# Collapse to one row per (subclass_label, region) holding the mean of the
# remaining columns; as_index=False keeps the group keys as ordinary columns.
rv_df = (
    rv_df
    .groupby(["subclass_label", "region"], as_index=False)
    .mean()
)

In [18]:
# Within each region, store the change in vrest from the previous row of that
# region; the first row of a region has no predecessor and keeps its own vrest.
# NOTE(review): despite its name, "vrest_mean" holds these successive
# differences, not a mean.
rv_df["vrest_mean"] = (
    rv_df.groupby(["region"])["vrest"].diff().fillna(rv_df["vrest"])
)

In [19]:
# Display the per-(subclass, region) table together with the derived column.
rv_df

Unnamed: 0,subclass_label,region,vrest,vrest_mean
0,DG,VISp,-71.160976,-71.160976
1,L2 IT ENTm,VISp,-68.620692,2.540284
2,L2 IT RSP-ACA,RSP,-76.811338,-76.811338
3,L2 IT RSP-ACA,VISp,-76.773133,-8.152441
4,L2/3 IT CTX,RSP,-79.119185,-2.307847
5,L2/3 IT CTX,VISp,-77.377667,-0.604534
6,L2/3 IT PPP,VISp,-75.377449,2.000218
7,L3 RSP-ACA,RSP,-65.180289,13.938895
8,L3 RSP-ACA,VISp,-66.775607,8.601843
9,L4/5 IT CTX,RSP,-59.979497,5.200793


In [None]:
# Exclusive upper bounds applied to every region's table: a cell is kept only when
# each listed feature is strictly below its bound.
OUTLIER_UPPER_BOUNDS = {
    "f_i_curve_slope": 0.7,
    "ri": 600,
    "upstroke_downstroke_ratio_long_square": 7,
    "latency": 0.15,
    "sag": 0.40,
    "vrest": -50,
    "tau": 40,
}


def drop_ephys_outliers(df, upper_bounds=None):
    """Return the rows of ``df`` whose features are all below their upper bound.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing every column named in ``upper_bounds``.
    upper_bounds : dict, optional
        Mapping of column name -> exclusive upper bound. Defaults to
        ``OUTLIER_UPPER_BOUNDS``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the surviving rows. Rows with a missing value in
        a bounded column are dropped too, because ``NaN < bound`` is False.
    """
    if upper_bounds is None:
        upper_bounds = OUTLIER_UPPER_BOUNDS
    kept = df
    for column, bound in upper_bounds.items():
        kept = kept[kept[column] < bound]
    # Copy so later in-place edits do not act on a slice of the source frame.
    return kept.copy()


# Same thresholds for both regions (previously two copy-pasted cells).
rsp_glu = drop_ephys_outliers(rsp_glu)
visp_glu = drop_ephys_outliers(visp_glu)

In [None]:
def _date_to_column_sorted(df):
    """Return ``df`` with a named index moved into a column, sorted by subclass_label.

    The index is only reset while it still carries a name (the "date" index set
    in main()). A frame that was already reset is left as is, so re-running this
    cell, or running it after another cell reset the same frame, does not add
    stray "index" / "level_0" columns.
    """
    if df.index.name is not None:
        df = df.reset_index()
    return df.sort_values(["subclass_label"])


rsp_glu = _date_to_column_sorted(rsp_glu)
visp_glu = _date_to_column_sorted(visp_glu)
rsp_visp_glu = _date_to_column_sorted(rsp_visp_glu)

In [None]:
# Subclasses kept for the heatmap tables.
sub5_list = [
    "L2 IT RSP-ACA",
    "L5 NP CTX",
    "L5 ET CTX",
]

In [None]:
# Labels plus the seven ephys features for the combined RSP + VISp table.
# NOTE(review): rsp_visp_glu was concatenated inside main(), before the outlier
# cells re-assigned rsp_glu / visp_glu, so these rows are not outlier-filtered.
rv_df = rsp_visp_glu.loc[
    :,
    [
        "subclass_label",
        "region",
        "vrest",
        "ri",
        "sag",
        "tau",
        "upstroke_downstroke_ratio_long_square",
        "latency",
        "f_i_curve_slope",
    ],
]

In [None]:
# Display the combined RSP + VISp feature table.
rv_df

In [None]:
# create_cond_df is a project helper; presumably it keeps the rows whose
# "subclass_label" is one of sub5_list — TODO confirm against functions.temp_funcs.
rv_heatmap_df = create_cond_df(rv_df, "subclass_label", sub5_list)

In [None]:
# Display the subclass-restricted table.
rv_heatmap_df

##### Heatmap: Mean calculation

In [None]:
def mean_ephys_by_subclass(df):
    """Return the mean of the seven ephys features per (subclass_label, region).

    ``df`` must contain the two label columns and the feature columns listed
    below; the group keys come back as ordinary columns.
    """
    wanted = ["subclass_label", "region", "vrest", "ri", "sag", "tau",
              "upstroke_downstroke_ratio_long_square", "latency", "f_i_curve_slope"]
    per_group = df[wanted].groupby(["subclass_label", "region"])
    return per_group.mean().reset_index()


# Same aggregation for each region's table.
r_df = mean_ephys_by_subclass(rsp_glu)
v_df = mean_ephys_by_subclass(visp_glu)

In [None]:
# Inner join on subclass: columns coming from r_df (RSP) get the default "_x"
# suffix and those from v_df (VISp) get "_y".
heatmap_df = r_df.merge(v_df, on="subclass_label")

In [None]:
# create_cond_df is a project helper; presumably it keeps the rows whose
# "subclass_label" is one of sub5_list — TODO confirm against functions.temp_funcs.
heatmap_df = create_cond_df(heatmap_df, "subclass_label", sub5_list)

##### Heatmap: ((RSP - VISp) / VISp) * 100 to get %change from VISp

In [None]:
def _pct_change_from_visp(df, feature):
    """Return the percent shift of the RSP mean relative to the VISp mean.

    Reads ``<feature>_x`` (RSP) and ``<feature>_y`` (VISp) from ``df`` and
    computes (new - old) / |old| * 100. Dividing by the absolute baseline keeps
    the sign meaningful when the baseline is negative (as for vrest) and changes
    nothing when it is positive.
    """
    rsp_mean = df[feature + "_x"]
    visp_mean = df[feature + "_y"]
    return (rsp_mean - visp_mean) / visp_mean.abs() * 100


# One formula for every feature (previously seven copy-pasted lines, only one of
# which used the absolute baseline).
heatmap_df = heatmap_df.assign(**{
    feature: _pct_change_from_visp(heatmap_df, feature)
    for feature in ("vrest", "ri", "sag", "tau",
                    "upstroke_downstroke_ratio_long_square", "latency", "f_i_curve_slope")
})
heatmap_df

In [None]:
# Display label for each percent-change column.
# NOTE(review): "τ(s)" says seconds, yet the outlier filter caps tau at 40 — confirm units.
FEATURE_LABELS = {
    "vrest": "Vm(mV)",
    "ri": "Input Resistance(MΩ)",
    "sag": "Sag Fraction",
    "tau": "τ(s)",
    "upstroke_downstroke_ratio_long_square": "LS AP up/downstroke",  # was misspelled "downstoke"
    "latency": "Latency",
    "f_i_curve_slope": "F-I Curve",
}

# Per-region inputs left over from the merge: <feature>_x / <feature>_y and region_x / region_y.
merge_leftovers = [
    name + suffix
    for name in list(FEATURE_LABELS) + ["region"]
    for suffix in ("_x", "_y")
]

# drop -> rename -> long format -> features-by-subclass matrix, in one chain
# without in-place edits.
heatmap_df = (
    heatmap_df
    .drop(columns=merge_leftovers)
    .rename(columns=FEATURE_LABELS)
    .melt(id_vars=["subclass_label"],
          value_vars=list(FEATURE_LABELS.values()),
          var_name="ephys_features",
          value_name="%change from VISp")
    .pivot_table(values="%change from VISp",
                 index=["ephys_features"],
                 columns="subclass_label")
)
heatmap_df

##### Heatmap: Plot

In [None]:
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(data=heatmap_df,
            vmin=-55,
            vmax=55,
            cmap="coolwarm",
            center=0,
            fmt="0.2g",  # only used if annotations are switched on
            linewidths=0.5,
            square=True,
            cbar=True,
            cbar_kws={"shrink": 1},
            ax=ax)  # draw on the axes created above rather than the implicit current axes

ax.set_title("Heatmap: %shift of RSP with respect to VISp")
ax.set_xlabel("Subclass")
ax.set_ylabel("Electrophysiological Features")

# Some matplotlib builds clip the first and last heatmap rows. Setting the limits
# explicitly (one unit per row, first row on top) is correct on every version,
# unlike nudging whatever limits the installed version produced.
ax.set_ylim(heatmap_df.shape[0], 0)

# Label the ends of the colorbar in words and the inner ticks in percent.
cbar = ax.collections[0].colorbar
cbar.set_ticks([-55, -40, -20, 0, 20, 40, 55])
cbar.set_ticklabels(["RSP Negative Shift from VISp",
                     "-40%",
                     "-20%",
                     "Neutral Shift",
                     "20%",
                     "40%",
                     "RSP Positive Shift from VISp"])

# Make sure the output folder exists, then save through the figure handle.
os.makedirs(plot_path, exist_ok=True)
fig.savefig(os.path.join(plot_path, "heatmap.jpeg"), bbox_inches="tight")