##### Imports 

In [1]:
import pandas as pd
import csv
import logging
from functions_py.mephys_funcs import read_file, merge_dataframes, filter_date, drop_cols, drop_nans, \
create_cond_df, create_container_df

import seaborn as sns
import matplotlib.pyplot as plt
sns.set(context = "notebook", style = "ticks", font="verdana") # font_scale = 1.35)

##### Pandas Display Settings

In [2]:
# Notebook display: show up to 150 characters per cell and never hide columns.
pd.options.display.max_colwidth = 150
pd.options.display.max_columns = None

##### Lists/Paths

In [3]:
# Columns requested from each source file when it is read with read_file().
fields_jem = ["date", "organism_name", "name", "container", "rigOperator",
              "status", "roi_major", "roi_minor",
              "extraction.postPatch", "extraction.endPipetteR"]
# NOTE(review): the ephys headers literally contain the b'...' wrapper,
# presumably because the CSV was written from bytes values -- confirm upstream.
fields_ephys = ["b'name'", "b'patched_cell_container'", "b'vrest'", "b'ri'", "b'sag'",
                "b'tau'", "b'upstroke_downstroke_ratio_long_square'",
                "b'latency'", "b'f_i_curve_slope'"]
# "Tree_call_label" was listed twice; each column is now requested once.
fields_shiny = ["cell_name_label", "sample_id",
                "subclass_label", "topLeaf_label", "broad_class_label",
                "Tree_call_label", "cell_specimen_project_label",
                "marker_sum_norm_label", "Norm_Marker_Sum.0.4_label"]


# Input files. The commented-out lines are alternative locations of the same
# tables (local copy of the JEM data, network copy of the Shiny mapping).
path_jem = "Z:/Patch-Seq/compiled-jem-data/jem_metadata.csv"
#path_jem = "C:/Users/ramr/Documents/Github/analysis_projects/csv/jem_metadata.csv"
path_ephys = "C:/Users/ramr/Documents/Github/analysis_projects/csv/mephys_features.csv"
#path_shiny = "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/star/mouse_patchseq_VISp_current/mapping.df.with.bp.40.lastmap.csv"
path_shiny = "C:/Users/ramr/Documents/Github/analysis_projects/csv/shiny.csv"

# Join-key columns from the ephys and Shiny tables, passed to drop_cols()
# after the merges in main().
drop_cols_list = ["b'name'", "b'patched_cell_container'", "cell_name_label", "sample_id"]
# Columns passed to drop_nans() in main().
drop_nans_list = ["date", "name", "rigOperator"]

# Rig operators whose recordings are kept (see create_cond_df in main()).
r_users = ["kristenh", "lindsayn", "ramr", "katherineb", "jessicat"]

In [4]:
# Load the Shiny mapping table on its own so it can be inspected in the next cell.
shiny = read_file(path_shiny, fields_shiny)

FileNotFoundError: File b'C:/Users/ramr/Documents/Github/analysis_projects/csv/shiny.csv' does not exist

In [None]:
shiny

##### Main

In [None]:
def main():
    """Build the merged dataframes and publish them as module-level globals.

    Reads the JEM, ephys and Shiny tables from ``path_jem``, ``path_ephys``
    and ``path_shiny``, merges them on the cell name, and passes the result
    through the ``mephys_funcs`` helpers in a fixed order. The outcome is
    stored in the globals ``merge_all``, ``merge_all_glu`` and
    ``merge_all_gab``; ``jem`` and ``shiny`` are exported as globals too.

    NOTE(review): what each helper does to the frame is defined in
    ``functions_py.mephys_funcs`` and is not visible here -- the comments
    below describe only the arguments passed.

    Returns: None
    """
    global merge_all, merge_all_glu, merge_all_gab, jem, shiny
    jem = read_file(path_jem, fields_jem)
    ephys = read_file(path_ephys, fields_ephys)
    shiny = read_file(path_shiny, fields_shiny)
    # JEM "name" is matched against the ephys "b'name'" column, then against
    # the Shiny "cell_name_label" column.
    merge_je = merge_dataframes(jem, ephys, "name", "b'name'")
    merge_all = merge_dataframes(merge_je, shiny, "name", "cell_name_label")
    merge_all = filter_date(merge_all, "date")
    merge_all = drop_cols(merge_all, drop_cols_list)
    merge_all = drop_nans(merge_all, drop_nans_list)
    merge_all = create_container_df(merge_all, "container")
    # Select on rigOperator using the r_users list.
    merge_all = create_cond_df(merge_all, "rigOperator", r_users)
    # Per-class subsets selected on broad_class_label.
    merge_all_glu = create_cond_df(merge_all, "broad_class_label", ["Glutamatergic"])
    merge_all_gab = create_cond_df(merge_all, "broad_class_label", ["GABAergic"])

In [None]:
if __name__ == "__main__":
    main()

In [None]:
merge_all

##### Mouse Region Dataframes

In [None]:
# One dataframe per region, selected from merge_all on roi_major.
# RSP and MO each pool two roi_major labels.
_region_labels = (
    ["RSPd", "RSPv"],
    ["SSp"],
    ["ORB"],
    ["CTXsp"],
    ["MOp", "MOs"],
    ["VISp"],
)
rsp, ssp, orb, ctxsp, mo, visp = (
    create_cond_df(merge_all, "roi_major", labels) for labels in _region_labels
)

##### Glutamatergic Mouse Region Dataframes

In [None]:
# Glutamatergic cells, one dataframe per region (selected on roi_major).
rsp_glu = create_cond_df(merge_all_glu, "roi_major", ["RSPd", "RSPv"])
ssp_glu = create_cond_df(merge_all_glu, "roi_major", ["SSp"])
orb_glu = create_cond_df(merge_all_glu, "roi_major", ["ORB"])
ctxsp_glu = create_cond_df(merge_all_glu, "roi_major", ["CTXsp"])
mo_glu = create_cond_df(merge_all_glu, "roi_major", ["MOp", "MOs"])
visp_glu = create_cond_df(merge_all_glu, "roi_major", ["VISp"])

# Collapse the sub-region labels into a single label per pooled region.
# The result is reassigned rather than modified with inplace=True: these
# frames presumably come from a filter of merge_all_glu, and in-place edits
# on such a frame can raise SettingWithCopyWarning.
rsp_glu = rsp_glu.replace({"roi_major": {"RSPd": "RSP", "RSPv": "RSP"}})
mo_glu = mo_glu.replace({"roi_major": {"MOp": "MO", "MOs": "MO"}})

##### GABAergic Mouse Region Dataframes

In [None]:
# GABAergic cells, one dataframe per region (selected on roi_major).
rsp_gab = create_cond_df(merge_all_gab, "roi_major", ["RSPd", "RSPv"])
ssp_gab = create_cond_df(merge_all_gab, "roi_major", ["SSp"])
orb_gab = create_cond_df(merge_all_gab, "roi_major", ["ORB"])
ctxsp_gab = create_cond_df(merge_all_gab, "roi_major", ["CTXsp"])
mo_gab = create_cond_df(merge_all_gab, "roi_major", ["MOp", "MOs"])
visp_gab = create_cond_df(merge_all_gab, "roi_major", ["VISp"])

# Collapse the sub-region labels into a single label per pooled region.
# The result is reassigned rather than modified with inplace=True: these
# frames presumably come from a filter of merge_all_gab, and in-place edits
# on such a frame can raise SettingWithCopyWarning.
rsp_gab = rsp_gab.replace({"roi_major": {"RSPd": "RSP", "RSPv": "RSP"}})
mo_gab = mo_gab.replace({"roi_major": {"MOp": "MO", "MOs": "MO"}})

##### Count Dictionary

In [None]:
def _n_cells(frame):
    """Return the number of non-null ``name`` entries in *frame*."""
    return frame["name"].count()


# Cell counts per region: all cells first, then each broad class.
region_dict = {
    "Overall Mouse Total count": _n_cells(merge_all),
    "RSP Total": _n_cells(rsp),
    "SSp Total": _n_cells(ssp),
    "ORB Total": _n_cells(orb),
    "CTXsp Total": _n_cells(ctxsp),
    "MO Total": _n_cells(mo),
    "VISp Total": _n_cells(visp),
}

glu_dict = {
    "Glutamatergic Mouse Total count": _n_cells(merge_all_glu),
    "RSP": _n_cells(rsp_glu),
    "SSp": _n_cells(ssp_glu),
    "ORB": _n_cells(orb_glu),
    "CTXsp": _n_cells(ctxsp_glu),
    "MO": _n_cells(mo_glu),
    "VISp": _n_cells(visp_glu),
}

gab_dict = {
    "GABAergic Mouse Total count": _n_cells(merge_all_gab),
    "RSP": _n_cells(rsp_gab),
    "SSp": _n_cells(ssp_gab),
    "ORB": _n_cells(orb_gab),
    "CTXsp": _n_cells(ctxsp_gab),
    "MO": _n_cells(mo_gab),
    "VISp": _n_cells(visp_gab),
}

In [None]:
# Print one "label : count" line per region.
for label, count in region_dict.items():
    print(f"{label} : {count}")

In [None]:
# Print one "label : count" line per glutamatergic region.
for label, count in glu_dict.items():
    print(f"{label} : {count}")

In [None]:
# Print one "label : count" line per GABAergic region.
for label, count in gab_dict.items():
    print(f"{label} : {count}")

##### Paths

In [None]:
# Output folders for exported CSV files, Excel workbooks and figures.
csv_path, excel_path, plot_path = (
    "C:/Users/ramr/Documents/Github/analysis_projects/csv/",
    "C:/Users/ramr/Documents/Github/analysis_projects/excel/",
    "C:/Users/ramr/Documents/Github/analysis_projects/plot/",
)

##### Writing to csv_file

In [None]:
#jem.to_csv(csv_path + "jem.csv")
#jem_2020.to_csv(csv_path + "jem_2020.csv")

##### Writing to excel_file

In [None]:
# Export the merged table and each region table to one workbook, one sheet each.
_region_sheets = [
    ("All", merge_all),
    ("RSP", rsp),
    ("SSP", ssp),
    ("ORB", orb),
    ("CTXsp", ctxsp),
    ("MO", mo),
    ("VISp", visp),
]
# The context manager saves and closes the workbook even if a to_excel call
# fails; ExcelWriter.save() no longer exists in current pandas releases.
with pd.ExcelWriter(excel_path + "new_region.xlsx") as writer:
    for sheet_name, frame in _region_sheets:
        # freeze_panes=(1, 0) keeps the header row visible while scrolling.
        frame.to_excel(writer, sheet_name=sheet_name, freeze_panes=(1, 0))

##### RSP Plots

In [None]:
# Stack the glutamatergic RSP and VISp cells into one frame; the box plots
# below use it to compare the two regions.
rsp_visp_glu = pd.concat([rsp_glu, visp_glu])

In [None]:
ssp_glu.min()

In [None]:
def box_plt(col_y, jpg_title, plt_title, ylim_min, ylim_max, df):
    """Draw a box plot with overlaid points for one ephys feature.

    Cells are grouped along the x-axis by ``subclass_label`` and split by
    ``roi_major`` (hue). The figure is saved as
    ``plot_path + "box_plt_" + jpg_title + ".jpeg"``.

    Args:
        col_y (str): column of ``df`` plotted on the y-axis.
        jpg_title (str): suffix of the saved file name.
        plt_title (str): text used as the y-axis label.
        ylim_min (float): lower y-axis limit.
        ylim_max (float): upper y-axis limit.
        df (pandas.DataFrame): data containing ``subclass_label``,
            ``roi_major`` and ``col_y`` columns.

    Returns: None
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Arguments shared by the box layer and the point layer.
    shared = dict(x="subclass_label", y=col_y, hue="roi_major", data=df, ax=ax)

    sns.boxplot(saturation=1,
                width=0.8,
                linewidth=1.3,
                showfliers=False,
                **shared)

    # dodge=True draws each hue level's points over its own box. The keyword
    # used to be called ``split``; seaborn renamed it to ``dodge``.
    sns.stripplot(jitter=False,
                  size=4.5,
                  dodge=True,
                  linewidth=0.5,
                  **shared)

    # Both layers register legend entries; only the first two are shown.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[0:2], labels[0:2], title="Region")

    plt.xticks(rotation=90)

    ax.set(xlabel="Subclass", ylabel=plt_title)
    ax.set(ylim=(ylim_min, ylim_max))
    fig.savefig(plot_path + "box_plt_" + jpg_title + ".jpeg", bbox_inches="tight")

In [None]:
# Resting membrane potential for each glutamatergic region frame.
# Every plot gets its own file suffix: with the shared "rmp" suffix all five
# calls wrote to the same box_plt_rmp.jpeg and only the last plot survived.
for _region, _region_df in [("rsp", rsp_glu),
                            ("ssp", ssp_glu),
                            ("orb", orb_glu),
                            ("ctxsp", ctxsp_glu),
                            ("mo", mo_glu)]:
    box_plt("b'vrest'", f"rmp_{_region}", "Resting Membrane Potential (mV)",
            -90, -48, _region_df)

In [None]:
# One RSP-vs-VISp box plot per ephys feature. Each entry is
# (column, file suffix, y-axis label, y-min, y-max, cutoff); only rows whose
# value is below the cutoff are plotted.
_feature_plots = [
    ("b'vrest'", "rmp", "Resting Membrane Potential (mV)", -90, -48, -50),
    ("b'ri'", "ri", "Input Resistance (MOhms)", 0, 500, 450),
    ("b'sag'", "sag", "Sag", 0, 0.35, 0.35),
    ("b'upstroke_downstroke_ratio_long_square'", "upd", "Upstroke/Downstroke Ratio", 0, 6.5, 7),
    ("b'f_i_curve_slope'", "fi", "F-I Curve Slope", 0, 0.65, 0.6),
    ("b'tau'", "tau", "Time Constant", 0, 45, 45),
    ("b'latency'", "lat", "Latency", 0, 0.15, 0.15),
]
for _col, _suffix, _label, _ymin, _ymax, _cutoff in _feature_plots:
    box_plt(_col, _suffix, _label, _ymin, _ymax,
            rsp_visp_glu[rsp_visp_glu[_col] < _cutoff])

##### All Plots

In [None]:
fig_dims = (20, 10)
f, axes = plt.subplots(1, 3, figsize=fig_dims)

# One panel per feature: a box plot per subclass with the individual
# glutamatergic cells drawn on top as a swarm.
for _panel, _feature in zip(axes, ("b'vrest'", "b'sag'", "b'tau'")):
    sns.boxplot(x="subclass_label", y=_feature,
                data=merge_all_glu, palette="Paired", ax=_panel)
    sns.swarmplot(x="subclass_label", y=_feature,
                  data=merge_all_glu, color="0.2", ax=_panel)
sns.despine(trim=True, left=False)

###### Misc


def date_return(df):
    """Return *df* indexed by ``date`` together with its first and last date.

    Args:
        df (pandas.DataFrame): frame with ``date`` as its index or as a
            column. The values must be sliceable; the first ten characters
            of the first and last rows are returned (assumes strings that
            start with the calendar date -- TODO confirm the format).

    Returns:
        tuple: ``(df, start_date, end_date)`` where ``df`` has ``date`` as
        its index.

    Raises:
        IndexError: if ``df`` has no rows.
    """
    df = df.reset_index()
    dates = df["date"]
    start_date = dates.iloc[0][0:10]
    end_date = dates.iloc[-1][0:10]
    df = df.set_index(["date"])
    # The bare name ``logger`` was never defined in this file; fetch a logger
    # from the already-imported logging module instead.
    logging.getLogger(__name__).info("Created date")
    return df, start_date, end_date



def print_counts():
    """Print the row counts of the overall, mouse-region and human dataframes.

    Takes no arguments: every dataframe (``jem_2020``, ``mjem_2020``, the
    per-region frames, ``hjem_2020``, ``hajem_2020``, ``hcjem_2020``) is read
    from a module-level name, so all of them must exist as globals before
    this function is called; otherwise a NameError is raised.

    Returns: None
    """
    #print(f"Date Range: {start_date} to {end_date}")
    # The overall count uses the container column; all others use organism_name.
    print(f"Overall Total count: {jem_2020.container.count()}")
    print()
    print(f"Mouse Total count: {mjem_2020.organism_name.count()}")
    print(f"-RSPd count: {rspd.organism_name.count()}")
    print(f"-RSPv count: {rspv.organism_name.count()}")
    print(f"-SSp count: {ssp.organism_name.count()}")
    print(f"-ORB count: {orb.organism_name.count()}")
    print(f"-CTXsp count: {ctxsp.organism_name.count()}")
    print(f"-MOp count: {mop.organism_name.count()}")
    print(f"-MOs count: {mos.organism_name.count()}")
    print(f"-HY count: {hy.organism_name.count()}")
    print(f"-HIP count: {hip.organism_name.count()}")
    print(f"-VISP count: {visp.organism_name.count()}")
    print()
    print(f"Human Total count: {hjem_2020.organism_name.count()}")
    print(f"-Human Acute count: {hajem_2020.organism_name.count()}")
    print(f"-Human Culture count: {hcjem_2020.organism_name.count()}")

def main():
    """Load the JEM metadata, split it by organism and region, print counts.

    All resulting dataframes are stored as module-level globals because
    ``choice()`` reads ``jem`` and ``print_counts()`` reads the other frames
    by name. Note that this rebinds any existing globals with the same names
    (for example ``ssp``, ``orb``, ``ctxsp`` and ``visp``).

    Returns: None
    """
    global jem, jem_2020, mjem_2020, hjem_2020, hajem_2020, hcjem_2020
    global rspd, rspv, ssp, orb, ctxsp, mop, mos, hy, hip, visp

    # ``logger`` was never defined in this file; use the imported logging module.
    logger = logging.getLogger(__name__)

    # choice() selects rows from the global ``jem``, so the raw table has to
    # be bound to that name (it used to be assigned to jem_2020 and lost).
    jem = read_jem()
    logger.info("Imported jem_metadata.csv as a dataframe")
    jem_2020 = choice()

    # First four characters of the container id.
    jem_2020["p_container"] = jem_2020.container.str[0:4]
    logger.info("Creating column to check p_container")

    # Values are passed as lists, matching every other create_cond_df call in
    # this file. NOTE(review): confirm create_cond_df requires a list.
    mjem_2020 = create_cond_df(jem_2020, "organism_name", ["Mouse"])
    hjem_2020 = create_cond_df(jem_2020, "organism_name", ["Human"])
    # print_counts() reports PCS4 containers as "Human Culture" and the
    # remaining human rows as "Human Acute".
    hajem_2020 = hjem_2020[hjem_2020["p_container"] != "PCS4"]
    hcjem_2020 = create_cond_df(hjem_2020, "p_container", ["PCS4"])

    mjem_2020 = create_region_col(mjem_2020)
    rspd = create_cond_df(mjem_2020, "roi_major", ["RSPd"])
    rspv = create_cond_df(mjem_2020, "roi_major", ["RSPv"])
    ssp = create_cond_df(mjem_2020, "roi_major", ["SSp"])
    orb = create_cond_df(mjem_2020, "roi_major", ["ORB"])
    ctxsp = create_cond_df(mjem_2020, "roi_major", ["CTXsp"])
    mop = create_cond_df(mjem_2020, "roi_major", ["MOp"])
    mos = create_cond_df(mjem_2020, "roi_major", ["MOs"])
    hy = create_cond_df(mjem_2020, "roi_major", ["HY"])
    hip = create_cond_df(mjem_2020, "roi_major", ["HIP"])
    # The label is spelled "VISp" everywhere else in this file; "VISP" would
    # not match it.
    visp = create_cond_df(mjem_2020, "roi_major", ["VISp"])
    logger.info("Created region pandas dataframes")
    print_counts()



def create_region_col(df):
    """Add a ``new_region`` column that groups ``roi_major`` into region sets.

    Every row starts as ``"default_value"``. Rows whose ``roi_major``
    contains one of the labels of a group receive that group's name. Groups
    are applied in the order listed below, so a later match overrides an
    earlier one. Rows with a missing ``roi_major`` keep the default.

    Args:
        df (pandas.DataFrame): frame with a string ``roi_major`` column.
            It is modified in place.

    Returns:
        pandas.DataFrame: the same ``df`` object with ``new_region`` added.
    """
    region_groups = (
        ("coronal_region", ["RSPd", "RSPv", "SSp"]),
        ("sagittal_region", ["MOs", "MOp", "ORB", "CTXsp"]),
        ("other_region", ["HY", "HIP"]),
        ("v1_region", ["VISp"]),
    )
    # ``LOGGER`` was never defined in this file; use the imported logging module.
    logger = logging.getLogger(__name__)

    df["new_region"] = "default_value"
    logger.info("Created a new column(new_region) with default_value")

    for region_name, labels in region_groups:
        # na=False keeps the mask strictly boolean when roi_major is missing.
        mask = df["roi_major"].str.contains("|".join(labels), na=False)
        # A single .loc assignment replaces df["new_region"][mask] = ...,
        # which is chained indexing and may write to a temporary copy.
        df.loc[mask, "new_region"] = region_name
    logger.info("Filled in new_region column with region labels")
    return df


def choice():
    """Ask which rig operator to keep and return the matching JEM rows.

    Reads the module-level ``jem`` dataframe and stores the raw answer in the
    global ``entry``. Answering ``a`` passes the whole user list to
    ``sort_df``; answering a single user name passes that name.

    Returns:
        The value returned by ``sort_df`` for the selection.

    Raises:
        ValueError: if the answer is neither ``a`` nor a listed user name.
    """
    global entry

    r_users = ["kristenh", "lindsayn", "ramr", "katherineb", "jessicat"]

    print("These are all user options: kristenh, lindsayn, ramr, katherineb, jessicat")
    entry = input("Enter single user name or all users (a):")
    if entry == "a":
        selection = r_users
    elif entry in r_users:
        # A single user is passed as a plain string, exactly as before.
        # NOTE(review): confirm sort_df accepts both a list and a string.
        selection = entry
    else:
        # Previously this branch only printed a hint (naming a non-existent
        # option 'r') and then failed on the unbound jem_2020.
        raise ValueError(
            "Please enter 'a' for all users or one of: " + ", ".join(r_users)
        )

    jem_2020 = sort_df(jem, selection)
    # ``LOGGER`` was never defined in this file; use the imported logging module.
    logging.getLogger(__name__).info("Sorted jem by date range: 1/03/2020 - present")
    return jem_2020

#if __name__ == "__main__":
    
     """jem = read_jem()
    logger.info("Imported jem_metadata.csv as a dataframe")
    jem_2020 = choice()
    
    jem_2020["p_container"] = jem_2020.container.str[0:4]
    logger.info("Creating column to check p_container")
    jem_2020.p_container.unique()
    
    jem_2020 = jem_2020.reset_index()
    start_date = jem_2020["date"].iloc[0][0:10]
    end_date = jem_2020["date"].iloc[-1][0:10]
    jem_2020 = jem_2020.set_index(["date"])
    
    mjem_2020 = create_cond_df(jem_2020, "organism_name", "Mouse")
    hjem_2020 = create_cond_df(jem_2020, "organism_name", "Human")
    hajem_2020 = hjem_2020[hjem_2020["p_container"] != "PCS4"]
    hcjem_2020 = create_cond_df(hjem_2020, "p_container", "PCS4")

    mjem_2020 = create_region_col(mjem_2020)
    rspd = create_cond_df(mjem_2020, "roi_major", "RSPd")
    rspv = create_cond_df(mjem_2020, "roi_major", "RSPv")
    ssp = create_cond_df(mjem_2020, "roi_major", "SSp")
    orb = create_cond_df(mjem_2020, "roi_major", "ORB")
    ctxsp = create_cond_df(mjem_2020, "roi_major", "CTXsp")
    mop = create_cond_df(mjem_2020, "roi_major", "MOp")
    mos = create_cond_df(mjem_2020, "roi_major", "MOs")
    hy = create_cond_df(mjem_2020, "roi_major", "HY")
    hip = create_cond_df(mjem_2020, "roi_major", "HIP")
    visp = create_cond_df(mjem_2020, "roi_major", "VISP")
    logger.info("Created region pandas dataframes")
    
    print_counts()"""
    

In [None]:
# Write the JEM and Shiny dataframes to the local csv folder without the
# index column. csv_path + "shiny.csv" is the same file path_shiny points to.
csv_path = "C:/Users/ramr/Documents/Github/analysis_projects/csv/"
jem.to_csv(csv_path + "jem_metadata.csv", index=False)
shiny.to_csv(csv_path + "shiny.csv", index=False)