In [None]:
%reload_ext autoreload
%autoreload 2

import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import ranksums
from scipy.stats import ttest_ind
from statannotations.Annotator import Annotator
from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.weightstats import ztest

from ripple_heterogeneity.utils import functions

# Render matplotlib figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Folder that the plt.savefig calls in this notebook write figure panels to.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this network drive is mounted; consider a configurable base directory.
fig_save_path = r'Z:\home\ryanh\projects\ripple_heterogeneity\figures\panels' #change to v7
# Apply the project's plotting defaults (helper from ripple_heterogeneity.utils;
# its exact effect is not visible from this notebook).
functions.set_plotting_defaults()

## find data files

In [None]:

# Recursively collect every CSV below the analysis root, then reduce the list
# to the sorted, de-duplicated set of folders that contain at least one CSV.
files = glob.glob(
    r"Z:\home\Heath\Viral_tracing\CTB\CALB1\calb1_analysis\**/*.csv",
    recursive=True,
)

# np.unique both sorts and removes duplicates; the result is a NumPy array.
dirnames = np.unique([os.path.dirname(csv_path) for csv_path in files])

## load data

In [None]:
# Build one long-format table of labelled cells: one row per cell, tagged with
# its label ("color") and the folder it was read from ("folder_name").
#
# NOTE(review): the labels written here are "red", "green" and "co_label",
# while later cells in this notebook filter on "Red-Calb", "Green-Calb" and
# "Calb" -- confirm which naming is intended, otherwise those filters select
# no rows.
cell_file_labels = {
    "*redcells.csv": "red",
    "*greencells.csv": "green",
    "*colabel.csv": "co_label",
}

frames = []
for dirname in dirnames:
    layer_files = glob.glob(os.path.join(dirname, "*layerID.csv"))
    if not layer_files:
        # Fail with the offending folder instead of an opaque IndexError.
        raise FileNotFoundError(f"No *layerID.csv file found in {dirname}")
    layer_id = pd.read_csv(layer_files[0])

    for pattern, label in cell_file_labels.items():
        matches = glob.glob(os.path.join(dirname, pattern))
        if not matches:
            # This folder has no file for this label; nothing to add.
            continue
        df_temp = pd.read_csv(matches[0])
        df_temp["color"] = label
        df_temp["folder_name"] = dirname
        # Reference depths come from the first two rows of the layerID file's
        # Y column (presumably the radiatum and oriens borders -- TODO confirm).
        # Set per file so no lookup on a possibly empty combined frame is needed.
        df_temp["layer_rad_y"] = layer_id.Y.iloc[0]
        df_temp["layer_or_y"] = layer_id.Y.iloc[1]
        frames.append(df_temp)

# Concatenate once instead of growing the frame inside the loop (quadratic).
# pd.concat raises ValueError if no cell files were found at all.
df = pd.concat(frames, ignore_index=True)

# Depth relative to the two reference lines: 0 at layer_rad_y, 1 at layer_or_y.
df["normalized_dist"] = np.abs(df.Y - df.layer_rad_y) / abs(df.layer_rad_y - df.layer_or_y)
df
#df.to_csv(r"C:\Users\Cornell\Downloads\df.csv",index=False)

In [None]:
# Same layout as the table of labelled cells above, but built from the
# "*redalone.csv" / "*greenalone.csv" files of each folder.
alone_file_labels = {
    "*redalone.csv": "redalone",
    "*greenalone.csv": "greenalone",
}

frames_ind = []
for dirname in dirnames:
    layer_files = glob.glob(os.path.join(dirname, "*layerID.csv"))
    if not layer_files:
        # Fail with the offending folder instead of an opaque IndexError.
        raise FileNotFoundError(f"No *layerID.csv file found in {dirname}")
    layer_id = pd.read_csv(layer_files[0])

    for pattern, label in alone_file_labels.items():
        matches = glob.glob(os.path.join(dirname, pattern))
        if not matches:
            # This folder has no file for this label; nothing to add.
            continue
        df_temp = pd.read_csv(matches[0])
        df_temp["color"] = label
        df_temp["folder_name"] = dirname
        # Reference depths come from the first two rows of the layerID file's
        # Y column (presumably the radiatum and oriens borders -- TODO confirm).
        # Set per file so no lookup on a possibly empty combined frame is needed.
        df_temp["layer_rad_y"] = layer_id.Y.iloc[0]
        df_temp["layer_or_y"] = layer_id.Y.iloc[1]
        frames_ind.append(df_temp)

# Concatenate once instead of growing the frame inside the loop (quadratic).
# pd.concat raises ValueError if no cell files were found at all.
df_ind = pd.concat(frames_ind, ignore_index=True)

# Depth relative to the two reference lines: 0 at layer_rad_y, 1 at layer_or_y.
df_ind["normalized_dist"] = np.abs(df_ind.Y - df_ind.layer_rad_y) / abs(df_ind.layer_rad_y - df_ind.layer_or_y)
df_ind

In [None]:
# Wilcoxon rank-sum tests comparing normalized depth between each pair of groups.
# NOTE(review): the loading cell labels rows "red" / "green" / "co_label", but
# these filters use "Red-Calb" / "Green-Calb" / "Calb" -- confirm the labels
# match, otherwise every sample below is empty.
red_calb_dist = df.query("color=='Red-Calb'").normalized_dist
green_calb_dist = df.query("color=='Green-Calb'").normalized_dist
calb_dist = df.query("color=='Calb'").normalized_dist

red_green_ranksums = ranksums(red_calb_dist, green_calb_dist)
red_calb_ranksums = ranksums(red_calb_dist, calb_dist)
green_calb_ranksums = ranksums(green_calb_dist, calb_dist)

for result in (red_green_ranksums, red_calb_ranksums, green_calb_ranksums):
    print(result)


## plot smoothed distribution of distances per projection type

In [None]:
# Per-group KDE of normalized depth, drawn along the y axis, with dashed
# reference lines at 0 and 1, saved as an SVG panel.
# NOTE(review): the bar-plot cell passes functions.set_size(...) straight to
# figsize, which suggests it returns (width, height); if so, `h` and `w` are
# swapped here -- confirm this is intentional.
h, w = functions.set_size("thesis", fraction=.5, subplots=(1, 1))

group_colors = {"Red-Calb": "#f9021a", "Green-Calb": "#18ff5e", "Calb": "#00FFFF"}

# The group colours are installed as the global seaborn palette rather than
# passed to displot; later plotting cells inherit this palette.
palette = sns.color_palette(group_colors.values())
sns.set_palette(palette)

sns.displot(
    df,
    y="normalized_dist",
    hue="color",
    hue_order=group_colors,
    kind="kde",
    fill=False,
    common_norm=False,
    height=h,
    aspect=w/h,
    alpha=1,
)

# Dashed lines at the two reference depths (normalized depth 0 and 1).
for depth in (0, 1):
    plt.axhline(depth, color="k", linestyle="--")

plt.xlabel("Density of labelled CA1 cells")
plt.ylabel("Normalized Depth")

out_file = os.path.join(fig_save_path, 'Normalized_depth_labelled_CA1_cells_Calb.svg')
plt.savefig(out_file, dpi=300, bbox_inches='tight')


In [None]:
# Independent two-sample t-tests comparing normalized depth between each pair
# of groups. `ttest_ind` is imported in the notebook's top import cell.
# NOTE(review): the loading cell labels rows "red" / "green" / "co_label", but
# these filters use "Red-Calb" / "Green-Calb" / "Calb" -- confirm the labels
# match, otherwise every sample below is empty.
red_calb_dist = df.query("color=='Red-Calb'").normalized_dist
green_calb_dist = df.query("color=='Green-Calb'").normalized_dist
calb_dist = df.query("color=='Calb'").normalized_dist

red_green_ind_ttest = ttest_ind(red_calb_dist, green_calb_dist)
red_calb_ind_ttest = ttest_ind(red_calb_dist, calb_dist)
green_calb_ind_ttest = ttest_ind(green_calb_dist, calb_dist)

print(red_green_ind_ttest)
print(red_calb_ind_ttest)
print(green_calb_ind_ttest)

## Z-test between red, green and calb distances


In [None]:



# Two-sample z-tests comparing normalized depth between each pair of groups.
# The original cell called an undefined `zscore(a, b)`; a two-sample test is
# what the section header describes, so statsmodels' `ztest` (imported in the
# top import cell) is used. It returns (z statistic, p-value) and by default
# is two-sided with a pooled variance estimate.
# The `*_zscore` variable names are kept so any later cells that use them still work.
# NOTE(review): the loading cell labels rows "red" / "green" / "co_label", but
# these filters use "Red-Calb" / "Green-Calb" / "Calb" -- confirm the labels
# match, otherwise every sample below is empty.
red_calb_dist = df.query("color=='Red-Calb'").normalized_dist
green_calb_dist = df.query("color=='Green-Calb'").normalized_dist
calb_dist = df.query("color=='Calb'").normalized_dist

red_green_zscore = ztest(red_calb_dist, green_calb_dist)
red_calb_zscore = ztest(red_calb_dist, calb_dist)
green_calb_zscore = ztest(green_calb_dist, calb_dist)

print(red_green_zscore)
print(red_calb_zscore)
print(green_calb_zscore)


## get proportions of cells based on calb

In [None]:
# Count the cells per label and express each count as a fraction of all cells.
# groupby(...).size() gives the same per-group row counts as the previous
# apply(lambda ...) form, without the deprecated apply-over-grouping-columns path.
prop_df = df.groupby("color").size().reset_index(name="n_cells")
prop_df["prop"] = prop_df.n_cells / prop_df.n_cells.sum()
# Dummy column with a single value; gives a bar plot one shared x position.
prop_df["constant"] = 1

# Fractions plotted in the bar plot below. Despite the column name, the
# "percentage" values are fractions (0-1), not percentages.
# NOTE(review): these counts (6, 251 out of 435) are hard-coded and are not
# derived from `df` / `prop_df` -- confirm their source and keep them in sync
# with the data.
data = [
    {'color': 'Green-Calb', 'percentage': 6/435},
    {'color': 'Red-Calb', 'percentage': 251/435},
]
prop_calb_df = pd.DataFrame(data)
prop_calb_df["constant"] = 1


In [None]:
# Bar plot of the hard-coded Calb1 proportions, one bar per group at a single
# x position, saved as an SVG panel.
fig_size = functions.set_size("thesis", fraction=.3, subplots=(2, 1))
plt.figure(figsize=fig_size)

# Only the key order of this dict is used (as hue_order); no palette is passed
# to barplot, so the bar colours presumably come from the palette set via
# sns.set_palette in the KDE plot cell -- run that cell first.
group_colors_percentage = {"Red-Calb": "#f9021a", "Green-Calb": "#18ff5e"}

sns.barplot(
    data=prop_calb_df,
    x="constant",
    hue="color",
    y="percentage",
    hue_order=group_colors_percentage,
    edgecolor="k",
    saturation=1,
)
sns.despine(bottom=True)
plt.ylabel("Proportion of Calb1 Cells")
plt.xlabel("")
# The x position is a dummy constant, so hide its tick labels.
plt.xticks([])

out_file = os.path.join(fig_save_path, 'prop_of_labelled_cells_calb.svg')
plt.savefig(out_file, dpi=300, bbox_inches='tight')
# plt.ylim(0,.75)


## make distribution plot for each animal