In [1]:
# !pip install odfpy
# !pip install seaborn
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import yaml
from yaml import Loader
import numpy as np
import analyze_cells_energy as ace
import utils
from scipy.stats import ttest_ind

In [2]:
# Where the experiment lives on disk: <root>/<experiment>/<experimental group>/
root_directory = '/home/stella/Documents/Torino/projects/'
experiment = 'SexualImprinting'
experimental_group = 'C57_MaleUrine_Exposure_cFos'
data_directory = '{}{}/{}/'.format(root_directory, experiment, experimental_group)
# NOTE(review): subjects are listed from root_directory although the per-subject
# files are read from data_directory -- confirm this is what ace.list_subjects expects.
subjects = ace.list_subjects(root_directory)

In [3]:
# load query file where we added volumes for each area
# volumes = pd.read_csv("query_complete_with_volumes.csv")
# volumes = ace.clean_volumes_database(volumes)

In [4]:
# Read the query file to which the per-area volumes were added, then pass it
# through the project's clean-up helper.
volumes = ace.clean_volumes_database(pd.read_csv("query_volumes_reordered.csv"))

In [5]:
volumes

Unnamed: 0,id,atlas_id,name,st_level,parent_structure_id,depth,structure_id_path,safe_name,mean_volume,0,1,2,3,4,5,6,7,8,9,10
0,997,-1.0,root,0,,0,/997/,root,435.287861,997,,,,,,,,,,
1,8,0.0,Basic cell groups and regions,1,997.0,1,/997/8/,Basic cell groups and regions,197.462441,997,8.0,,,,,,,,,
2,567,70.0,Cerebrum,2,8.0,2,/997/8/567/,Cerebrum,120.883542,997,8.0,567.0,,,,,,,,
3,688,85.0,Cerebral cortex,3,567.0,3,/997/8/567/688/,Cerebral cortex,97.403809,997,8.0,567.0,688.0,,,,,,,
4,695,86.0,Cortical plate,4,688.0,4,/997/8/567/688/695/,Cortical plate,93.461964,997,8.0,567.0,688.0,695.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1322,49,713.0,intraparafloccular fissure,8,1040.0,3,/997/1024/1040/49/,intraparafloccular fissure,,997,1024.0,1040.0,49.0,,,,,,,
1323,57,714.0,paramedian sulcus,8,1040.0,3,/997/1024/1040/57/,paramedian sulcus,,997,1024.0,1040.0,57.0,,,,,,,
1324,65,715.0,parafloccular sulcus,8,1040.0,3,/997/1024/1040/65/,parafloccular sulcus,,997,1024.0,1040.0,65.0,,,,,,,
1325,624,926.0,Interpeduncular fossa,7,1024.0,2,/997/1024/624/,Interpeduncular fossa,,997,1024.0,624.0,,,,,,,,


In [6]:
def load_subject(subject, filename='cells300.csv'):
    """
    Load the cells table of one subject and reformat it.

    subject: name of the subject's folder inside ``data_directory``.
    filename: name of the csv file to read from that folder; defaults to
        'cells300.csv', the file used throughout this notebook.

    Returns whatever ``ace.reformat_df_mouse`` produces for the loaded table.
    """
    # os.path.join yields a valid path whether or not data_directory ends
    # with a trailing separator.
    csv_path = os.path.join(data_directory, subject, filename)
    return ace.reformat_df_mouse(df=pd.read_csv(csv_path))

# Analysis on C57 : threshold 300

In [None]:
# One results table per mouse, keyed by subject name; level=8 is forwarded
# unchanged to the project helper.
dict_results_across_mice = {}
for subject in subjects:
    dict_results_across_mice[subject] = ace.calculate_cells_energy_per_level(
        df_mouse=load_subject(subject),
        vol=volumes,
        level=8)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


# List areas with null cell count

In [None]:
# For every mouse keep only the rows (areas) whose cell count is exactly zero
db_null_count = {
    subject: mouse_results[mouse_results['n_cells'].eq(0)]
    for subject, mouse_results in ((s, dict_results_across_mice[s]) for s in subjects)
}

In [None]:
# Pool the zero-count areas of all mice and plot, for each area, in how many
# mice it had no detected cells.
null_areas = np.hstack([db_null_count[subject].area.values for subject in subjects])
fig, ax = plt.subplots(1, 1, figsize=(20, 3))
# pandas selects the target axes through `ax=`; the previous `axes=` keyword was
# only forwarded to matplotlib and the plot landed on `ax` merely because it was
# the current axes.
pd.Series(null_areas).value_counts(sort=True).plot(kind='bar', ax=ax)


In [None]:
# print areas that are null for all mice
df = pd.DataFrame(pd.Series(null_areas).value_counts(sort=False))
df.columns = ['counts']
# "all mice" means one occurrence per subject, so compare against the number
# of subjects instead of a hard-coded cohort size.
df = df[df['counts'] == len(subjects)]
print(df.to_string())

# List areas with no volume

In [None]:
# For every mouse keep only the rows (areas) whose energy value is missing
db_null_volume = {
    subject: mouse_results[mouse_results['energy'].isnull()]
    for subject, mouse_results in ((s, dict_results_across_mice[s]) for s in subjects)
}

In [None]:
# Pool the areas with missing energy of all mice and plot, for each area, in
# how many mice the energy is missing.
null_volume = np.hstack([db_null_volume[subject].area.values for subject in subjects])
fig, ax = plt.subplots(1, 1, figsize=(20, 3))
# pandas selects the target axes through `ax=`; the previous `axes=` keyword was
# only forwarded to matplotlib and the plot landed on `ax` merely because it was
# the current axes.
pd.Series(null_volume).value_counts(sort=True).plot(kind='bar', ax=ax)

In [None]:
# print areas whose energy is null for all mice
df = pd.DataFrame(pd.Series(null_volume).value_counts(sort=False))
df.columns = ['counts']
# "all mice" means one occurrence per subject, so compare against the number
# of subjects instead of a hard-coded cohort size.
df = df[df['counts'] == len(subjects)]
print(df.to_string())

# Divide mice into experimental groups: FAM/UNFAM/CONTROL

In [None]:
experimental_groups = utils.divide_in_exp_groups(list_subjects=subjects)
experimental_groups

# Calculate cell count across experimental groups

In [None]:
def _collect_group_values(group_subjects, dict_results_across_mice, value):
    """
    Build the table of one experimental group: an 'area' column followed by
    one column per subject holding that subject's `value`.
    """
    df_group = pd.DataFrame()
    for subject in group_subjects:
        subject_results = dict_results_across_mice[subject]
        # column assignment aligns on the row index of the per-subject tables
        df_group['area'] = subject_results['area']
        df_group[subject] = subject_results[value]
    return df_group


def calculate_value_across_groups(experimental_groups, dict_results_across_mice, value='n_cells'):
    """
    Gather one column of the per-mouse results into one table per group.

    experimental_groups: dict with the keys 'Control', 'Fam' and 'Unfam',
        each mapping to the list of subjects belonging to that group.
    dict_results_across_mice: dict mapping each subject to its results
        table, which must contain an 'area' column and a `value` column.
    value: name of the column to collect; can either be n_cells or energy.

    Returns the tuple (df_control, df_fam, df_unfam). A group without
    subjects yields an empty DataFrame.
    """
    df_control, df_fam, df_unfam = (
        _collect_group_values(experimental_groups[group], dict_results_across_mice, value)
        for group in ('Control', 'Fam', 'Unfam'))
    return df_control, df_fam, df_unfam

In [None]:
df_control_cell_count, df_fam_cell_count, df_unfam_cell_count = \
calculate_value_across_groups(experimental_groups=experimental_groups, 
                              dict_results_across_mice=dict_results_across_mice, 
                              value='n_cells')

In [None]:
df_control_cell_count

In [None]:
df_fam_cell_count

In [None]:
df_unfam_cell_count

## Calculate t-tests across groups per area

In [None]:
def _values_by_area(df_group, areas):
    """
    Return the per-subject values of `df_group` as a float matrix with one
    row per entry of `areas` (rows of NaN for areas missing from the group).
    """
    values = df_group.drop(columns='area').astype(float)
    values.index = df_group['area'].values
    # a lookup by area name needs unique labels: keep the first row per area
    values = values[~values.index.duplicated(keep='first')]
    return values.reindex(areas).to_numpy()


def ttest_across_groups(df_control, df_fam, df_unfam):
    """
    Run independent two-sample t-tests between the groups for every area.

    df_control, df_fam, df_unfam: tables with an 'area' column and one
        numeric column per subject (as built by calculate_value_across_groups).

    Returns a DataFrame with the index and 'area' column of `df_control` and
    the float columns 'pval_Control_vs_Fam', 'pval_Control_vs_Unfam' and
    'pval_Fam_vs_Unfam'. A p-value is NaN wherever scipy cannot compute it
    (e.g. NaN input or an area absent from one of the groups).
    """
    areas = df_control['area'].values
    values = {'Control': _values_by_area(df_control, areas),
              'Fam': _values_by_area(df_fam, areas),
              'Unfam': _values_by_area(df_unfam, areas)}
    df_ttest = pd.DataFrame({'area': df_control['area']})
    for first, second in (('Control', 'Fam'), ('Control', 'Unfam'), ('Fam', 'Unfam')):
        if len(areas) == 0:
            pvalues = np.empty(0)
        else:
            # one test per row (area); whole columns are assigned at once
            # instead of writing single cells through chained indexing
            pvalues = ttest_ind(values[first], values[second], axis=1).pvalue
        df_ttest['pval_{}_vs_{}'.format(first, second)] = pvalues
    return df_ttest


In [None]:
df_ttest_ncells = ttest_across_groups(df_control=df_control_cell_count, 
                               df_fam=df_fam_cell_count, 
                               df_unfam=df_unfam_cell_count)
df_ttest_ncells

In [None]:
# remove rows (areas) for which none of the three comparisons produced a p-value, e.g. zero cell count in every group
df_ttest_not_null_ncells = df_ttest_ncells.dropna(axis=0,
                                   how='all',
                                   subset=['pval_Control_vs_Fam',
                                          'pval_Control_vs_Unfam',
                                          'pval_Fam_vs_Unfam'])
df_ttest_not_null_ncells

In [None]:
# sort dataframe pvalues of control vs fam and pick the first 20
df_ttest_not_null_ncells.sort_values(by='pval_Control_vs_Fam')[['area', 'pval_Control_vs_Fam']][0:20].set_index('area')

In [None]:
# sort dataframe pvalues of control vs unfam and pick the first 20
df_ttest_not_null_ncells.sort_values(by='pval_Control_vs_Unfam')[['area', 'pval_Control_vs_Unfam']][0:20].set_index('area')

In [None]:
# sort dataframe pvalues of fam vs unfam and pick the first 20
df_ttest_not_null_ncells.sort_values(by='pval_Fam_vs_Unfam')[['area', 'pval_Fam_vs_Unfam']][0:20].set_index('area')

## Code to calculate cross corr matrix

In [None]:
def cross_corr(df):
    """
    Pearson correlation between areas, computed across the subject columns.

    Areas whose value is zero for every subject are discarded, as are areas
    with a NaN for any subject; the result is an area-by-area matrix.
    """
    per_area = df.set_index('area')
    zero_everywhere = per_area.eq(0).all(axis=1)
    usable = per_area.loc[~zero_everywhere].dropna(axis=0)
    # transpose so that areas become the columns that are correlated
    return usable.T.corr(method='pearson')

## Calculate cross corr across groups per area on cell count

In [None]:
corr_matrix_control_ncells = cross_corr(df_control_cell_count)
corr_matrix_control_ncells

In [None]:
corr_matrix_fam_ncells = cross_corr(df_fam_cell_count)
corr_matrix_fam_ncells

In [None]:
corr_matrix_unfam_ncells = cross_corr(df_unfam_cell_count)
corr_matrix_unfam_ncells

## Plot cross corr matrix across groups per area on cell count

In [None]:
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix_control_ncells,
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    vmin=-1.0, vmax=1.0,
    square=True, ax=ax)
ax.set_title('CONTROL')

In [None]:
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix_fam_ncells,
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    vmin=-1.0, vmax=1.0,
    square=True, ax=ax)
ax.set_title('FAM')

In [None]:
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix_unfam_ncells,
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    vmin=-1.0, vmax=1.0,
    square=True, ax=ax)
ax.set_title('UNFAM')

# Calculate energy across experimental groups

In [None]:
df_control_energy, df_fam_energy, df_unfam_energy = \
calculate_value_across_groups(experimental_groups, dict_results_across_mice, value='energy')

In [None]:
df_control_energy

In [None]:
df_fam_energy

In [None]:
df_unfam_energy

In [None]:
df_ttest_energy = ttest_across_groups(df_control=df_control_energy,
                                      df_fam=df_fam_energy,
                                      df_unfam=df_unfam_energy)
df_ttest_energy

In [None]:
# remove rows (areas) for which none of the three comparisons produced a p-value
df_ttest_not_null_energy = df_ttest_energy.dropna(axis=0,
                                   how='all',
                                   subset=['pval_Control_vs_Fam',
                                          'pval_Control_vs_Unfam',
                                          'pval_Fam_vs_Unfam'])
df_ttest_not_null_energy

In [None]:
# sort dataframe pvalues of control vs fam and pick the first 20
df_ttest_not_null_energy.sort_values(by='pval_Control_vs_Fam')[['area', 'pval_Control_vs_Fam']][0:20].set_index('area')

In [None]:
# sort dataframe pvalues of control vs unfam and pick the first 20
df_ttest_not_null_energy.sort_values(by='pval_Control_vs_Unfam')[['area', 'pval_Control_vs_Unfam']][0:20].set_index('area')

In [None]:
# sort dataframe pvalues of fam vs unfam and pick the first 20
df_ttest_not_null_energy.sort_values(by='pval_Fam_vs_Unfam')[['area', 'pval_Fam_vs_Unfam']][0:20].set_index('area')

## Calculate cross corr matrix across experimental groups on energy

In [None]:
corr_matrix_control_energy = cross_corr(df_control_energy)
corr_matrix_control_energy

In [None]:
corr_matrix_fam_energy = cross_corr(df_fam_energy)
corr_matrix_fam_energy

In [None]:
corr_matrix_unfam_energy = cross_corr(df_unfam_energy)
corr_matrix_unfam_energy

## Plot cross corr matrix across groups per area on energy

In [None]:
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix_control_energy,
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    vmin=-1.0, vmax=1.0,
    square=True, ax=ax)
ax.set_title('CONTROL')

In [None]:
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix_fam_energy,
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    vmin=-1.0, vmax=1.0,
    square=True, ax=ax)
ax.set_title('FAM')

In [None]:
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix_unfam_energy,
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    vmin=-1.0, vmax=1.0,
    square=True, ax=ax)
ax.set_title('UNFAM')