# Two-way ANOVA test

In [1]:
from typing import Tuple

import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
import pandas as pd
import pickle

from param import *
from modules.dataloader import UniformSegmentDataset, BaseDataset
from modules.utils.util import segment, get_place_cell


output_dir = ParamDir().OUTPUT_ROOT/ "data_exploration/"
data_list = ParamDir().data_list

## place cell ratio

In [8]:
def get_place_cell_ratio(data_name):
    # print(data_name)
    with open(output_dir/data_name/"MI_all.pickle","rb") as f:
        results_all = pickle.load(f)

    pc_beh_id, pc_event_id = get_place_cell(results_all, 0.001)

    return len(pc_beh_id)/len(results_all['original MI'])

In [9]:
anova_data = []

KO_names = ["091317 OF CaMKII HKO M19-n1"] # mice that passed the test
KO_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" in str(data_dir).split('/')[-1]]
for data_dir in KO_data_list:
    data_name = str(data_dir).split('/')[-1]
    ratio = get_place_cell_ratio(data_name)
    if data_name not in KO_names:
        anova_data.append([ratio, "KO", "didnot pass"])
    else:
        anova_data.append([ratio, "KO", "pass"])

WT_names = ["M45_042718_OF", "M46_042718_OF", "092217 OF CaMKII WT M29-n1"] # mice that didn't pass the test
WT_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" not in str(data_dir).split('/')[-1]]
for data_dir in WT_data_list:
    data_name = str(data_dir).split('/')[-1]
    ratio = get_place_cell_ratio(data_name)
    if data_name in WT_names:
        anova_data.append([ratio, "WT", "didnot pass"])
    else:
        anova_data.append([ratio, "WT", "pass"])

df = pd.DataFrame(anova_data, columns=["place_cell_ratio", "mouse_type", "permutation_test"])

model = ols('place_cell_ratio ~ C(mouse_type) + C(permutation_test) + C(mouse_type):C(permutation_test)', data=df).fit()
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(mouse_type),0.021797,1.0,0.939087,0.360894
C(permutation_test),0.051502,1.0,2.218923,0.174657
C(mouse_type):C(permutation_test),0.000545,1.0,0.023481,0.882008
Residual,0.185683,8.0,,


### discretized place cell ratio

In [10]:
anova_data = []

decoding_res_file = "tsc_train_rocket_Ridge_threshold_segment_False"

KO_names = ["091317 OF CaMKII HKO M19-n1"] # mice that passed the test
KO_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" in str(data_dir).split('/')[-1]]
for data_dir in KO_data_list:
    data_name = str(data_dir).split('/')[-1]
    with open(ParamDir().OUTPUT_ROOT/"time_series_classification"/data_name/(f"{decoding_res_file}.pickle"),"rb") as f:
            results = pickle.load(f)
    score = results["scores"]

    ratio = get_place_cell_ratio(data_name)
    if ratio<0.2:
        pcr_level = "low"
    elif ratio<0.3:
        pcr_level = "mid"
    else:
        pcr_level = "high"

    if data_name not in KO_names:
        anova_data.append([pcr_level, "KO", score])
    else:
        anova_data.append([pcr_level, "KO", score])

WT_names = ["M45_042718_OF", "M46_042718_OF", "092217 OF CaMKII WT M29-n1"] # mice that didn't pass the test
WT_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" not in str(data_dir).split('/')[-1]]
for data_dir in WT_data_list:
    data_name = str(data_dir).split('/')[-1]
    with open(ParamDir().OUTPUT_ROOT/"time_series_classification"/data_name/(f"{decoding_res_file}.pickle"),"rb") as f:
            results = pickle.load(f)
    score = results["scores"]

    ratio = get_place_cell_ratio(data_name)
    if ratio<0.2:
        pcr_level = "low"
    elif ratio<0.3:
        pcr_level = "mid"
    else:
        pcr_level = "high"

    if data_name in WT_names:
        anova_data.append([pcr_level, "WT", score])
    else:
        anova_data.append([pcr_level, "WT", score])

df = pd.DataFrame(anova_data, columns=["pcr_level", "mouse_type", "score"])

model = ols('score ~ C(mouse_type) + C(pcr_level) + C(mouse_type):C(pcr_level)', data=df).fit()
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(mouse_type),0.045612,1.0,1.692534,0.234452
C(pcr_level),0.078181,2.0,1.450542,0.297135
C(mouse_type):C(pcr_level),0.129874,2.0,2.409654,0.159871
Residual,0.188641,7.0,,


## Median time of staying in one place

In [None]:
anova_data = []

KO_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" in  str(data_dir).split('/')[-1]]

segment_len_all = []
for i, data_dir in enumerate(KO_data_list):
    segment_len = []
    data_name = str(data_dir).split('/')[-1]
    dataset = UniformSegmentDataset(data_dir, ParamData().mobility, ParamData().shuffle, ParamData().random_state)
    (X_train, y_train), (X_test, y_test) = dataset.load_all_data(ParamData().window_size, ParamData().K, ParamData().train_ratio)

    segment_ind = segment(dataset.y_train_base)
    segment_len.append(round((segment_ind[0]+1)/3, 2))
    for i in range(1, len(segment_ind)):
        segment_len.append(round((segment_ind[i]-segment_ind[i-1])/3, 2))
    segment_len_all.append([segment_len, data_name])
    

for item in segment_len_all:
    if item[1] in ["091317 OF CaMKII HKO M19-n1"]:
        anova_data.append([np.median(item[0]), "KO", "pass"])
    else:
        anova_data.append([np.median(item[0]), "KO", "didnot pass"])

WT_names = ["M45_042718_OF", "M46_042718_OF", "092217 OF CaMKII WT M29-n1"]
WT_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" not in  str(data_dir).split('/')[-1]]

segment_len_all = []
for i, data_dir in enumerate(WT_data_list):
    segment_len = []
    data_name = str(data_dir).split('/')[-1]
    dataset = UniformSegmentDataset(data_dir, ParamData().mobility, ParamData().shuffle, ParamData().random_state)
    (X_train, y_train), (X_test, y_test) = dataset.load_all_data(ParamData().window_size, ParamData().K, ParamData().train_ratio)

    segment_ind = segment(dataset.y_train_base)
    segment_len.append(round((segment_ind[0]+1)/3, 2))
    for i in range(1, len(segment_ind)):
        segment_len.append(round((segment_ind[i]-segment_ind[i-1])/3, 2))
    segment_len_all.append([segment_len, data_name])
    

for item in segment_len_all:
    if item[1] not in WT_names:
        anova_data.append([np.median(item[0]), "WT", "didnot pass"])
    else:
        anova_data.append([np.median(item[0]), "WT", "pass"])

df = pd.DataFrame(anova_data, columns=["median_time_in_one_position", "mouse_type", "permutation_test"])

model = ols('median_time_in_one_position ~ C(mouse_type) + C(permutation_test) + C(mouse_type):C(permutation_test)', data=df).fit()
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(mouse_type),28.633978,1.0,29.302299,0.000636
C(permutation_test),0.064732,1.0,0.066243,0.803384
C(mouse_type):C(permutation_test),1.694403,1.0,1.733951,0.224376
Residual,7.817537,8.0,,


## Number of samples after downsample

In [12]:
anova_data = []

KO_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" in  str(data_dir).split('/')[-1]]

data_amout_all = []
for i, data_dir in enumerate(KO_data_list):
    segment_len = []
    data_name = str(data_dir).split('/')[-1]
    dataset = UniformSegmentDataset(data_dir, ParamData().mobility, ParamData().shuffle, ParamData().random_state)
    (X_train, y_train), (X_test, y_test) = dataset.load_all_data(ParamData().window_size, ParamData().K, ParamData().train_ratio)

    data_amout_all.append([len(X_train), data_name])
    

for item in data_amout_all:
    if item[1] in ["091317 OF CaMKII HKO M19-n1"]:
        anova_data.append([np.mean(item[0]), "KO", "pass"])
    else:
        anova_data.append([np.mean(item[0]), "KO", "didnot pass"])


WT_names = ["M45_042718_OF", "M46_042718_OF", "092217 OF CaMKII WT M29-n1"] # didnot pass
WT_data_list = [data_dir for data_dir in ParamDir().data_list if "KO" not in  str(data_dir).split('/')[-1]]

data_amout_all = []
for i, data_dir in enumerate(WT_data_list):
    segment_len = []
    data_name = str(data_dir).split('/')[-1]
    dataset = UniformSegmentDataset(data_dir, ParamData().mobility, ParamData().shuffle, ParamData().random_state)
    (X_train, y_train), (X_test, y_test) = dataset.load_all_data(ParamData().window_size, ParamData().K, ParamData().train_ratio)

    data_amout_all.append([len(X_train), data_name])
    

for item in data_amout_all:
    if item[1] not in  WT_names:
        anova_data.append([np.mean(item[0]), "WT", "pass"])
    else:
        anova_data.append([np.mean(item[0]), "WT", "didnot pass"])

df = pd.DataFrame(anova_data, columns=["data_after_downsample", "mouse_type", "permutation_test"])

model = ols('data_after_downsample ~ C(mouse_type) + C(permutation_test) + C(mouse_type):C(permutation_test)', data=df).fit()
sm.stats.anova_lm(model, typ=2)


Unnamed: 0,sum_sq,df,F,PR(>F)
C(mouse_type),894.675325,1.0,4.623645,0.063749
C(permutation_test),1201.875325,1.0,6.211242,0.037392
C(mouse_type):C(permutation_test),44.181818,1.0,0.22833,0.645549
Residual,1548.0,8.0,,
