# Imports and Constants

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import skelm
import sklearn.linear_model
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import random
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.preprocessing import FunctionTransformer
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE

from saving_outputs import *
from load_data import *
from masks import *
from decoding import *
from plots import *
from utility import *
from metrics import *

In [None]:
# ---------------------------------------------------------------------------
# Analysis configuration
# ---------------------------------------------------------------------------
from_who = "our"            # whose maps/masks to use: "mohameds" or "our"
voxel_size = "3"            # voxel size: "2" or "3"
radius_mask = "10"          # ROI radius in mm (author's note: from 5 to 11)
use_t_maps = True           # True -> t-maps, False -> beta-maps
correction = "_Tcorrected"  # folder suffix telling whether a t-value was used to create the ROIs
use_pca = False             # add a PCA step to the pipelines
components_pca = 20         # number of components kept by the PCA step
use_k_selector = False      # add a SelectKBest step to the pipelines
use_rfe = False             # add an RFE step to the pipelines
features_rfe = 25           # number of features kept by the RFE step

# Input folders are derived from the settings above.
maps_folder = f"brain_maps/{from_who}_maps_{voxel_size}"
masks_folder = f"masks/{from_who}_masks_{voxel_size}_radius={radius_mask}{correction}"

# Seed the standard-library RNG (NumPy's global RNG is not seeded here).
SEED = 0
random.seed(SEED)

# The class list is repeated once per run to build the label vector;
# each modality gets its own array object.
classes = ['Up', 'Down', 'Right', 'Left']
nb_runs = 12
labels = {modality: np.array(classes * nb_runs) for modality in ('vis', 'aud')}
labels_same = np.array(classes * nb_runs)

subjects_ids = range(1, 24)
n_subjects = len(subjects_ids)
n_individual_perms = 1000

# Each analysis is a (modalities, regions) pair.
# Within-modality: one modality at a time, for each bilateral ROI pair.
within_modal_tasks_regions = [
    ([modality], [roi + "_L", roi + "_R"])
    for modality in ("vis", "aud")
    for roi in ("V5", "PT")
]

# Cross-modal: both modalities together, for each bilateral ROI pair.
cross_modal_tasks_regions = [
    (["vis", "aud"], [roi + "_L", roi + "_R"])
    for roi in ("V5", "PT")
]

def k_selector(n_voxel, fraction=0.7):
    """Return how many features to keep out of ``n_voxel``.

    Args:
        n_voxel: total number of available features (voxels).
        fraction: share of the features to keep, between 0 and 1.
            Defaults to 0.7, the value that used to be hard-coded.

    Returns:
        ``fraction * n_voxel`` truncated towards zero, as an ``int``.

    Raises:
        ValueError: if ``fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= fraction <= 1:
        raise ValueError("fraction must be between 0 and 1, got " + repr(fraction))
    return int(fraction * n_voxel)

# Standardisation step placed first in every pipeline.
std_scaler = sklearn.preprocessing.StandardScaler()

# Univariate feature selector scored with f_classif; it is only inserted in
# the pipelines when use_k_selector is True.
selector = SelectKBest(f_classif)

# Classifiers to evaluate. Each key is reused as the name of the final
# pipeline step and as the key into param_grids.
classifiers = dict(
    svm=sklearn.svm.SVC(C=1, random_state=SEED),
    # Disabled alternatives:
    # LR=sklearn.linear_model.LogisticRegression(random_state=SEED),
    # KNN=sklearn.neighbors.KNeighborsClassifier(),
    # perceptron=sklearn.linear_model.Perceptron(random_state=SEED),
    # ELM=skelm.ELMClassifier(random_state=SEED, n_neurons=100, alpha=1),
)

# Hyper-parameter grids handed to GridSearchCV, one per classifier name.
# Keys follow the "<pipeline step name>__<parameter>" convention, where the
# step name is the classifier's key in `classifiers`.
param_grids = {
    'svm': {
        'svm__C': [1],
        #'svm__gamma': [1, 0.1],
        'svm__kernel': ['linear'],
    },
    'LR': {
        #'LR__C': [10**x for x in np.linspace(-3,3,num=20)],
        'LR__C': [0.05],
        # Must live inside the 'LR' grid: at the top level of param_grids it
        # would be treated as a classifier name, not as an LR parameter.
        'LR__max_iter': [1000],
    },
    'KNN': {
        'KNN__n_neighbors': [1, 3, 5, 10, 20],
        'KNN__weights': ["uniform", "distance"],
    },
    'perceptron': {
        'perceptron__penalty': ["l1", "l2", "elasticnet", None],
        'perceptron__max_iter': [100, 300, 1000],
    },
    'ELM': {
        'ELM__n_neurons': [100, 200, 300, 400, 500],
        'ELM__alpha': [0.1, 0.5, 1, 2, 5, 10, 20],
    },
}
# Explicit cross-validation folds for GridSearchCV: 10 folds over sample
# indices 0..39, each holding out one block of 4 consecutive samples.
# NOTE(review): 10 folds x 4 samples are hard-coded while nb_runs is 12 --
# confirm this matches the size of the training sets passed to the grid search.
cv_scheme = []
all_indices = range(40)
for fold in range(10):
    first = 4 * fold
    held_out = list(range(first, first + 4))
    kept = [idx for idx in all_indices if idx not in held_out]
    cv_scheme.append([kept, held_out])

# Build one grid-searched pipeline per classifier.
# Every pipeline has the same five steps:
#   scaler -> pca -> kbest -> rfe -> <classifier name>
# The three optional steps are replaced by an identity transformer when the
# matching use_* flag is False, so step names and positions never change.
models = dict()
# FunctionTransformer() with its default func=None is the identity; unlike a
# lambda it can be pickled by the standard library.
identity_transformer = FunctionTransformer()
for name, classifier in classifiers.items():
    pipeline = Pipeline([('scaler', std_scaler),
                         ('pca', (PCA(n_components=components_pca, random_state=SEED) if use_pca else identity_transformer)),
                         ('kbest', (selector if use_k_selector else identity_transformer)),
                         ('rfe', (RFE(sklearn.linear_model.LogisticRegression(random_state=SEED,C=0.05), n_features_to_select=features_rfe) if use_rfe else identity_transformer)),
                         (name, classifier)
                         ])
    # Hyper-parameters are tuned over the explicit folds in cv_scheme.
    GS = GridSearchCV(pipeline, param_grids[name], cv = cv_scheme, n_jobs = 4)
    models[name] = GS

decoder = Decoder(n_perm=n_individual_perms, models=models, n_classes=len(classes), n_splits=nb_runs, seed=SEED, verbose=2)

# Loading data

In [None]:
# Load the masked brain maps of every subject, together with the record of
# which masks exist, then hand that record to the decoder.
is_mohamed_data = (from_who == "mohameds")
maps_masked, masks_exist = load_full_data(
    subjects_ids,
    len(classes),
    nb_runs,
    maps_folder,
    masks_folder,
    is_from_mohamed=is_mohamed_data,
    use_t_maps=use_t_maps,
)
decoder.set_masks_exist(masks_exist)

In [None]:
# Size of the second axis of the first subject's first "vis" / "V5_L" map,
# taken as the number of voxels (features) -- assumes all ROIs share it; TODO confirm.
n_voxels = maps_masked[0]["vis"][0]["V5_L"].shape[1]
for name in classifiers:
    # Look the selector up by its step name: the pipeline is
    # scaler -> pca -> kbest -> rfe -> classifier, so steps[1] is the 'pca'
    # step and a positional lookup there never finds the SelectKBest.
    kbest_step = decoder.models[name].estimator.named_steps["kbest"]
    if isinstance(kbest_step, SelectKBest):
        # NOTE(review): when use_pca is also True the selector sees only
        # components_pca features, so k may exceed them -- confirm intended.
        kbest_step.set_params(k = k_selector(n_voxels))
print("initially "+str(n_voxels)+" voxels, selected = "+str(k_selector(n_voxels))+" voxels.")

# Within-modality decoding

In [None]:
# Run every within-modality analysis, then reorganise both outputs with
# change_confusion_matrixes_org (given the subject ids and the model names).
model_names = models.keys()
raw_cfm_within, raw_val_within = decoder.within_modality_decoding(
    maps_masked, labels, subjects_ids, within_modal_tasks_regions
)
confusion_matrixes_within = change_confusion_matrixes_org(raw_cfm_within, subjects_ids, model_names)
val_scores_within = change_confusion_matrixes_org(raw_val_within, subjects_ids, model_names)

# Cross-modal decoding

In [None]:
# Run every cross-modal analysis, then reorganise both outputs with
# change_confusion_matrixes_org (given the subject ids and the model names).
model_names = models.keys()
raw_cfm_cross, raw_val_cross = decoder.cross_modality_decoding(
    maps_masked, labels, subjects_ids, cross_modal_tasks_regions
)
confusion_matrixes_cross = change_confusion_matrixes_org(raw_cfm_cross, subjects_ids, model_names)
val_scores_cross = change_confusion_matrixes_org(raw_val_cross, subjects_ids, model_names)

# Bootstrapping to assess group-level significance

In [None]:
# Group-level bootstrap settings:
#   n_single_perm -- permutation count passed to decoder.score_bootstrapped_permutations
#   n_bootstrap   -- sample count passed to compute_bootstrap_distribution
n_single_perm, n_bootstrap = 50, 100_000

In [None]:
# Score n_single_perm permutations for the within-modality analyses first,
# then for the cross-modal ones, and reorganise each result with
# change_cfm_bootstrap_org.
raw_within_perm = decoder.score_bootstrapped_permutations(
    n_single_perm, labels_same, within_modal_tasks_regions, maps_masked, n_subjects,
    within_modality=True,
)
within_cf_100_perm = change_cfm_bootstrap_org(raw_within_perm, subjects_ids, models.keys(), n_single_perm)

raw_cross_perm = decoder.score_bootstrapped_permutations(
    n_single_perm, labels_same, cross_modal_tasks_regions, maps_masked, n_subjects,
    within_modality=False,
)
cross_cf_100_perm = change_cfm_bootstrap_org(raw_cross_perm, subjects_ids, models.keys(), n_single_perm)

In [None]:
def _bootstrap_from_permutations(cf_perm):
    """Turn one model's permutation confusion matrices into a bootstrap distribution."""
    perm_scores = compute_accuracy_bootstrap(n_subjects, n_single_perm, cf_perm, len(classes))
    return compute_bootstrap_distribution(n_bootstrap, n_subjects, perm_scores, n_single_perm)


# One bootstrap distribution per model; within-modality is always computed
# before cross-modal so the call order stays the same.
bootstrapped_distribution_cross = {}
bootstrapped_distribution_within = {}
for name in models.keys():
    bootstrapped_distribution_within[name] = _bootstrap_from_permutations(within_cf_100_perm[name])
    bootstrapped_distribution_cross[name] = _bootstrap_from_permutations(cross_cf_100_perm[name])

# Saving results

In [None]:
# Results of a run go to out/<from_who><type_maps><classifier>_<voxel_size>_radius=<radius><suffixes>/
type_maps = "_t_maps_" if use_t_maps else "_beta_maps_"
out_directory = "out/" + from_who + type_maps

for name in classifiers:
    # Optional suffixes record which feature-reduction steps were enabled.
    suffix_parts = [correction]
    if use_pca:
        suffix_parts.append("_pca" + str(components_pca))
    if use_k_selector:
        suffix_parts.append("_" + "k_selector")
    if use_rfe:
        suffix_parts.append("_" + "rfe" + str(features_rfe))
    out_dir = (out_directory + str(classifiers[name]) + "_" + voxel_size
               + "_radius=" + radius_mask + "".join(suffix_parts) + "/")
    create_directory(out_dir)
    save_dicts(out_dir + "masks_exist.csv", masks_exist, list(masks_exist[0].keys()), subjects_ids)

    # Same sequence for both analysis types, "within" first and then "cross":
    # confusion matrices, validation scores, per-subject metrics, group scores.
    per_scope = (
        ("within", confusion_matrixes_within, val_scores_within),
        ("cross", confusion_matrixes_cross, val_scores_cross),
    )
    for scope, all_cfms, all_val_scores in per_scope:
        cfms = all_cfms[name]
        val_scores = all_val_scores[name]
        save_dicts(out_dir + "confusion_matrixes_" + scope + ".csv", cfms, list(cfms[0].keys()), subjects_ids)
        save_dicts(out_dir + "validation_scores_" + scope + ".csv", val_scores, list(val_scores[0].keys()), subjects_ids)

        # Only the accuracy call asks for a return value (ret=True).
        accuracies = compute_metric(out_dir, subjects_ids, {'name': 'accuracy', 'function': accuracy}, scope, masks_exist, len(classes), ret=True)
        compute_metric(out_dir, subjects_ids, {'name': 'recall', 'function': recall}, scope, masks_exist, len(classes))
        compute_metric(out_dir, subjects_ids, {'name': 'precision', 'function': precision}, scope, masks_exist, len(classes))

        group_results = average_dicos(accuracies)
        save_dicts(out_dir + "group_scores_" + scope + ".csv", [group_results], list(group_results.keys()), [0])

In [None]:
for name in classifiers:
    out_dir = out_directory+str(classifiers[name])+"_"+voxel_size+"_radius="+radius_mask + ("_" + "pca" if use_pca else "") + ("_" + "k_selector" if use_k_selector else "")+("_" + "rfe"+str(features_rfe) if use_rfe else "")+"/"
    save_dicts(out_dir+"accuracy_bootstraps_within.csv", [bootstrapped_distribution_within[name]], list(bootstrapped_distribution_within[name][0].keys()), range(n_bootstrap))
    save_dicts(out_dir+"accuracy_bootstraps_cross.csv", [bootstrapped_distribution_cross[name]], list(bootstrapped_distribution_cross[name][0].keys()), range(n_bootstrap))

    cv_group_df = retrieve_cv_metric(out_dir, "group_scores")
    bootstrap_df  = retrieve_bootstrap_metric(out_dir, "accuracy")
    pvals = compute_p_val_bootstrap(bootstrap_df, cv_group_df)
    save_dicts(out_dir+"estimated_pval_bootstrap.csv", [pvals], list(pvals.keys()), [0])

# Plotting results (from files of saved results)

In [None]:
type_maps = "_t_maps_" if use_t_maps else "_beta_maps_"
out_directory = "out/"+from_who+type_maps#+"_2tests_"
for name in classifiers:
    out_dir = out_directory+str(classifiers[name])+"_"+voxel_size+"_radius="+radius_mask+correction + ("_" + "pca"+str(components_pca) if use_pca else "") + ("_" + "k_selector" if use_k_selector else "")+("_" + "rfe"+str(features_rfe) if use_rfe else "")+"/"
    masks_exist = retrieve_masks_exist(out_dir)
    cv_group_df = retrieve_cv_metric(out_dir, "group_scores")
    cv_df = retrieve_cv_metric(out_dir, "accuracy")
    cfm_df = retrieve_cv_matrixes(out_dir)
    val_scores_df = retrieve_val_scores(out_dir)
    pvals = retrieve_pvals(out_dir, default_keys=cv_df.columns)

    plt_dir = "plots/"+from_who+type_maps+str(classifiers[name])+"_"+voxel_size+"_radius="+radius_mask+correction + ("_" + "pca"+str(components_pca) if use_pca else "") + ("_" + "k_selector" if use_k_selector else "")+("_" + "rfe"+str(features_rfe) if use_rfe else "")
    create_directory(plt_dir)
    plotter = Plotter(plt_dir, subjects_ids)
    plotter.plot_cv_score_with_points(cv_df, pvals, chance_level = True)
    compute_accuracy_variance(out_dir, "within")
    compute_accuracy_variance(out_dir, "cross")
    #plotter.plot_validation_scores_hyper_param(val_scores_df, "C", param_grids["LR"]["LR__C"], masks_exist, chance_level=True, log10_scale=True)

    group_cfm = compute_group_confusion_matrix(cfm_df, subjects_ids)
    plotter.plot_group_confusion_matrix(group_cfm, classes)

In [None]:
for name in classifiers:
    out_dir = out_directory+str(classifiers[name])+"_"+voxel_size+"_radius="+radius_mask+"/"
    bootstrap_df  = retrieve_bootstrap_metric(out_dir, "accuracy")
    cv_group_df = retrieve_cv_metric(out_dir, "group_scores")
    pvals = retrieve_pvals(out_dir)
    
    plotter.plot_bootstrap(bootstrap_df, cv_group_df, pvals, 30)

# Comparing classifiers

In [None]:
# Compare group accuracy and accuracy variance across voxel sizes and ROI
# radiuses, read from previously saved result folders.
subjects_ids = range(1,24)
type_maps = "_t_maps_" if use_t_maps else "_beta_maps_"
# x-axis values per voxel size, one per radius in `radiuses`
# (presumably the voxel count of each ROI -- the axis is labelled "voxel amount").
x = {
    "2": [81, 123, 179, 257, 389, 515],
    "3": [19, 33, 57, 81, 123, 171],
}
radiuses = range(5,11)
voxel_sizes = ["2", "3"]
acc_group_combination = {vsize: dict.fromkeys(radiuses) for vsize in voxel_sizes}
var_group_combination = {vsize: dict.fromkeys(radiuses) for vsize in voxel_sizes}

# The loop variable is deliberately NOT called `voxel_size`: that name holds
# the notebook-level setting used to build the result paths of other cells.
for vsize in voxel_sizes:
    for radius in radiuses:
        folder = "out/our_"+type_maps+str(vsize)+"_radius="+str(radius)+"_"+str(classifiers["svm"])+"selecting_20.0%/"
        acc = retrieve_cv_metric(folder, "group_scores")
        dfw = pd.read_csv(folder+"var_within.csv", index_col=0)
        dfc = pd.read_csv(folder+"var_cross.csv", index_col=0)
        df = pd.concat([dfw, dfc])
        # Reshape the first column into a one-row frame whose columns are the
        # analysis names; the first row of the concatenation is skipped.
        new_cols = df.index[1:]
        temp_df = pd.DataFrame(columns=new_cols, index=[1], dtype=float)
        temp_df[new_cols] = df[df.columns[0]].values[1:]

        acc_group_combination[vsize][radius] = acc
        var_group_combination[vsize][radius] = temp_df
        #if not os.path.exists(folder+"accuracy_cross.csv"): os.rename(folder+"cross_cross.csv", folder+"accuracy_cross.csv")
        #compute_accuracy_variance(folder, "within")
        #compute_accuracy_variance(folder, "cross")

colors = {"2":"tab:green","3":"tab:orange"}
plt_directory = "plots/"+from_who+"_"+type_maps+"vsizes_radiuses_OK"+str(classifiers["svm"])+"/"
create_directory(plt_directory)
plotter = Plotter(plt_directory, subjects_ids)
scores_var = dict()

# One figure per analysis; the analysis names are taken from the entry for
# voxel size "2" and radius 5.
for analysis in acc_group_combination["2"][5]:
    # Disabled alternative: mean accuracy as a function of the ROI radius.
    # for vsize in voxel_sizes:
    #     scores = [acc_group_combination[vsize][radius][analysis] for radius in radiuses]
    #     # scores_var[analysis] = np.var(scores*100)
    #     # print(analysis)
    #     # print((max(scores)-min(scores))*100)
    #     plt.plot(radiuses, scores, '-o', color = colors[vsize], label = vsize+" mm")
    #     plt.axhline(0.25, color="gray", alpha=0.5)
    # title = plotter.generate_title("Accuracy",analysis,0)
    # plt.ylim(0.2,0.5)
    # plotter.save(title+ ", depending on ROI radius","","mean accuracy","radius [mm]",legend=True)

    # Disabled alternative: mean accuracy as a function of the voxel amount.
    # for vsize in voxel_sizes:
    #     scores = [acc_group_combination[vsize][radius][analysis] for radius in radiuses]
    #     #n_voxels = list(map(k_selector, np.array(voxel_amounts[vsize])))
    #     plt.plot(np.array(x[vsize]), scores, '-o', color = colors[vsize], label = vsize+" mm")
    #     #print("number of voxels : "+str(voxel_amounts[vsize]))
    #     #print("number of voxels selected : "+str(n_voxels))
    #     plt.axhline(0.25, color="gray", alpha=0.5)
    # title = plotter.generate_title("Accuracy",analysis,0)
    # plt.ylim(0.2,0.5)
    # plotter.save(title+", depending on amount of voxels","","mean accuracy","voxel amount",legend=True)

    # Active plot: accuracy variance as a function of the voxel amount,
    # one curve per voxel size.
    for vsize in voxel_sizes:
        scores = [var_group_combination[vsize][radius][analysis] for radius in radiuses]
        #n_voxels = list(map(k_selector, np.array(voxel_amounts[vsize])))
        plt.plot(np.array(x[vsize]), scores, '-o', color = colors[vsize], label = vsize+" mm")
        #print("number of voxels : "+str(voxel_amounts[vsize]))
        #print("number of voxels selected : "+str(n_voxels))
        #plt.axhline(0.25, color="gray", alpha=0.5)
    title = plotter.generate_title("Variance",analysis,0)
    plt.ylim(0, 0.014)
    plotter.save(title+", depending on amount of voxels","","accuracy variance","voxel amount",legend=True)

# out_folders = list()
# labels = list()
# for kn in ["2", "3", "5"]:
#     for sm in ["15", "30", "50"]:
#         out_folders.append("out/our__t_maps_3_radius=10_SVC(C=1, random_state=0)smote"+sm+"kn"+kn+"/")
#         labels.append("s="+sm+"k="+kn)

# out_folders.append("out/our_3_radius=10_SVC(C=1, random_state=0)/")
# labels.append("no smote")

# print(out_folders)


# plotter = Plotter("plots/comparing_smote_"+"_".join(labels), subjects_ids)
# plotter.plot_tests_scores_from_different_folders(out_folders, labels, "SMOTE", "parameters")
# plotter.plot_accuracy_var_from_different_folders(out_folders, labels)