# Imports and Constants

In [None]:
# Standard library
import math
import random
import time

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
import sklearn.svm
from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_classif, chi2
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Project-local helpers (star imports kept in their original order: later ones may override earlier names)
from saving_outputs import *
from load_data import *
from masks import *
from decoding import *
from plots import *
from utility import *
from metrics import *


# --- Reproducibility ---------------------------------------------------------
# Seed the global `random` generator once, before anything else runs.
SEED = 0
random.seed(SEED)

# --- Experimental design -----------------------------------------------------
classes = ['Up', 'Down', 'Right', 'Left']
nb_runs = 12
# Number of samples for a single modality: one sample per class in every run.
length_one_modality = len(classes) * nb_runs

subjects_ids = range(1, 24)
n_subjects = len(subjects_ids)
n_individual_perms = 1000

# --- Labels ------------------------------------------------------------------
# The class list is repeated once per run; every modality gets its own array
# object (same content) so that they never alias one another.
labels_same = np.array(classes * nb_runs)
labels = {modality: np.array(classes * nb_runs) for modality in ('vis', 'aud')}

# --- (modalities, regions) pairs handed to the decoder -----------------------
_region_pairs = (("V5_L", "V5_R"), ("PT_L", "PT_R"))

# One entry per (single modality, region pair), ordered vis/V5, vis/PT, aud/V5, aud/PT.
within_modal_tasks_regions = [([modality], list(pair))
                              for modality in ("vis", "aud")
                              for pair in _region_pairs]

# One entry per region pair, each involving both modalities.
cross_modal_tasks_regions = [(["vis", "aud"], list(pair)) for pair in _region_pairs]

# Analyses

In [None]:
# --- Data / ROI selection ------------------------------------------------------
from_who = "our"   # 2 possibilities : "mohameds" or "our"
voxel_size = "3" # 2 possibilities : "2" or "3"
use_t_maps = True # use t-maps or beta-maps
correction = "_Tcorrected" # a t-value has been used to create ROIs or no
radius = 10

# --- Pipeline building blocks ----------------------------------------------------
std_scaler = sklearn.preprocessing.StandardScaler()

# Candidate classifiers, keyed by the step name they get inside the Pipeline.
classifiers = {
    'svm':sklearn.svm.SVC(C=1, random_state=SEED),
    #'LR':sklearn.linear_model.LogisticRegression(random_state=SEED)
    }
# Only used to tag output folder names (via str(classifier)).
classifier = classifiers["svm"]

# Hyper-parameter grids; keys follow the "<pipeline step>__<parameter>" convention
# expected by GridSearchCV.
param_grids = {
    'svm':{
        #'svm__C': [0.1, 1],
        #'svm__gamma': [1, 0.1],
        'svm__kernel': ['linear']},
    'LR':{
        'LR__C': [1]}}

# --- Cross-validation scheme given to GridSearchCV -------------------------------
# Leave-one-run-out over (nb_runs - 1) runs of len(classes) consecutive samples
# each. With nb_runs = 12 and 4 classes this is 11 folds over 44 samples, exactly
# the values that used to be hard-coded here.
# NOTE(review): presumably the Decoder already holds one run out before the grid
# search sees the data, which is why only nb_runs - 1 runs are indexed - confirm.
n_samples_per_run = len(classes)
n_inner_runs = nb_runs - 1
n_inner_samples = n_inner_runs * n_samples_per_run

cv_scheme = []
for run in range(n_inner_runs):
    first = run * n_samples_per_run
    idx_te = list(range(first, first + n_samples_per_run))
    held_out = set(idx_te)
    idx_tr = [x for x in range(n_inner_samples) if x not in held_out]
    # Each entry is a [train_indices, test_indices] pair.
    cv_scheme.append([idx_tr, idx_te])


In [None]:
# Runs within- and cross-modality decoding for every SMOTE (kn, sm) combination
# and writes confusion matrices, per-subject metrics and group scores to one
# output folder per combination.
acc_group_combination = dict()

maps_folder="brain_maps/"+from_who+"_maps"+"_"+voxel_size
masks_folder="masks/"+from_who+"_masks"+"_"+voxel_size+"_radius="+str(radius)+correction
# Pass the configured `use_t_maps` so that the loaded maps match the map-type tag
# ("_t_maps_" / "_beta_maps_") written into the output folder name below.
maps_masked, masks_exist = load_full_data(subjects_ids, len(classes), nb_runs, maps_folder, masks_folder,
                                          is_from_mohamed=(from_who=="mohameds"), use_t_maps=use_t_maps)

# Does not depend on the SMOTE parameters, so it is computed once.
type_maps = "_t_maps_" if use_t_maps else "_beta_maps_"

# The timer starts before the loops: the durations printed below are cumulative.
start_time = time.time()
for kn in [3, 5]:
    for sm in [15, 20, 30, 40, 50]:

        # Fresh grid-search objects for every parameter combination.
        models = dict()
        for name in classifiers:
            pipeline = Pipeline([('scaler', std_scaler),
                                 (name, classifiers[name])])
            models[name] = GridSearchCV(pipeline, param_grids[name], cv=cv_scheme, n_jobs=3)

        decoder = Decoder(n_perm=n_individual_perms, models=models, n_classes=len(classes), n_splits=nb_runs, seed=SEED, sm_kn=kn, sm_samples=sm)
        decoder.set_masks_exist(masks_exist)

        # launch analyses (only the first element of each returned value is used)
        confusion_matrixes_within = decoder.within_modality_decoding(maps_masked, labels, subjects_ids, within_modal_tasks_regions)
        confusion_matrixes_within = change_confusion_matrixes_org(confusion_matrixes_within[0], subjects_ids, models.keys())

        confusion_matrixes_cross = decoder.cross_modality_decoding(maps_masked, labels, subjects_ids, cross_modal_tasks_regions)
        confusion_matrixes_cross = change_confusion_matrixes_org(confusion_matrixes_cross[0], subjects_ids, models.keys())

        # saving results
        out_directory = "out/"+from_who+type_maps+voxel_size+"_radius="+str(radius)+"_"+str(classifier)+correction+"_smotekn="+str(kn)+"_samples="+str(sm)+"/"
        create_directory(out_directory)

        save_dicts(out_directory + "confusion_matrixes_within.csv", confusion_matrixes_within["svm"],
                list(confusion_matrixes_within["svm"][0].keys()), subjects_ids)
        acc_within = compute_metric(out_directory, subjects_ids, {'name' : 'accuracy', 'function':accuracy}, "within", masks_exist, len(classes), ret = True)

        save_dicts(out_directory + "confusion_matrixes_cross.csv", confusion_matrixes_cross["svm"],
                list(confusion_matrixes_cross["svm"][0].keys()), subjects_ids)

        # NOTE(review): the metric is named 'cross' here but 'accuracy' for the
        # within-modality call above - confirm this is intended before relying
        # on the name downstream.
        acc_cross = compute_metric(out_directory, subjects_ids, {'name' : 'cross', 'function':accuracy}, "cross", masks_exist, len(classes), ret = True)

        within_modality_group_results = average_dicos(acc_within)
        save_dicts(out_directory + "group_scores_within.csv", [within_modality_group_results],
                list(within_modality_group_results.keys()), [0])

        cross_modality_group_results = average_dicos(acc_cross)
        save_dicts(out_directory + "group_scores_cross.csv", [cross_modality_group_results],
                list(cross_modality_group_results.keys()), [0])

        # Merge into a copy so the within-modality results are not mutated.
        # Only the scores of the last (kn, sm) combination remain after the loops.
        acc_group_combination = dict(within_modality_group_results)
        acc_group_combination.update(cross_modality_group_results)


        duration = time.time()-start_time
        print("_smotekn="+str(kn)+"_samples="+str(sm)+" % done in "+str(duration)+" seconds")

# Plot score and score variance for different SMOTE parameters (number of neighbours k and number of samples s)
### (voxel_size and radius kept constant)

In [None]:
# Compares test scores and accuracy variance across SMOTE parameter combinations,
# plus one reference folder produced without SMOTE.
percentages = [0.2,0.4,0.6,0.8]  # not used by this cell; kept so the name stays defined

type_maps = "_t_maps_" if use_t_maps else "_beta_maps_"

# NOTE(review): the analysis cell of this notebook iterates kn over [3, 5] and sm
# over [15, 20, 30, 40, 50]; the folders for kn=2 read here must come from
# another run - confirm they exist.
kns = [2,5]
sms = [15,30,50]

# One output folder and one legend label per (kn, sm) combination, in the same order.
# The labels live in `smote_labels` (not `labels`) so that the global `labels`
# dict consumed by the decoding cell is not overwritten.
out_folders = list()
smote_labels = list()
for kn in kns:
    for sm in sms:
        out_folders.append("out/"+from_who+type_maps+voxel_size+"_radius="+str(radius)+"_"+str(classifier)+correction+"_smotekn="+str(kn)+"_samples="+str(sm)+"/")
        smote_labels.append("k="+str(kn)+" s="+str(sm))

# Optional pre-processing step, left disabled as in the original cell.
# NOTE(review): the variance plot may require this to have been run once per
# folder - confirm.
# for folder in out_folders:
#     compute_accuracy_variance(folder, "within")
#     compute_accuracy_variance(folder, "cross")

# Reference results obtained without SMOTE.
out_folders.append("out/our_t_maps_3_radius=10_SVC(C=1, random_state=0)_Tcorrectedselecting_100_f_classif/")
smote_labels.append("no smote")

plotter = Plotter("plots/comparing_SMOTE_"+type_maps+"_".join(smote_labels), subjects_ids)
plotter.plot_tests_scores_from_different_folders(out_folders, smote_labels, "SMOTE", "SMOTE parameters")
plotter.plot_accuracy_var_from_different_folders(out_folders, smote_labels, "SMOTE", "SMOTE parameters")

In [None]:
# Compares test scores across SelectKBest settings: every percentage of kept
# features is paired with both score functions.
subjects_ids = range(1,24)

selection_percentages = (20, 40, 60, 80)
score_functions = ("f_classif", "mutual_info")
folder_template = "out/our_t_maps_3_radius=10_SVC(C=1, random_state=0)_Tcorrectedselecting_{}_{}/"

# Folders and legend names are generated in the same order:
# percentage first, then score function.
settings = [(pct, func) for pct in selection_percentages for func in score_functions]
out_folders = [folder_template.format(pct, func) for pct, func in settings]
classifiers_names = ["{}% {}".format(pct, func) for pct, func in settings]

plotter = Plotter("plots/comparing_classifiers_mut_info_f_classif_"+"_".join(classifiers_names), subjects_ids)
plotter.plot_tests_scores_from_different_folders(out_folders, classifiers_names, "feature selection", "SelectKBest parameters")

## Examine accuracy distribution

In [None]:
# Draws one histogram of the stored accuracies per analysis found in `out_folder`.
out_folder = "out/our_t_maps_3_radius=10_SVC(C=1, random_state=0)_Tcorrectedselecting_100_mutual_info/"

acc = retrieve_cv_metric(out_folder, "accuracy")
for analysis in acc:
    fig, ax = plt.subplots()
    # NOTE(review): 23 bins presumably mirrors the number of subjects - confirm.
    ax.hist(acc[analysis], bins = 23)
    # Label every figure so the histograms can be told apart.
    ax.set_title(str(analysis))
    ax.set_xlabel("accuracy")
    ax.set_ylabel("count")
    plt.show()
    plt.close(fig)