In [1]:
import os

import numpy as np
import pandas as pd
import pickle5 as pickle
from boruta import BorutaPy
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, f1_score, plot_confusion_matrix
from tqdm import tqdm

import modelling
import preprocessing as pp
import train_test_sets

In [2]:
# Root folder holding every input dataset used by this notebook.
# Set the MRI_DATA_DIR environment variable to run on another machine; when it
# is unset, the original absolute location is used, so the paths are unchanged.
DATA_DIR = os.environ.get(
    "MRI_DATA_DIR",
    "/home/brunovieira/Tamires_Experiments/Bases_de_Dados",
)

# Input files, all resolved against DATA_DIR (kept as plain strings).
mri_subs_path = os.path.join(DATA_DIR, "MRI_Data_Voxels_894.parquet")
mri_areas_path = os.path.join(DATA_DIR, "MRI_Data_Areas_890.csv")
bb_path = os.path.join(DATA_DIR, "BigBrain.xlsx")
ids_path = os.path.join(DATA_DIR, "participants_nkienhanced.tsv")

In [3]:
# Preprocessing pipeline: load the MRI table, sample/clean it, drop participants
# that fail quality assessment, then attach the BigBrain features.

# FreeSurfer data (presumably what "freesufer" meant), read from the parquet file
mri_subs_all = pp.read_parquet(mri_subs_path)

# sample, drop zeros and encode categorical columns (per the original note;
# the exact meaning of sample_size=50 depends on pp.clean_sample -- TODO confirm)
mri_subs = pp.clean_sample(mri_subs_all, sample_size=50)

# quality assessment: find the bad participants from the areas file, then
# remove them from the sampled table
bad_participants = pp.find_bad_ones(mri_areas_path)
mri_subs = pp.eliminate_bad_ones(bad_participants, mri_subs)

# add identification features (currently disabled, so ids_path is not used here)
#mri_subs = pp.ids_features(ids_path, mri_subs)
# NOTE(review): the encoded result is stored in `mri_subs1`, but the next step
# consumes `mri_subs`. Unless pp.cat2int mutates its input in place, this
# encoding is not carried forward -- confirm which behaviour is intended.
mri_subs1 = pp.cat2int(mri_subs, cat_features=['sex', 'handedness', 'hemisphere'])

# add bigbrain features
mri_subs = pp.bb_features(bb_path, mri_subs)

Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.


In [4]:
# Preview the first rows of the preprocessed table as a quick sanity check.
mri_subs.head()

Unnamed: 0,participant,hemisphere,atlasEcono,atlasDF,area,curv,sulc,thickness,age,sex,...,bblayer3_age,bblayer4_age,bblayer5_age,bblayer6_age,ve1_age,ve2_age,ve3_age,ve4_age,ve5_age,ve6_age
8618039,A00028522,0,28,11,0.184097,-0.298011,-8.81799,1.157043,84.0,1,...,22.724636,11.362318,15.42029,17.449276,7.826087,9.391305,23.47826,9.391305,11.47826,22.434784
8618040,A00028522,0,28,11,0.150245,-0.313457,-8.855257,1.158602,84.0,1,...,22.724636,11.362318,15.42029,17.449276,7.826087,9.391305,23.47826,9.391305,11.47826,22.434784
8618041,A00028522,0,28,11,0.25101,-0.312042,-8.447415,1.163185,84.0,1,...,22.724636,11.362318,15.42029,17.449276,7.826087,9.391305,23.47826,9.391305,11.47826,22.434784
8618042,A00028522,0,28,11,0.167873,-0.304944,-8.802314,1.181776,84.0,1,...,22.724636,11.362318,15.42029,17.449276,7.826087,9.391305,23.47826,9.391305,11.47826,22.434784
8618043,A00028522,0,28,11,0.145979,-0.315183,-8.659517,1.167591,84.0,1,...,22.724636,11.362318,15.42029,17.449276,7.826087,9.391305,23.47826,9.391305,11.47826,22.434784


In [5]:
# Prepare the train and test sets used by the models.
Xy_sets = train_test_sets.separate(mri_subs)

# Returns 4 elements: 2 train/test pairs, one at vertex level (`_vo`) and the
# other at cortical-structure level (`_gr`).
Xy_train_vo, Xy_test_vo, Xy_train_gr, Xy_test_gr = Xy_sets

# List of variables available in the datasets (left disabled).
#Xy_train_vo.columns#, Xy_train_gr.columns

In [6]:
# experiment 1
# NOTE(review): the label says "experiment 1" but outputs go to ".../exp0/" -- confirm.

# Folder handed to the model as `path_images_outputs`.
exp_output_dir = '/home/brunovieira/Tamires_Experiments/Outputs/exp0/'

# Predictor columns for this run.
exp_features = [
    'sex', 'handedness', 'hemisphere',
    'age', 'area', 'sulc', 'curv',
    'bigbrain_layer_1', 'bigbrain_layer_2', 'bigbrain_layer_3',
    'bigbrain_layer_4', 'bigbrain_layer_5', 'bigbrain_layer_6',
]

# Build the model from the train/test sets prepared earlier.
m = modelling.Model(
    Xy_sets,
    path_images_outputs=exp_output_dir,
    features=exp_features,
    hyperparameter_search_size=2,
    voxel=False,
    structure_modeling=False,
    structure_evaluation=False,  # only makes sense when voxel=True
)

KeyboardInterrupt: 

In [5]:
#m.test('vizualization')

In [6]:
#m.explicability()

In [7]:
# Show the run summary for the model (the recorded output lists the target,
# features, algorithm, hyperparameters and dataset sizes).
m.set_resume()

{'Target ': 'thickness',
 'Features': 'sex, handedness, hemisphere, age, area, sulc, curv, bigbrain_layer_1, bigbrain_layer_2, bigbrain_layer_3, bigbrain_layer_4, bigbrain_layer_5, bigbrain_layer_6',
 'Algoritmo': 'LGBMClassifier',
 'Hyperparameters': {'learning_rate': 0.005,
  'max_depth': 10,
  'min_child_samples': 10,
  'n_estimators': 700,
  'num_leaves': 50,
  'reg_alpha': 0,
  'reg_lambda': 0,
  'subsample_for_bin': 20000},
 'Base de Dados instancias': 3720,
 'Base de Dados participantes': 48.94736842105263,
 'Model Type': 'Estruturas Corticais',
 'structure_modeling': False,
 'structure_evaluation': False}