# Load Libraries

In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import joblib
import tensorflow as tf
from tensorflow import keras 
from keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import initializers
from tensorflow.keras import regularizers

from keras.layers.core import Dense, Dropout, Activation
import keras.callbacks as callbacks
from keras.utils import np_utils

# Load Data

In [37]:
df_config = pd.read_csv('../data/config.csv')
df_config

Unnamed: 0,Train Label,Raw Data Path,Info Files Path,MFCC Processed Path,Classes Name Path,ID 2 Class Path,Ready to Train Data Path,Indexes Path,Number of Folds,Pipeline Path,Model Path,Hidden Neurons Path,Number of Inits
0,ShipsEar_NN_Classification,../data/shipsEar_AUDIOS,../data/file_info.csv,../data/mfcc_data.csv,../data/class_names.csv,../data/id2classes.csv,../data/train_data.csv,../data/indexes,5,../data/pipelines,../data/models,../data/models/hidden_neurons.pkl,1


In [3]:
df_train = pd.read_csv(df_config['Ready to Train Data Path'][0])

## Fit a preprocessing pipeline for each kFold

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler

index_path = df_config['Indexes Path'][0]
pipe_path = df_config['Pipeline Path'][0]
n_folds =  df_config['Number of Folds'][0]

data = df_train.drop(columns=['target'])

for ifold in range(n_folds):
    index_file = 'kFoldsCV_fold_%i_of_%i_indexes.pkl'%(ifold, n_folds)
    with open(os.path.join(index_path,index_file),'rb') as file_handler:
            [trn_idx,val_idx] = pickle.load(file_handler)
    
    # criando o pipeline
    pipe = Pipeline(steps=[("scaler", MinMaxScaler())])
    pipe.fit(data.loc[trn_idx,:])
    
    pipe_name = 'kFoldsCV_fold_%i_of_%i_pipe.pkl'%(ifold, n_folds)
    with open(os.path.join(pipe_path,pipe_name),'wb') as file_handler:
        joblib.dump(pipe, file_handler)

# Define Model Class

In [4]:
class MLPModel:
    def __init__(self, n_hidden_neurons=2, verbose=2):
        self.n_hidden_neurons = n_hidden_neurons
        self.model = None
        self.trn_history = None
        self.trained = False
        self.verbose = verbose
    def __str__(self):
        m_str = 'Class MLPModel\n'
        if self.trained:
            m_str += 'Model is fitted, '
        else:
            m_str += 'Model is not fitted, '
        m_str += 'instance created with %i hidden neurons'%(self.n_hidden_neurons) 
        return m_str
    def create_model(self, data, target, random_state=0, learning_rate=0.01):
        #tf.random.set_seed(random_state)

        model = tf.keras.Sequential()
        
        # add a input to isolate the input of NN model
        model.add(tf.keras.Input(shape=(data.shape[1],)))
        # add a non-linear single neuron layer
        hidden_layer = layers.Dense(units=self.n_hidden_neurons,
                                    activation='tanh',
                                    kernel_initializer=initializers.RandomNormal(stddev=0.01),
                                    kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4),
                                    bias_regularizer=regularizers.L2(1e-4),
                                    bias_initializer=initializers.Zeros()
                                   )
        model.add(hidden_layer)
        # add a non-linear output layer with max sparse target shape
        output_layer = layers.Dense(units=target.shape[1],
                                    activation='tanh',
                                    kernel_initializer=initializers.RandomNormal(stddev=0.01),
                                    bias_initializer=initializers.Zeros()
                                   )
        model.add(output_layer)
        # creating a optimization function using steepest gradient
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=learning_rate,
                                                                  decay_steps=100,
                                                                  decay_rate=0.9)
        optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)

        # compile the model
        loss = keras.losses.CategoricalCrossentropy(from_logits=False,
                                                    label_smoothing=0.0,
                                                    axis=-1,
                                                    reduction="auto",
                                                    name="cat_crossent",)


        cat_cross = keras.losses.BinaryCrossentropy(from_logits=False,
                                                         label_smoothing=0.0,
                                                         axis=-1,
                                                         reduction="auto",
                                                         name="cat_crossent_met",)
        cat_acc_metric = keras.metrics.BinaryAccuracy(name="cat_acc", dtype=None)
        acc_metric = keras.metrics.Accuracy(name="accuracy",dtype=None)
        mse_metric = keras.metrics.MeanSquaredError(name="mse", dtype=None)
        rmse_metric = keras.metrics.RootMeanSquaredError(name="rmse", dtype=None)

        model.compile(loss="mean_squared_error", 
                      optimizer=optimizer,
                      metrics=[acc_metric,mse_metric,rmse_metric])
        return model
    def fit(self, X, Y,
            trn_id=None, 
            val_id=None, 
            epochs=50,
            batch_size=4,
            patience = 100,
            learning_rate=0.01, random_state=0):
        
        X_copy = X.copy()
        Y_copy = Y.copy()
        
        model = self.create_model(X_copy,Y_copy, random_state=random_state, learning_rate=learning_rate)
        
        # early stopping to avoid overtraining
        earlyStopping = callbacks.EarlyStopping(monitor='val_loss', 
                                                patience=patience,verbose=self.verbose, 
                                                mode='auto')
    
        trn_desc = model.fit(X_copy[trn_id,:], Y_copy[trn_id],
                             epochs=epochs,
                             batch_size=batch_size,
                             callbacks=[earlyStopping], 
                             verbose=self.verbose,
                             validation_data=(X_copy[val_id,:],
                                              Y_copy[val_id]),
                            )
        self.model = model
        self.trn_history = trn_desc
        self.trained = True
    def predict(self, data):
        return self.model.predict(data)
    def save(self, file_path):
        with open(file_path,'wb') as file_handler:
            joblib.dump([self.n_hidden_neurons, self.model,
                        self.trn_history, self.trained], file_handler)
    def load(self, file_path):
        with open(file_path,'rb') as file_handler:
            [self.n_hidden_neurons, self.model, self.trn_history, self.trained]= joblib.load(file_handler)
    def model_with_no_output_layer(self):
        buffer_model = tf.keras.Sequential()    
        # add a input to isolate the input of NN model
        buffer_model.add(tf.keras.Input(shape=(model.model.layers[0].get_weights()[0].shape[0],)))
        # add a non-linear single neuron layer
        hidden_layer = layers.Dense(units=model.model.layers[0].get_weights()[1].shape[0],
                                    activation='tanh')
        buffer_model.add(hidden_layer)    
        output_layer = layers.Dense(units=1,activation='tanh')
    
        for idx, layer in enumerate(buffer_model.layers):
            layer.set_weights(model.model.layers[idx].get_weights())
        return buffer_model
    def predict_one_layer_before_output(self, data):
        buffer_model = self.model_with_no_output_layer()
        return buffer_model.predict(data)

    
def write_list_of_hidden_neurons(filename, hidden_neurons):
    with open(filename,'wb') as file_handler:
        pickle.dump([hidden_neurons],file_handler)
    return 0
def get_list_of_hidden_neurons(filename):
    with open(filename,'rb') as file_handler:
        [hidden_neurons] = pickle.load(file_handler)
    return hidden_neurons        

In [40]:
# descomente somente quando for necessário
#hidden_neurons = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
#hidden_neurons = [1]
#write_list_of_hidden_neurons(df_config['Hidden Neurons Path'][0],hidden_neurons)

# Config for kFold training

In [41]:
hidden_neurons = get_list_of_hidden_neurons(df_config['Hidden Neurons Path'][0])
model_path = df_config['Model Path'][0]
n_folds =  df_config['Number of Folds'][0]

print('kFold Files Generation')

buffer = 0*np.ones([n_folds,len(hidden_neurons)])
df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))
df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_acc.csv'%(df_config['Train Label'][0])))
buffer = False*np.ones([n_folds,len(hidden_neurons)])
df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))
df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(df_config['Train Label'][0])))
buffer = False*np.ones([n_folds,len(hidden_neurons)])
df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))
df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(df_config['Train Label'][0])))

kFold Files Generation


In [45]:
# for kFolds CV

from sklearn.metrics import accuracy_score

print('kFold Training Process')

if True: # remove when your are shure to spent time!!!
    index_path = df_config['Indexes Path'][0]
    pipe_path = df_config['Pipeline Path'][0]
    model_path = df_config['Model Path'][0]

    data = df_train.drop(columns=['target']).values
    trgt = df_train['target'].values
    
    df_acc = pd.read_csv(os.path.join(model_path, '%s_kfold_model_acc.csv'%(df_config['Train Label'][0])),index_col=0)
    df_status = pd.read_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(df_config['Train Label'][0])),index_col=0)
    df_status = df_status.astype(bool)
    df_names = pd.read_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(df_config['Train Label'][0])),index_col=0)
    df_names = df_names.astype(str)

    for ifold in range(n_folds):
        print('Training Process %i fold of %i folds'%(ifold, n_folds))
        cv_name = 'kFoldsCV_fold_%i_of_%i_indexes.pkl'%(ifold, n_folds)
        with open(os.path.join(index_path,cv_name),'rb') as file_handler:
            [trn_idx,val_idx] = pickle.load(file_handler)
        pipe_name = 'kFoldsCV_fold_%i_of_%i_pipe.pkl'%(ifold, n_folds)
        with open(os.path.join(pipe_path,pipe_name),'rb') as file_handler:
            pipe = joblib.load(file_handler)
        
        trn_data = pipe.transform(data)
        trn_trgt = tf.keras.utils.to_categorical(trgt, num_classes=len(np.unique(trgt)))
        
        for ineuron in hidden_neurons:
            print('Training for %i neuron in'%(ineuron),hidden_neurons)
            best_acc = -1
            best_init = -1
            if df_status.loc[ifold,str(ineuron)] == False:
                 for iinit in range(df_config['Number of Inits'][0]):
                        print('%s - Training Model with %i neurons - %i init'%(df_config['Train Label'][0], ineuron, iinit))
                        model_name = '%s_kFoldsCV_fold_%i_MLPModel_%i_hidden_neurons_%i_init.pkl'%(df_config['Train Label'][0], 
                                                                                                   ifold, ineuron,
                                                                                                   iinit)
                        if os.path.exists(os.path.join(model_path, model_name)) == False:
                            model = MLPModel(n_hidden_neurons=ineuron,verbose=2)
                            model.fit(trn_data, trn_trgt, trn_id=trn_idx, val_id=val_idx, epochs=5, random_state=iinit)
                            model.save(os.path.join(model_path, model_name))
                        else:
                            model = MLPModel(n_hidden_neurons=ineuron,verbose=0)
                            model.load(os.path.join(model_path, model_name))
                        model_acc = accuracy_score(trn_trgt,np.sign(model.predict(pipe.transform(data))))

kFold Training Process
Training Process 0 fold of 5 folds
Training for 1 neuron in [1]
ShipsEar_NN_Classification - Training Model with 1 neurons - 0 init




Epoch 1/1000
115934/115934 - 148s - loss: 0.0640 - accuracy: 0.0000e+00 - mse: 0.0638 - rmse: 0.2527 - val_loss: 0.0683 - val_accuracy: 0.0000e+00 - val_mse: 0.0681 - val_rmse: 0.2610 - 148s/epoch - 1ms/step
Epoch 2/1000
115934/115934 - 159s - loss: 0.0640 - accuracy: 0.0000e+00 - mse: 0.0638 - rmse: 0.2526 - val_loss: 0.0683 - val_accuracy: 0.0000e+00 - val_mse: 0.0681 - val_rmse: 0.2610 - 159s/epoch - 1ms/step
Epoch 3/1000


KeyboardInterrupt: 

In [31]:
trn_trgt

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

## Create a sklearn pipe por each basic MLP model

In [5]:
from copy import deepcopy

datasets_data = [dev_social, dev_clinical, dev_pos_test, dev_personal]
datasets_name = ['social', 'clinical', 'pos_test', 'personal']
datasets_n_neurons = [1, 1, 1, 1] 
model_path = '../data/models'
n_folds = 5


if True: # remove when your are shure to spent time!!!
    cv_path = '../data/indexes'
    pipe_path = '../data/pipelines'
    model_path = '../data/models'

    for idataset, dataset_name in enumerate(datasets_name): 
        print('Processing %s'%(dataset_name))
        data = datasets_data[idataset]
        print('Dataset shape: %i, %i'%(data.shape[0],data.shape[1]))
        df_acc = pd.read_csv(os.path.join(model_path, '%s_kfold_model_acc.csv'%(dataset_name)),index_col=0)
        df_status = pd.read_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(dataset_name)),index_col=0)
        df_status = df_status.astype(bool)
        df_names = pd.read_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(dataset_name)),index_col=0)
        df_names = df_names.astype(str)

        for ifold in range(n_folds):
            cv_name = 'kFoldsCV_fold_%i_indexes.pkl'%(ifold)
            with open(os.path.join(cv_path,cv_name),'rb') as file_handler:
                [trn_idx,val_idx] = pickle.load(file_handler)
            pipe_name = 'kFoldsCV_fold_%i_pipe_%s.pkl'%(ifold, datasets_name[idataset])
            with open(os.path.join(pipe_path,pipe_name),'rb') as file_handler:
                pipe = joblib.load(file_handler)
            trn_data = pipe.transform(data)
            trn_trgt = dev_target.values
            
            # get the number of neurons choose in analysis notebook and process data
            if df_status.loc[ifold,str(datasets_n_neurons[idataset])] != True:
                print('Train the model')
            else:
                # load model
                print('%s - Loading Model with %i neuron(s): %s'%(dataset_name, datasets_n_neurons[idataset], 
                                                                df_names.loc[ifold, str(datasets_n_neurons[idataset])]))
                model_name = df_names.loc[ifold, str(datasets_n_neurons[idataset])]
                if os.path.exists(os.path.join(model_path, model_name)) == False:
                    print('Please do the training process!!!')
                    continue
                else:
                    model = MLPModel(n_hidden_neurons=datasets_n_neurons[idataset],verbose=0)
                    model.load(os.path.join(model_path, model_name))
                    
                
                # add this model to a pipeline
                # here I found a problem: when I tried to save a new pipeline, thinks start get weird!
                # I realise that I need to load the model and after that append it into the pipeline
                
                new_pipe = deepcopy(pipe)
                new_pipe.steps.append(('nn_%s'%(dataset_name), model.model))
                # for test
                #output_pipe = pipe.predict(data) # error because one of the previous steps has no predict within
                transformed_data = new_pipe[:-1].transform(data)
                output_pipe = new_pipe[-1].predict(transformed_data)
                output_model = model.model.predict(pipe.transform(data))
                print('Error Sum: ',np.sum(output_pipe-output_model))
                #pipe_nn_name = 'kFoldsCV_fold_%i_pipe+basicNN_%s.pkl'%(ifold, datasets_name[idataset])
                #joblib.dump(new_pipe,os.path.join(model_path, pipe_nn_name))
                
                #new_pipe = deepcopy(pipe)
                #new_pipe.steps.append(('nn_%s_no_output'%(dataset_name), model.model_with_no_output_layer()))
                #pipe_nn_name = 'kFoldsCV_fold_%i_pipe+basicNN_no_output_%s.pkl'%(ifold, datasets_name[idataset])
                #joblib.dump(new_pipe,os.path.join(model_path, pipe_nn_name))
                

Processing social
Dataset shape: 54, 13
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_0_MLPModel_1_hidden_neurons_0_init.pkl
Error Sum:  0.0
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_1_MLPModel_1_hidden_neurons_0_init.pkl
Error Sum:  0.0
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_2_MLPModel_1_hidden_neurons_0_init.pkl
Error Sum:  0.0
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_3_MLPModel_1_hidden_neurons_0_init.pkl
Error Sum:  0.0
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_4_MLPModel_1_hidden_neurons_0_init.pkl
Error Sum:  0.0
Processing clinical
Dataset shape: 54, 16
clinical - Loading Model with 1 neuron(s): clinical_kFoldsCV_fold_0_MLPModel_1_hidden_neurons_0_init.pkl
Error Sum:  0.0
clinical - Loading Model with 1 neuron(s): clinical_kFoldsCV_fold_1_MLPModel_1_hidden_neurons_1_init.pkl
Error Sum:  0.0
clinical - Loading Model with 1 neuron(s): clinical_kFoldsCV_fold_2_MLPModel_1_hidden_neuro

## Making model fusion

The fusion process will be using the following struct

1 - Social+Personal => SoPe Step

2 - SoPe + Clinical => SoPeCli

3 - SoPeCli+Pos-Tests

### Social + Personal

In [38]:
hidden_neurons = [1, 2, 3, 4, 5, 6] # first hint!
model_path = '../data/models'
n_folds = 5
datasets_n_neurons = [1, 1, 1, 1] # check the chose in analysis notebook

print('kFold Config Files Generation')
if True: # remove when your are shure to spent time!!!    
    datasets_name = ['social+personal']
    metrics = ['acc', 'sens', 'spec', 'sp']

    buffer = 0*np.ones([n_folds,len(hidden_neurons)])
    df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))

    for idataset, dataset_name in enumerate(datasets_name):
        for imetric in metrics:
            df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_%s.csv'%(dataset_name,imetric)))
    buffer = False*np.ones([n_folds,len(hidden_neurons)])
    df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))
    df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(dataset_name)))
    buffer = False*np.ones([n_folds,len(hidden_neurons)])
    df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))
    df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(dataset_name)))

kFold Config Files Generation


In [39]:
from sklearn.metrics import accuracy_score

print('kFold Training Process')

if True: # remove when your are shure to spent some time!!!
    basic_datasets = [dev_social, dev_personal]
    basic_datasets_names = ['social', 'personal']
    
    for ifold in range(n_folds):
        cv_name = 'kFoldsCV_fold_%i_indexes.pkl'%(ifold)
        fusioned_data = []
        with open(os.path.join(cv_path,cv_name),'rb') as file_handler:
            [trn_idx,val_idx] = pickle.load(file_handler) 
        print('Fold %i - Processing basic datasets'%(ifold))
        for idataset, dataset_name in enumerate(basic_datasets_names): 
            data = basic_datasets[idataset]
            df_acc = pd.read_csv(os.path.join(model_path, '%s_kfold_model_acc.csv'%(dataset_name)),index_col=0)
            df_status = pd.read_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(dataset_name)),index_col=0)
            df_status = df_status.astype(bool)
            df_names = pd.read_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(dataset_name)),index_col=0)
            df_names = df_names.astype(str)

            print('Fold %i - Processing %s'%(ifold, dataset_name))
            pipe_name = 'kFoldsCV_fold_%i_pipe_%s.pkl'%(ifold, dataset_name)
            with open(os.path.join(pipe_path,pipe_name),'rb') as file_handler:
                preproc_pipe = joblib.load(file_handler)
           
            # get the number of neurons choose in analysis notebook and process data
            if df_status.loc[ifold,str(datasets_n_neurons[idataset])] != True:
                print('[ERROR] Train the model')
            else:
                # load model
                print('%s - Loading Model with %i neuron(s): %s'%(dataset_name, datasets_n_neurons[idataset], 
                                                                df_names.loc[ifold, str(datasets_n_neurons[idataset])]))
                model_name = df_names.loc[ifold, str(datasets_n_neurons[idataset])]
                if os.path.exists(os.path.join(model_path, model_name)) == False:
                    print('Please do the training process!!!')
                    continue
                else:
                    model = MLPModel(n_hidden_neurons=datasets_n_neurons[idataset],verbose=0)
                    model.load(os.path.join(model_path, model_name))
            
            nn_pipe = deepcopy(preproc_pipe)
            nn_pipe.steps.append(('nn_%s_no_output'%(dataset_name), model.model_with_no_output_layer()))
            
            if dataset_name == 'social':
                new_data = nn_pipe.predict(data)
            else:
                new_data = nn_pipe[:-1].transform(data)
                new_data = nn_pipe[-1].predict(new_data)
                
            if fusioned_data == []:
                fusioned_data = new_data
            else:
                fusioned_data = np.append(fusioned_data, new_data, axis=1)
                
    # training process using data processed with previous models
    dataset_name = 'social+personal'
    df_acc = pd.read_csv(os.path.join(model_path, '%s_kfold_model_acc.csv'%(dataset_name)),index_col=0)
    df_status = pd.read_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(dataset_name)),index_col=0)
    df_status = df_status.astype(bool)
    df_names = pd.read_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(dataset_name)),index_col=0)
    df_names = df_names.astype(str)

    for ineuron in hidden_neurons:
        best_acc = -1
        best_init = -1
        if df_status.loc[ifold,str(ineuron)] == False:
            for iinit in range(1):
                print('%s - Training Model with %i neurons - %i init'%(dataset_name, ineuron, iinit))
                model_name = '%s_kFoldsCV_fold_%i_MLPModel_%i_hidden_neurons_%i_init.pkl'%(dataset_name, 
                                                                                                    ifold, ineuron,
                                                                                                    iinit)
                if os.path.exists(os.path.join(model_path, model_name)) == False:
                    model = MLPModel(n_hidden_neurons=ineuron,verbose=0)
                    model.fit(fusioned_data, trn_trgt, trn_id=trn_idx, val_id=val_idx, epochs=1000, random_state=iinit)
                    model.save(os.path.join(model_path, model_name))
                else:
                    model = MLPModel(n_hidden_neurons=ineuron,verbose=0)
                    model.load(os.path.join(model_path, model_name))
                model_acc = accuracy_score(trn_trgt,np.sign(model.predict(fusioned_data)))   
                print('Acc (%i hidden neurons, %i init): '%(ineuron,iinit), model_acc)
                if model_acc > best_acc:
                    best_acc = model_acc
                    best_init = iinit
                    df_status.loc[ifold,str(ineuron)] = True
                    df_acc.loc[ifold,str(ineuron)] = model_acc
                    df_names.loc[ifold,str(ineuron)] = model_name
                    df_acc.to_csv(os.path.join(model_path, '%s_kfold_model_acc.csv'%(dataset_name)))
                    df_status.to_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(dataset_name)))
                    df_names.to_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(dataset_name)))
                del model
        else:
            print('Model training: done!')

kFold Training Process
Fold 0 - Processing basic datasets
Fold 0 - Processing social
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_0_MLPModel_1_hidden_neurons_0_init.pkl
Fold 0 - Processing personal
personal - Loading Model with 1 neuron(s): personal_kFoldsCV_fold_0_MLPModel_1_hidden_neurons_2_init.pkl
Fold 1 - Processing basic datasets
Fold 1 - Processing social
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_1_MLPModel_1_hidden_neurons_0_init.pkl


  if fusioned_data == []:


Fold 1 - Processing personal
personal - Loading Model with 1 neuron(s): personal_kFoldsCV_fold_1_MLPModel_1_hidden_neurons_3_init.pkl
Fold 2 - Processing basic datasets
Fold 2 - Processing social
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_2_MLPModel_1_hidden_neurons_0_init.pkl


  if fusioned_data == []:


Fold 2 - Processing personal
personal - Loading Model with 1 neuron(s): personal_kFoldsCV_fold_2_MLPModel_1_hidden_neurons_0_init.pkl
Fold 3 - Processing basic datasets
Fold 3 - Processing social
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_3_MLPModel_1_hidden_neurons_0_init.pkl


  if fusioned_data == []:


Fold 3 - Processing personal
personal - Loading Model with 1 neuron(s): personal_kFoldsCV_fold_3_MLPModel_1_hidden_neurons_1_init.pkl
Fold 4 - Processing basic datasets
Fold 4 - Processing social
social - Loading Model with 1 neuron(s): social_kFoldsCV_fold_4_MLPModel_1_hidden_neurons_0_init.pkl


  if fusioned_data == []:


Fold 4 - Processing personal
personal - Loading Model with 1 neuron(s): personal_kFoldsCV_fold_4_MLPModel_1_hidden_neurons_2_init.pkl
social+personal - Training Model with 1 neurons - 0 init


  if fusioned_data == []:


Acc (1 hidden neurons, 0 init):  0.8888888888888888
social+personal - Training Model with 2 neurons - 0 init
Acc (2 hidden neurons, 0 init):  0.8888888888888888
social+personal - Training Model with 3 neurons - 0 init
Acc (3 hidden neurons, 0 init):  0.8888888888888888
social+personal - Training Model with 4 neurons - 0 init
Acc (4 hidden neurons, 0 init):  0.8888888888888888
social+personal - Training Model with 5 neurons - 0 init
Acc (5 hidden neurons, 0 init):  0.8888888888888888
social+personal - Training Model with 6 neurons - 0 init
Acc (6 hidden neurons, 0 init):  0.8888888888888888


### (Social+Personal) + Clinical

In [40]:
hidden_neurons = [1, 2, 3, 4, 5, 6] # first hint!
model_path = '../data/models'
n_folds = 5
datasets_n_neurons = [1, 1, 1, 1] # check the chose in analysis notebook

print('kFold Config Files Generation')
if True: # remove when your are shure to spent time!!!    
    datasets_name = ['social+personal+clinical']
    metrics = ['acc', 'sens', 'spec', 'sp']

    buffer = 0*np.ones([n_folds,len(hidden_neurons)])
    df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))

    for idataset, dataset_name in enumerate(datasets_name):
        for imetric in metrics:
            df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_%s.csv'%(dataset_name,imetric)))
    buffer = False*np.ones([n_folds,len(hidden_neurons)])
    df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))
    df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_status.csv'%(dataset_name)))
    buffer = False*np.ones([n_folds,len(hidden_neurons)])
    df_buffer = pd.DataFrame(data=buffer, columns=hidden_neurons,index=range(n_folds))
    df_buffer.to_csv(os.path.join(model_path, '%s_kfold_model_names.csv'%(dataset_name)))

kFold Config Files Generation


In [41]:
# training process