In [None]:
#import tensorboard
#%load_ext tensorboard
#%tensorboard --logdir 'logs/'
import datetime
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_score
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import KFold
# Se importan librerías para graficar.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import backend as K
import kerastuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Activation, Input, Dropout
from keras import regularizers
from keras.callbacks import ModelCheckpoint
from tensorflow.keras import optimizers
from sklearn.preprocessing import PolynomialFeatures
from keras.regularizers import l2, l1

In [None]:
# Load the diabetes dataset from a CSV file located in the working directory.
df = pd.read_csv('diabetes.csv')

In [None]:
# Summary statistics for every numeric column of the raw frame.
df.describe()

In [None]:
# Count rows per class: Outcome == 0 is treated as healthy, anything else as sick.
outcome_is_zero = df['Outcome'].eq(0)
healthy_people_num = outcome_is_zero.sum()
sick_people_num = (~outcome_is_zero).sum()
total = len(df)
print(f"Healthy people: {healthy_people_num}")
print(f"Sick people: {sick_people_num}")
print(f"Total: {total}")

In [None]:
# Pie chart of the class balance computed in the previous cell.
sns.set(style="whitegrid")
labels = ['No Diabéticos', 'Diabéticos']  # fixed typo: was 'Diabétos'
sizes = [healthy_people_num, sick_people_num]
colors = ["green", "red"]
plt.figure(figsize=(7, 7))
plt.pie(
    sizes,
    labels=labels,
    explode=(0.01, 0),
    colors=colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)

plt.title('Porcentaje de diabéticos.')
plt.show()

In [None]:
# Work on a copy in which zeros in these columns are marked as missing values.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
df2 = df.copy()
zero_as_missing_cols = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
df2[zero_as_missing_cols] = df2[zero_as_missing_cols].replace(0, np.nan)

In [None]:
# Horizontal boxplot of every column of df2, drawn on an explicitly created Axes.
f, ax = plt.subplots(figsize=(20, 10))
ax.set_ylabel('Variables')
ax.set_title("Boxplots")
ax = sns.boxplot(data=df2, orient='h', palette='Set2', ax=ax)

In [None]:
# Bar chart of the number of missing values per column, annotated with the
# percentage of rows that each bar represents.
sns.set_theme(style="whitegrid")
ax = sns.barplot(x=df2.columns, y=df2.isnull().sum())
plt.xticks(rotation=45);
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f"{100 * bar_height / df.shape[0]:.1f}%",
        (bar_center, bar_height),
        ha='center',
        va='center',
        xytext=(0, 10),
        textcoords='offset points',
    )

In [None]:
# Pairwise correlation matrix of df2; the cell displays each column's
# correlation with 'Outcome', sorted from highest to lowest.
correlations = df2.corr()
correlations['Outcome'].sort_values(ascending=False)

In [None]:
# Drop the 'Insulin' column. errors='ignore' keeps the cell idempotent: `df` is
# reassigned in place, so re-running the cell would otherwise raise a KeyError.
df = df.drop(columns=['Insulin'], errors='ignore')
df.head()

In [None]:
 def separate_data_and_labels(df):
     """Split a frame into features and labels, taking the last column as the label.

     Returns a tuple ``(features, labels)`` where ``features`` is a copy of ``df``
     without its last column and ``labels`` is an array of shape ``(n_rows, 1)``.
     The input frame is not modified.
     """
     frame = df.copy()
     label_column = frame.columns[-1]
     labels = frame[label_column].values.reshape(frame.shape[0], 1)
     features = frame.drop([label_column], axis=1)
     return features, labels

In [None]:
# Seed NumPy's global RNG so the random split below is reproducible.
np.random.seed(0)
# Each row goes to the train/validation set when its uniform draw is below 0.8,
# so the split sizes are approximate rather than an exact 80/20.
msk = np.random.rand(len(df)) < 0.8
train_val_df = df[msk]
testData = df[~msk]

# Separate features from the label (last column) for both partitions.
train_val_data , y_train_val = separate_data_and_labels(train_val_df)
x_test_data, y_test_values = separate_data_and_labels(testData)

In [None]:
def specificity(y_true, y_pred):
    """Return TN / (TN + FP), with K.epsilon() added to the denominator."""
    negatives = 1 - y_true
    true_neg = K.sum(K.round(K.clip(negatives * (1 - y_pred), 0, 1)))
    false_pos = K.sum(K.round(K.clip(negatives * y_pred, 0, 1)))
    denominator = true_neg + false_pos + K.epsilon()
    return true_neg / denominator


def negative_predictive_value(y_true, y_pred):
    """Return TN / (TN + FN), with K.epsilon() added to the denominator."""
    predicted_negative = 1 - y_pred
    true_neg = K.sum(K.round(K.clip((1 - y_true) * predicted_negative, 0, 1)))
    false_neg = K.sum(K.round(K.clip(y_true * predicted_negative, 0, 1)))
    denominator = true_neg + false_neg + K.epsilon()
    return true_neg / denominator

def sensitivity(y_true, y_pred):
    """Return TP / (actual positives), with K.epsilon() added to the denominator."""
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def positive_predictive_value(y_true, y_pred):
    """Return TP / (predicted positives), with K.epsilon() added to the denominator."""
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positive = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged_positive + K.epsilon())

In [None]:
def plot_loss(history):
    """Plot training loss (red) and validation loss (blue) per epoch.

    ``history`` must expose a ``history`` mapping with 'loss' and 'val_loss' entries.
    """
    curves = history.history
    # Training curve first, validation second, matching the legend order below.
    for key, line_style in (('loss', 'r-'), ('val_loss', 'b-')):
        plt.plot(curves[key], line_style)
    plt.xlabel('Epochs')
    plt.ylabel('Cost')
    plt.legend(["Training loss", "Validation loss"])
    plt.title('Loss Graph')
    plt.show()

In [None]:
class DiabetesPredictor:
    """Preprocessing pipeline and Keras binary classifier for the diabetes dataset.

    ``fit`` learns every preprocessing statistic (medians, outlier limits, the
    optional discretizer, mean and standard deviation) from the training data
    only, re-applies them to validation/test data, trains the network, and stores
    the validation and training metrics on the instance.
    """

    def __init__(self, name):
        """Store ``name``, which is used in the checkpoint path and the ROC plot title."""
        self.name = name

    def fit(self, train_df, y_train, val_df, y_val, replaceOutliers, replaceNulls, nullColumns, outliersColumnsMap, columnsToRemove, polyFeatDeg, binsDiscretizer, earlyStop, dropOut, regu, batchNormalization, learning_rate, momentum, decay, multilayer, layerUnits):
        """Preprocess the data, train the model and record train/validation metrics.

        Args:
            train_df, y_train: training features (DataFrame) and labels.
            val_df, y_val: validation features (DataFrame) and labels.
            replaceOutliers: replace values outside the learned limits by the column median.
            replaceNulls: treat zeros in ``nullColumns`` as missing and impute the median.
            nullColumns: columns in which a zero means "missing".
            outliersColumnsMap: ``{column: [min, max]}`` bounds that widen the IQR limits.
            columnsToRemove: columns dropped before any other step.
            polyFeatDeg: polynomial feature degree; values <= 0 disable the step.
            binsDiscretizer: number of bins; values <= 1 disable the step.
            earlyStop: add an EarlyStopping callback on ``val_loss``.
            dropOut, batchNormalization: optional input layer of the single-layer model
                (dropout takes precedence when both are set).
            regu: ``'l1'`` or ``'l2'`` to regularize the single-layer model's kernel.
            learning_rate, momentum, decay: SGD optimizer settings.
            multilayer: build hidden layers from ``layerUnits`` instead of a single unit.
            layerUnits: hidden layer sizes, required when ``multilayer`` is true.
        """
        train_dataframe = train_df.copy()

        self.columnsToRemove = columnsToRemove.copy()
        self.nullCols = nullColumns.copy()
        self.replaceNulls = replaceNulls
        self.replaceOutliers = replaceOutliers
        self.polyFeatDeg = polyFeatDeg
        self.binsDiscretizer = binsDiscretizer
        self.earlyStop = earlyStop
        self.dropOut = dropOut
        self.regu = regu
        self.batchNormalization = batchNormalization
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.decay = decay
        self.multilayer = multilayer
        self.layerUnits = layerUnits

        self.replace_values_nulls = []
        self.replace_values_outliers = []
        self.outliersLimits = []
        self.discretizer = None
        self.history = None

        # Columns that are going to be removed cannot be imputed or clipped.
        self.nullCols = [n for n in self.nullCols if n not in self.columnsToRemove]
        self.outlierCols = {}
        for k in outliersColumnsMap:
            if k not in self.columnsToRemove:
                self.outlierCols[k] = outliersColumnsMap[k].copy()

        train_dataframe = self.__preprocess_data__(train_dataframe, training=True)

        x_train_values = train_dataframe.values
        y_train_values = y_train.copy()

        self.input_shape = x_train_values.shape

        x_val_df = val_df.copy()
        x_val_df = self.__preprocess_data__(x_val_df)
        x_val_values = x_val_df.values
        y_val_values = y_val.copy()

        self.model = self.__train_model__(x_train_values, y_train_values, x_val_values, y_val_values)

        # Both frames are already preprocessed, hence testing=False.
        self.auc, self.spe, self.sen, self.ppv, self.npv, self.fpr, self.tpr = self.evaluate(x_val_df, y_val_values, testing = False)

        self.aucTrain, self.speTrain, self.senTrain, self.ppvTrain, self.npvTrain, self.fprTrain, self.tprTrain = self.evaluate(train_dataframe, y_train_values, testing = False)

    def __preprocess_data__(self, data, training = False):
        """Apply the configured preprocessing steps and return a new DataFrame.

        With ``training=True`` the statistics used by each step are (re)computed
        from ``data`` and stored on the instance; otherwise the stored ones are used.
        """
        df = data.copy()
        df = self.__remove_columns__(df)

        if self.replaceNulls and self.nullCols:
            df[self.nullCols] = df[self.nullCols].replace(0, np.nan)

        if training:
            self.replace_values_outliers = self.__get_cols_median__(df)
            self.outliersLimits = self.__get_outliers_limits__(df)

        if self.replaceOutliers:
            df = self.__replace_outliers__(df)

        if training:
            # Medians for imputation are taken after outliers were replaced.
            self.replace_values_nulls = self.__get_cols_median__(df)

        if self.replaceNulls:
            df = self.__replace_nulls__(df)

        if self.polyFeatDeg > 0:
            poly = PolynomialFeatures(degree=self.polyFeatDeg)
            polyArray = poly.fit_transform(df)
            # get_feature_names was replaced by get_feature_names_out in newer scikit-learn.
            if hasattr(poly, 'get_feature_names_out'):
                c = poly.get_feature_names_out(df.columns)
            else:
                c = poly.get_feature_names(df.columns)
            df = pd.DataFrame(polyArray, columns=c, index=df.index)

        if self.binsDiscretizer > 1:
            if training:
                # Bin edges are learned on the training data only and then reused.
                self.discretizer = KBinsDiscretizer(n_bins=self.binsDiscretizer, encode='ordinal', strategy='uniform')
                self.discretizer.fit(df)
            # Wrap the result back into a DataFrame so the column-wise helpers keep working.
            df = pd.DataFrame(self.discretizer.transform(df), columns=df.columns, index=df.index)

        if training:
            self.mean_cols = self.__get_cols_mean__(df)
            self.std_cols = self.__get_cols_std__(df)

        df = self.__normalize_data__(df)

        return df

    def __get_cols_mean__(self, data):
        """Return the per-column mean as a Series indexed by column name."""
        return data.mean(axis = 0)

    def __get_cols_std__(self, data):
        """Return the per-column standard deviation as a Series indexed by column name."""
        return data.std(axis = 0)

    def __get_cols_median__(self, data):
        """Return the per-column median as a Series indexed by column name."""
        return data.median(axis = 0)

    def __get_outliers_limits__(self, data):
        """Return a one-row DataFrame whose cells hold ``[min, max]`` for each column.

        Columns listed in ``self.outlierCols`` get the 1.5*IQR whiskers widened by the
        user-supplied bounds; every other column gets ``[0, inf]``.
        """
        limits = {}
        for col in data.columns:
            col_min = 0
            col_max = np.inf
            if col in self.outlierCols:
                # Parameters used to locate the outliers.
                q1 = data[col].quantile(0.25)
                q3 = data[col].quantile(0.75)
                iqr = q3 - q1
                # Limits of the typical values.
                lower_tail = q1 - 1.5 * iqr
                upper_tail = q3 + 1.5 * iqr

                col_min = min(lower_tail, self.outlierCols[col][0])
                col_max = max(upper_tail, self.outlierCols[col][1])
            # One-element list per column -> a single row whose cell is the [min, max] list.
            limits[col] = [[col_min, col_max]]
        return pd.DataFrame(limits, dtype='object')

    def __replace_outliers__(self, dataframe):
        """Replace values outside the stored limits by the stored training median."""
        df = dataframe.copy()
        for col in df.columns:
            out_min, out_max = self.outliersLimits[col].iloc[0]
            # Comparisons with NaN are False, so missing values are left untouched.
            is_outlier = (df[col] > out_max) | (df[col] < out_min)
            df[col] = df[col].mask(is_outlier, self.replace_values_outliers[col])
        return df

    def __replace_nulls__(self, dataframe):
        """Fill missing values of ``self.nullCols`` with the stored training median."""
        df = dataframe.copy()
        for col in self.nullCols:
            df[col] = df[col].fillna(self.replace_values_nulls[col])
        return df

    def __normalize_data__(self, dataframe):
        """Standardize each column with the stored training mean and standard deviation."""
        df = dataframe.copy()
        for col in df.columns:
            if (self.std_cols[col] != 0):
                df[col] = (df[col] - self.mean_cols[col]) / self.std_cols[col]
            else:
                # A constant column carries no information; avoid dividing by zero.
                df[col] = 0
        return df

    def __remove_columns__(self, dataframe):
        """Return a copy of ``dataframe`` without ``self.columnsToRemove``."""
        df = dataframe.copy()
        df = df.drop(self.columnsToRemove, axis=1)
        return df

    def __model_builder__(self):
        """Build and compile the network described by the options given to ``fit``.

        Raises:
            ValueError: if ``multilayer`` is set but ``layerUnits`` is empty.
        """
        model = Sequential()
        initializer = tf.keras.initializers.GlorotNormal(seed=7)
        n_features = self.input_shape[1]
        if self.multilayer:
            if not self.layerUnits:
                raise ValueError("multilayer=True requires layerUnits with at least one hidden layer size")
            # One ReLU hidden layer per entry; only the first one declares the input shape.
            for idx, units in enumerate(self.layerUnits):
                extra = {'input_shape': (n_features,)} if idx == 0 else {}
                model.add(Dense(units, kernel_initializer=initializer, bias_initializer=initializer, activation='relu', **extra))
            model.add(Dense(1, kernel_initializer=initializer, bias_initializer=initializer, activation='sigmoid'))
        else:
            if self.dropOut:
                model.add(Dropout(0.5, input_shape=(n_features,)))
            elif self.batchNormalization:
                model.add(BatchNormalization(input_shape=(n_features,)))

            reg = reg_wrapper(self.regu, 0.1) if self.regu in ('l1', 'l2') else None
            model.add(Dense(1, kernel_initializer=initializer, bias_initializer=initializer, kernel_regularizer=reg, activation='sigmoid'))

        # Use the hyperparameters passed to fit(); they were previously ignored in
        # favour of hard-coded values, which made every hyperparameter sweep a no-op.
        # NOTE(review): `decay` is a legacy optimizer argument - confirm the installed
        # Keras version still accepts it.
        model.compile(optimizer=optimizers.SGD(learning_rate=self.learning_rate, momentum=self.momentum, decay=self.decay),
                        loss=keras.losses.BinaryCrossentropy(),
                        metrics=[tf.keras.metrics.AUC()])
        return model

    def __train_model__(self, x_train, y_train, x_val, y_val):
        """Train a freshly built model for up to 125 epochs, plot the loss and return it.

        The lowest-``val_loss`` model is checkpointed to ``saved_models/<name>``; the
        returned model is the in-memory one as left by training, not a reloaded checkpoint.
        """
        model = self.__model_builder__()

        ckpt_model = 'saved_models/' + self.name
        checkpoint = ModelCheckpoint(ckpt_model,
                            monitor='val_loss',
                            verbose=0,
                            save_best_only=True,
                            mode='min')

        cbks = [checkpoint]
        if self.earlyStop:
            cbks.append(tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5))

        history = model.fit(x_train, y_train, epochs=125, validation_data = (x_val, y_val), verbose=0, callbacks = cbks)

        self.history = history
        # Plot Loss
        plot_loss(history)

        # Return trained model
        return model

    def evaluate(self, x_val_df, y_val, testing=True):
        """Compute AUC, specificity, sensitivity, PPV, NPV and the ROC points.

        With ``testing=True`` the features are preprocessed first and the ROC curve
        and metrics are also plotted/printed; with ``testing=False`` the features are
        expected to be preprocessed already.
        """
        x_df = x_val_df.copy()
        predictions, rounded_preds = self.predict(x_df, testing=testing)

        fpr_keras, tpr_keras, thresholds_keras = roc_curve(y_val, predictions)
        auc_keras = auc(fpr_keras, tpr_keras)
        y_true = y_val.astype(float)
        y_hat = rounded_preds.astype(float)
        spe = specificity(y_true, y_hat)
        sen = sensitivity(y_true, y_hat)
        ppv = positive_predictive_value(y_true, y_hat)
        npv = negative_predictive_value(y_true, y_hat)

        if testing:
            self.plot_roc(fpr_keras, tpr_keras, auc_keras)
            print('AUC ' + str(auc_keras))
            print('Specificity: ' + str(spe))
            print('Sensitivity: ' + str(sen))
            print('Positive Predictive Value: ' + str(ppv))
            print('Negative Predictive Value: ' + str(npv))

        return auc_keras, spe, sen, ppv, npv, fpr_keras, tpr_keras

    def predict(self, x_val_df, testing=True):
        """Return ``(predictions, rounded_predictions)`` for the given features.

        Features are preprocessed with the stored statistics only when ``testing`` is true.
        """
        x_df = x_val_df.copy()
        if testing:
            x_df = self.__preprocess_data__(x_df)
        predictions = self.model(x_df.values)
        rounded_preds = np.rint(predictions)
        return predictions, rounded_preds

    def plot_roc_validation(self):
        """Plot the ROC curve stored for the validation data during ``fit``."""
        self.plot_roc(self.fpr, self.tpr, self.auc)

    def plot_roc(self, fpr, tpr, auc):
        """Plot a ROC curve with its area in the legend and the model name in the title."""
        plt.figure(1)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(fpr, tpr, label=' (Area = {:.3f})'.format(auc))
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        plt.title('ROC curve ' + self.name)
        plt.legend(loc='best')
        plt.show()

In [None]:
def reg_wrapper(type, value):
    """Return an L1 or L2 Keras regularizer of strength ``value``.

    ``type`` selects the penalty ('l1' or 'l2'); any other value yields ``None``.
    """
    factories = {'l2': regularizers.l2, 'l1': regularizers.l1}
    factory = factories.get(type)
    return factory(value) if factory is not None else None

In [None]:
kf = KFold(n_splits = 5)

def cross_val(name, replaceOutliers=False, replaceNulls=False, nullColumns=None, outliersColumnsMap=None, columnsToRemove=None, polyFeatDeg = -1, binsDiscretizer = -1, earlyStop = False, dropOut = False, regu = '', batchNormalization = False, learning_rate = 1e-5, momentum= 0.99, decay = 1e-4, multilayer = False, layerUnits = None):
    """Train one DiabetesPredictor per fold of ``kf`` and report the mean metrics.

    Folds are taken over the module-level ``train_val_data`` / ``y_train_val``. Each
    fold's model is named ``name + <fold index>``; all remaining arguments are
    forwarded unchanged to ``DiabetesPredictor.fit``.

    Returns:
        ``(bestModel, aucMean, speMean, senMean, ppvMean, npvMean, aucTrainMean)``
        where ``bestModel`` is the fold model with the highest validation AUC.
    """
    # None stands in for "empty" so that no mutable object is shared between calls.
    nullColumns = [] if nullColumns is None else nullColumns
    outliersColumnsMap = {} if outliersColumnsMap is None else outliersColumnsMap
    columnsToRemove = [] if columnsToRemove is None else columnsToRemove
    layerUnits = [] if layerUnits is None else layerUnits

    bestModel = None
    # Per-fold values, keyed by the DiabetesPredictor attribute they are read from.
    per_fold = {'auc': [], 'spe': [], 'sen': [], 'ppv': [], 'npv': [], 'aucTrain': []}
    for i, (train_index, val_index) in enumerate(kf.split(train_val_data)):
        print('Fold ' + str(i))
        newModel = DiabetesPredictor(name + str(i))
        X_train, X_val = train_val_data.iloc[train_index], train_val_data.iloc[val_index]
        Y_train, Y_val = y_train_val[train_index], y_train_val[val_index]
        newModel.fit(X_train, Y_train, X_val, Y_val, replaceOutliers=replaceOutliers, replaceNulls=replaceNulls, nullColumns = nullColumns, outliersColumnsMap=outliersColumnsMap, columnsToRemove=columnsToRemove, polyFeatDeg=polyFeatDeg, binsDiscretizer=binsDiscretizer, earlyStop=earlyStop, dropOut=dropOut, regu=regu, batchNormalization=batchNormalization, learning_rate = learning_rate, momentum= momentum, decay = decay, multilayer = multilayer, layerUnits = layerUnits)
        for attr, values in per_fold.items():
            values.append(getattr(newModel, attr))
        if bestModel is None or newModel.auc > bestModel.auc:
            bestModel = newModel

    aucTrainMean = np.mean(np.array(per_fold['aucTrain']))
    aucMean = np.mean(np.array(per_fold['auc']))
    speMean = np.mean(np.array(per_fold['spe']))
    senMean = np.mean(np.array(per_fold['sen']))
    ppvMean = np.mean(np.array(per_fold['ppv']))
    npvMean = np.mean(np.array(per_fold['npv']))

    print('AUC Val: ' + str(aucMean))
    print('AUC Train: ' + str(aucTrainMean))
    print('Specificity: ' + str(speMean))
    print('Sensitivity: ' + str(senMean))
    print('Positive Predictive Value: ' + str(ppvMean))
    print('Negative Predictive Value: ' + str(npvMean))

    return bestModel, aucMean, speMean, senMean, ppvMean, npvMean, aucTrainMean

### Sin limpiar datos

In [None]:
# Baseline: cross-validate with every preprocessing option left at its default.
(simplePred, aucPred, spePred, senPred,
 ppvPred, npvPred, aucTrainPred) = cross_val(name='predictor')

## Limpiando Datos
#### Reemplazando Nulls por la mediana

In [None]:
# Treat zeros in the listed columns as missing and impute the training median.
(repNullPred, aucrepNull, sperepNull, senrepNull,
 ppvrepNull, npvrepNull, aucTrainRepNull) = cross_val(
    'rep_nulls_predictor',
    replaceNulls=True,
    nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
)

#### Reemplazando Outliers

In [None]:
# Replace outliers by the training median. `np.inf` is used because the
# `np.Infinity` alias was removed in NumPy 2.0.
cross_val(
    'rep_outliers_predictor',
    replaceOutliers=True,
    outliersColumnsMap={
        'BMI': [18.5, 50],
        'BloodPressure': [40, 120],
        'SkinThickness': [0, np.inf],
        'Pregnancies': [0, np.inf],
    },
)

#### Reemplazando Outliers y nulls

In [None]:
# Replace both outliers and missing values. `np.inf` is used because the
# `np.Infinity` alias was removed in NumPy 2.0.
cross_val(
    'rep_outliers_nulls_predictor',
    replaceNulls=True,
    nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
    replaceOutliers=True,
    outliersColumnsMap={
        'BMI': [18.5, 50],
        'BloodPressure': [40, 120],
        'SkinThickness': [0, np.inf],
        'Pregnancies': [0, np.inf],
    },
)

Comparando los resultados anteriores:
- AUC reemplazando nulls: 0.849
- AUC reemplazando outliers y nulls: 0.847
- AUC reemplazando outliers: 0.840

Dado que el valor más alto es reemplazando nulls, continuamos el análisis con este parámetro.

#### Removiendo Columnas

In [None]:
# Try removing one column at a time; keep the columns whose removal beats the
# AUC obtained when only missing values are replaced.
bestAuc = aucrepNull
tryRemovingCols = ['BloodPressure', 'Age', 'DiabetesPedigreeFunction', 'Pregnancies', 'SkinThickness']
removeCols = []
removeColsAuc = []
nullCols = ['Glucose', 'BloodPressure', 'SkinThickness', 'BMI']

for col in tryRemovingCols:
    print(col)
    (remColPred, aucremCol, speremCol, senremCol,
     ppvremCol, npvremCol, aucTrainRemCol) = cross_val(
        f'rem_col_predictor_{col}',
        replaceNulls=True,
        nullColumns=nullCols,
        columnsToRemove=[col],
    )
    if aucremCol > bestAuc:
        removeCols.append(col)
        removeColsAuc.append(aucremCol)

In [None]:
# Columns whose individual removal improved the AUC, and the AUC each one reached.
print(removeCols)
print(removeColsAuc)

#### Eliminando todas las columnas que superaron el valor anterior.

In [None]:
# Remove all the selected columns at once.
cross_val(
    'rem_cols_predictor',
    replaceNulls=True,
    nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
    columnsToRemove=removeCols,
)

AUC:
- Eliminando Age: 0.85
- Eliminando 'Age', 'SkinThickness': 0.849
- Eliminando SkinThickness: 0.849

Procedemos eliminando la columna de Age

In [None]:
# Sweep the polynomial feature degree from 1 to 4 and keep each mean validation AUC.
aucPolys = []
for i in range(1, 5):
    print(i)
    (polyPred, aucPoly, spePoly, senPoly,
     ppvPoly, npvPoly, aucTrainPoly) = cross_val(
        f'featPol{i}_',
        replaceNulls=True,
        nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
        columnsToRemove=['Age'],
        polyFeatDeg=i,
    )
    aucPolys.append(aucPoly)

# Degrees start at 1 while list positions start at 0.
bestPolDeg = aucPolys.index(max(aucPolys)) + 1

In [None]:
# Mean validation AUC for each polynomial degree, in sweep order.
print(aucPolys)

In [None]:
# Report the best degree of the plain polynomial sweep.
print(f"Best Degree: {bestPolDeg} --- AUC: {max(aucPolys)}")

#### Vuelvo a probar con EarlyStopping (había overfitting)

In [None]:
# Same polynomial degree sweep, this time with early stopping enabled.
aucEarlyPolys = []
for i in range(1, 5):
    print(i)
    (polyPred, aucPoly, spePoly, senPoly,
     ppvPoly, npvPoly, aucTrainPoly) = cross_val(
        f'featPolEarly{i}_',
        replaceNulls=True,
        nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
        columnsToRemove=['Age'],
        polyFeatDeg=i,
        earlyStop=True,
    )
    aucEarlyPolys.append(aucPoly)

bestPolDeg = aucEarlyPolys.index(max(aucEarlyPolys)) + 1

In [None]:
# Report the AUC of the early-stopping sweep (aucEarlyPolys), which is the list
# bestPolDeg was just computed from; aucPolys belongs to the previous sweep.
print('Best Degree: ' + str(bestPolDeg) + " --- AUC: " + str(max(aucEarlyPolys)))

#### Con reg L1

In [None]:
# Polynomial degree sweep with L1 regularization. The run name is specific to
# this experiment: reusing 'featPolEarly' would overwrite the checkpoints of the
# early-stopping runs and mislabel the ROC plots.
aucReguPolys = []
for i in range(1, 6):
    print(i)
    (polyPred, aucPoly, spePoly, senPoly,
     ppvPoly, npvPoly, aucTrainPoly) = cross_val(
        'featPolL1_' + str(i) + '_',
        replaceNulls=True,
        nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
        columnsToRemove=['Age'],
        polyFeatDeg=i,
        regu='l1',
    )
    aucReguPolys.append(aucPoly)

bestPolReguL1Deg = aucReguPolys.index(max(aucReguPolys)) + 1

In [None]:
# Report the AUC of the regularized sweep (aucReguPolys), which is the list
# bestPolReguL1Deg was computed from; aucPolys belongs to the unregularized sweep.
print('Best Degree: ' + str(bestPolReguL1Deg) + " --- AUC: " + str(max(aucReguPolys)))

#### Con reg L2

In [None]:
# Polynomial degree sweep with L2 regularization. The run name is specific to
# this experiment: reusing 'featPolEarly' would overwrite the checkpoints of the
# early-stopping runs and mislabel the ROC plots.
aucReguPolys = []
for i in range(1, 6):
    print(i)
    (polyPred, aucPoly, spePoly, senPoly,
     ppvPoly, npvPoly, aucTrainPoly) = cross_val(
        'featPolL2_' + str(i) + '_',
        replaceNulls=True,
        nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
        columnsToRemove=['Age'],
        polyFeatDeg=i,
        regu='l2',
    )
    aucReguPolys.append(aucPoly)

bestPolReguL2Deg = aucReguPolys.index(max(aucReguPolys)) + 1

In [None]:
# Report the AUC of the regularized sweep (aucReguPolys), which is the list
# bestPolReguL2Deg was computed from; aucPolys belongs to the unregularized sweep.
print('Best Degree: ' + str(bestPolReguL2Deg) + " --- AUC: " + str(max(aucReguPolys)))

#### Con DropOut

In [None]:
# Degree-4 polynomial features with a dropout layer in front of the output unit.
cross_val(
    'dropOut',
    replaceNulls=True,
    nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
    columnsToRemove=['Age'],
    polyFeatDeg=4,
    dropOut=True,
)

#### Con Batch Normalization

In [None]:
# Batch normalization has to be requested explicitly; without the flag this run
# was identical to the plain "remove Age" configuration.
cross_val(
    "batch",
    replaceNulls=True,
    nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
    columnsToRemove=['Age'],
    batchNormalization=True,
)

Entre EarlyStopping, DropOut y regularizadores, los mejores resultados se obtuvieron con regularizadores.

Aunque no se obtuvieron mejoras utilizando features polinomiales y regularización, ambos se utilizarán en los próximos análisis a fin de evitar overfitting.

In [None]:
# Reference configuration for the following sweeps: degree-3 features with L2.
cross_val(
    'l2',
    replaceNulls=True,
    nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
    columnsToRemove=['Age'],
    polyFeatDeg=3,
    regu="l2",
)

### Variación del learning rate

In [None]:
# Sweep the learning rate over several orders of magnitude.
aucLrPolys = []
lrs = [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1]
for lr in lrs:
    print(lr)
    (lrPred, aucLr, speLr, senLr,
     ppvLr, npvLr, aucTrainLr) = cross_val(
        f'lr{lr}',
        replaceNulls=True,
        nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
        columnsToRemove=['Age'],
        polyFeatDeg=3,
        regu="l2",
        learning_rate=lr,
    )
    aucLrPolys.append(aucLr)

best_lr_position = aucLrPolys.index(max(aucLrPolys))
bestLr = lrs[best_lr_position]

In [None]:
# Report the best learning rate of the sweep.
print(f"Best LR: {bestLr} --- AUC: {max(aucLrPolys)}")

### Variando momentum

In [None]:
# Sweep the SGD momentum. Each run is named after its own momentum value: the
# stale `lr` variable left over from the learning-rate loop gave every run the
# same name, so all of them wrote to the same checkpoint path.
aucsMom = []
moms = [0.9, 0.99, 0.999, 0.9999]
for mom in moms:
    print(mom)
    (momPred, aucMom, speMom, senMom,
     ppvMom, npvMom, aucTrainMom) = cross_val(
        'mom' + str(mom),
        replaceNulls=True,
        nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
        columnsToRemove=['Age'],
        polyFeatDeg=3,
        regu="l2",
        learning_rate=1e-5,
        momentum=mom,
    )
    aucsMom.append(aucMom)

bestMom = moms[aucsMom.index(max(aucsMom))]

In [None]:
# Report the best momentum of the sweep.
print(f"Best Mom: {bestMom} --- AUC: {max(aucsMom)}")

### Variando learning rate decay

In [None]:
# Sweep the learning-rate decay.
aucsDecays = []
decays = [1e-3, 1e-4, 1e-5]
for dec in decays:
    print(dec)
    (decPred, aucDec, speDec, senDec,
     ppvDec, npvDec, aucTrainDec) = cross_val(
        f"dec{dec}",
        replaceNulls=True,
        nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
        columnsToRemove=['Age'],
        polyFeatDeg=3,
        regu="l2",
        learning_rate=1e-5,
        momentum=0.999,
        decay=dec,
    )
    aucsDecays.append(aucDec)

best_decay_position = aucsDecays.index(max(aucsDecays))
bestDec = decays[best_decay_position]

In [None]:
# Report the best decay of the sweep.
print(f"Best Decay: {bestDec} --- AUC: {max(aucsDecays)}")

### Multilayer

In [None]:
# The multilayer model reads its hidden layer sizes from `layerUnits`; leaving
# it at the empty default makes model construction fail. The sizes below are
# only a starting point and should be tuned.
cross_val(
    'multi',
    replaceNulls=True,
    nullColumns=['Glucose', 'BloodPressure', 'SkinThickness', 'BMI'],
    columnsToRemove=['Age'],
    polyFeatDeg=3,
    learning_rate=1,
    momentum=0.9,
    decay=1e-5,
    multilayer=True,
    layerUnits=[16, 8],
    earlyStop=True,
)