In [None]:
# imports
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score, precision_score, f1_score, recall_score
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.python.keras.utils.vis_utils import plot_model
from tensorflow.python.keras.layers import Dense, Dropout
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Sequential
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras import metrics
import pandas as pd

# import custom modules
import sys
sys.path.insert(1, '../utils/')
import model_utils

In [None]:
# Read the comma-separated dataset into the notebook-wide working frame
df = pd.read_csv(
    "../../pre_processing/datasets/no_filtered_analysis.csv",
    sep=",",
)

In [None]:
# Discard the "RA" column when the loaded dataset provides it
if "RA" in df.columns:
    print("Removing column RA")
    df.drop(columns=["RA"], inplace=True)

In [None]:
# Preview the first five rows of the working frame
df.head(n=5)

In [None]:
# Configure the neural network (layers, layer width, dropout, optimizer)
def create_model(optimizer="adam", dropout=0.2, init='uniform', input_dim=61):
    """Build and compile the fully connected classifier used by KerasClassifier.

    Architecture: two hidden ``Dense(8, relu)`` layers, each followed by a
    ``Dropout`` layer, and a 2-unit softmax output. The model is compiled with
    categorical cross-entropy and reports accuracy, MSE, precision and recall.

    Parameters
    ----------
    optimizer : str or tf.keras.optimizers.Optimizer
        The default ``"adam"`` (case-insensitive) selects Adam with a learning
        rate of 0.01. Any other value is forwarded to ``compile`` unchanged so
        the argument can actually be tuned from a grid search.
    dropout : float
        Drop rate applied after each hidden layer.
    init : str
        Accepted for interface compatibility only.
        NOTE(review): this value is not applied to the layers, so they keep
        Keras' default initializer. Pass it as ``kernel_initializer`` only if
        changing the default weight initialization is intended.
    input_dim : int
        Number of input features expected by the first layer.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # Use the public tf.keras layer classes so they match the public
    # ``Sequential`` container; mixing in ``tensorflow.python.keras`` layers
    # can be rejected by ``Sequential.add`` on some TensorFlow builds.
    layers = tf.keras.layers

    model = Sequential()
    model.add(layers.Dense(units=8, input_dim=input_dim, activation='relu'))
    model.add(layers.Dropout(rate=dropout))
    # Only the first layer needs the input size; later layers infer it.
    model.add(layers.Dense(units=8, activation='relu'))
    model.add(layers.Dropout(rate=dropout))
    model.add(layers.Dense(units=2, activation='softmax'))

    # Keep the tuned Adam configuration for the default; ``learning_rate`` is
    # the supported keyword (``lr`` is a deprecated alias).
    if isinstance(optimizer, str) and optimizer.lower() == "adam":
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=[
        "acc",
        "mse",
        metrics.Precision(),
        metrics.Recall(),
    ])

    return model

In [None]:
def run_prediction(X_train, X_test, y_train, y_test):
    """Tune the Keras pipeline on one training split and score it on the test split.

    A ``MinMaxScaler`` + ``KerasClassifier`` pipeline is grid-searched over the
    number of epochs and the dropout rate. The best configuration is refitted
    on the whole training split and then evaluated on the test split.

    Parameters
    ----------
    X_train, X_test : array-like of shape (n_samples, n_features)
        Feature matrices; must expose ``.shape``.
    y_train, y_test : array-like of shape (n_samples,)
        Class labels. The F1/precision/recall calls use scikit-learn's default
        binary averaging, so binary labels are assumed.

    Returns
    -------
    dict
        Confusion matrix, classification report, accuracy, F1, precision,
        ROC AUC, recall and the best grid-search parameters.
    """
    # define the grid search parameters
    param_grid = {
        'kc__epochs': [10, 50, 100],
        'kc__dropout': [0.3, 0.2, 0.1, 0],
    }

    # Size the network input from the data instead of relying on the
    # hard-coded default of create_model.
    keras_estimator = KerasClassifier(
        build_fn=create_model,
        input_dim=X_train.shape[1],
        verbose=2,
    )
    estimator = Pipeline([('ss', MinMaxScaler()),
                          ("kc", keras_estimator)])

    # No ``scoring`` is configured, so candidates are ranked with the
    # estimator's own ``score``. With a single metric ``refit`` is a plain
    # switch, so use True rather than the name of a scorer that is not defined.
    clf = GridSearchCV(estimator=estimator,
                       n_jobs=10,
                       verbose=2,
                       return_train_score=True,
                       param_grid=param_grid,
                       refit=True)

    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    # ROC AUC is a ranking metric: feed it the probability of the second
    # (greater-label) class rather than thresholded class predictions.
    positive_scores = clf.predict_proba(X_test)[:, 1]

    predict_data = {
        "confusion_matrix": confusion_matrix(y_test, predictions),
        "classification_report": classification_report(y_test, predictions),
        "model_accuracy": accuracy_score(y_test, predictions),
        "f1_score": f1_score(y_test, predictions),
        "precision_score": precision_score(y_test, predictions),
        "roc_score": roc_auc_score(y_test, positive_scores),
        "recall_score": recall_score(y_test, predictions),
        "epoch_params": clf.best_params_
    }

    return predict_data

In [None]:
def main():
    """Run the 4-fold stratified evaluation and export the per-fold results.

    Reads the notebook-level ``df``, using the ``EVADIDO`` column as the target
    and every other column as a feature. For each fold the features are
    min-max scaled with statistics from the training part, ``run_prediction``
    is called, and the collected results are handed to
    ``model_utils.generate_output`` and ``model_utils.generate_output_csv``.
    """
    X = df.iloc[:, df.columns != "EVADIDO"].values
    y = df["EVADIDO"]

    skf = StratifiedKFold(n_splits = 4)
    scaler = MinMaxScaler()
    print(f"StratifiedKFold config: {skf} \n")
    all_predictions = []
    for train_index, test_index in skf.split(X, y):

        X_train, X_test = X[train_index], X[test_index]
        # The fold indices are positions, so select rows positionally; plain
        # ``y[...]`` is label-based and only matches with a default RangeIndex.
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit the scaler on the training part only, then apply it to both parts.
        # NOTE(review): run_prediction's pipeline applies its own MinMaxScaler,
        # so this outer scaling looks redundant - confirm before removing.
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # collect the metrics of the best model found for this fold
        all_predictions.append(run_prediction(X_train, X_test, y_train, y_test))

    # General model outputs
    model_utils.generate_output(all_predictions)

    # General model outputs .csv
    model_utils.generate_output_csv(all_predictions)

    print("############################ FINISHED ############################")

In [None]:
# Run the cross-validated experiment only when this code executes as the main program
if __name__ == "__main__":
    main()