In [None]:
## IMPORTS
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, \
                            roc_auc_score, precision_score, f1_score, recall_score, \
                            mean_squared_error, mean_absolute_error, max_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from ast import literal_eval as make_tuple
from sklearn.pipeline import Pipeline
from datetime import timedelta
from sklearn.svm import SVC
from time import time
import pandas as pd
import numpy as np
import sys

In [None]:
# Where this notebook writes its output, and where the grid-search summary
# that drives the evaluation below is read from.
results_dir = "./"
best_results_path = "../grid_search_results/svc_params.csv"

# The utils directory has to be on the import path before model_utils
# can be imported, which is why this import is not in the imports cell.
sys.path.insert(1, "../../utils/")
import model_utils

# Grid-search results table; the best parameter sets are picked from it below.
best_results = pd.read_csv(best_results_path)

In [None]:
# Pick the three best grid-search results that use *different* parameter sets.
#
# Rows are ranked by accuracy, with F1 and then ROC score as tie-breakers.
# Duplicated parameter sets are removed BEFORE truncating to three rows:
# taking the three largest rows first and de-duplicating afterwards can leave
# fewer than three distinct parameter sets whenever the best rows share the
# same `epoch_best_params`.
top3_best_params = (
    best_results
    .sort_values(["model_accuracy", "f1_score", "roc_score"], ascending=False)
    .drop_duplicates(["epoch_best_params"])
    .head(3)
)

# Display the (up to) three best results whose epoch_best_params differ
top3_best_params.head(3)

In [None]:
# Base seed for the cross-validation shuffles, so that Restart & Run All
# reproduces the same folds (and therefore the same scores).
SEED = 42
# How many times the whole stratified k-fold evaluation is repeated
# for each parameter set.
N_RUNS = 10


def _build_estimator(params):
    """Return a StandardScaler -> SVC pipeline configured from `params`.

    `params` is the dict parsed from the grid-search results; its keys carry
    the pipeline step prefix ``clf__`` (e.g. ``clf__kernel``, ``clf__C``).
    A missing key raises KeyError.
    """
    clf = SVC(
        kernel=params["clf__kernel"],
        C=params["clf__C"],
        gamma=params["clf__gamma"],
        decision_function_shape=params["clf__decision_function_shape"],
        shrinking=params["clf__shrinking"],
        max_iter=params["clf__max_iter"],
    )
    return Pipeline([("scl", StandardScaler()), ("clf", clf)])


# One dict of metrics per (parameter set, run, fold); written to CSV at the end.
all_predictions = []
for _, best_row in top3_best_params.iterrows():
    # Load the dataset on which this parameter set was obtained
    dbname = best_row["database"]
    dataframe_path = f"../../pre_processing/datasets/preprocessed_data/{dbname}_filtered_analysis.csv"
    df = pd.read_csv(dataframe_path, sep=",")

    # Drop RA column if it exists
    if "RA" in df.columns:
        print("Removing column RA")
        df = df.drop(columns="RA")

    # Features are every column except the target. Both are plain NumPy arrays
    # so the positional fold indices below can never be mistaken for index labels.
    X = df.drop(columns="EVADIDO").values
    y = df["EVADIDO"].values
    # Labels taken from the full target so every fold's confusion matrix has
    # the same shape, even when the model never predicts one of the classes.
    class_labels = np.unique(y)

    n_splits = int(best_row["n_splits"])
    # The best-params string is read from the LAST column of the results table
    # (presumably `epoch_best_params` -- confirm against the CSV layout).
    # `.iloc` makes the positional access explicit; a bare `[-1]` on a
    # string-labelled Series is deprecated in pandas.
    best_param = best_row.iloc[-1]
    estimator = _build_estimator(make_tuple(best_param))

    for i in range(1, N_RUNS + 1):
        print(f"{i}'st run")
        # A different but deterministic shuffle per run: the runs still see
        # different folds, yet the whole experiment is reproducible.
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED + i)
        for train_index, test_index in skf.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Time fitting and prediction together
            start_time = time()
            estimator.fit(X_train, y_train)
            predictions = estimator.predict(X_test)
            end_time = time()
            total_time = timedelta(seconds=end_time - start_time)

            # NOTE(review): roc_score is computed from hard class predictions,
            # not from decision_function scores -- kept as-is so the values stay
            # comparable with the grid-search results; confirm this is intended.
            predict_data = {
                "database": dbname,
                "n_splits": n_splits,
                "confusion_matrix": confusion_matrix(y_test, predictions, labels=class_labels),
                "classification_report": classification_report(y_test, predictions),
                "model_accuracy": accuracy_score(y_test, predictions),
                "f1_score": f1_score(y_test, predictions, labels=np.unique(predictions)),
                "precision_score": precision_score(y_test, predictions),
                "roc_score": roc_auc_score(y_test, predictions),
                "recall_score": recall_score(y_test, predictions),
                "epoch_params": best_param,
                "max_error": max_error(y_test, predictions),
                "mean_squared_error": mean_squared_error(y_test, predictions),
                "mean_absolute_error": mean_absolute_error(y_test, predictions),
                "train_test_duration": total_time,
                # The original literal also had `"train_size": "uniform"` just
                # above; it was silently overwritten by this entry, so it is dropped.
                "train_size": len(X_train),
                "test_size": len(X_test)
            }

            all_predictions.append(predict_data)

# CSV Output results
model_utils.generate_output_csv(all_predictions, results_dir, "svc_results")