# ACP Project - Systematic Model Comparison

In [None]:
# NOTE(review): this flag is not read anywhere in the visible cells; presumably
# it switches between notebook and script execution -- confirm against the
# rest of the file before relying on it.
SCRIPT = False


In [None]:
import warnings, pickle, os, itertools
from dataclasses import dataclass
from joblib import Parallel, delayed, parallel_backend

# Optional acceleration: when the `sklearnex` package is importable, patch
# scikit-learn with it. This runs before the `sklearn` import further down the
# notebook. A missing package is deliberately ignored so the notebook still
# runs on plain scikit-learn.
try:
    from sklearnex import patch_sklearn
    patch_sklearn()
except ImportError:
    pass

import numpy as np
import pandas as pd
# Show every column (None = no limit) and up to 300 rows when displaying frames.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 300)

from IPython.display import display
import matplotlib.pyplot as plt

import seaborn as sns
# Default every figure to a 10 x 10 canvas.
sns.set(rc={'figure.figsize':(10,10)})

import shap
import optuna, sqlalchemy
# Keep Optuna quiet by default; a later cell raises the level to INFO again.
optuna.logging.set_verbosity(optuna.logging.WARNING)

%load_ext autoreload
%autoreload 1

In [None]:
from utils.evaluation import get_metrics, get_threshold_fpr
%aimport utils.evaluation

In [None]:
from systematic_comparison import  *
from models import * 
%aimport systematic_comparison, models

In [None]:
from sklearn.model_selection import train_test_split
from dataset import SCIData, SCICols
%aimport dataset

# Dataset loaded directly from the HDF5 file on disk.
sci = SCIData.load('data/sci.h5')

# Processed dataset, ordered by admission time. Because the split below is
# unshuffled, the training portion comes from the start of this ordering and
# the test portion from the end (assuming the steps below keep row order --
# TODO confirm).
scii = SCIData(
    SCIData.quickload("data/sci_processed.h5").sort_values("AdmissionDateTime")
)
scii = scii.mandate(SCICols.news_data_raw)
scii = scii.derive_critical_event(within=1, return_subcols=True)
scii = scii.augment_shmi(onehot=True)
scii = scii.omit_redundant()
scii = scii.derive_ae_diagnosis_stems(onehot=False)
scii = scii.categorize()
# Disabled pipeline step: .onehot_encode_categories()

# Split the records and the two outcome columns in one call; only the test
# halves of the outcome columns are kept. With shuffle=False the rows are cut
# in their existing order, so random_state has no influence on the result.
(
    sci_train,
    sci_test,
    _,
    y_test_mortality,
    _,
    y_test_criticalcare,
) = train_test_split(
    scii,
    scii.DiedWithinThreshold,
    scii.CriticalCare,
    shuffle=False,
    test_size=0.33,
    random_state=42,
)

# Re-wrap both halves of the split as SCIData.
sci_train = SCIData(sci_train)
sci_test = SCIData(sci_test)

# Disabled alternative: build the feature/target pairs up front.
# (X_train, y_train), (X_test, y_test) = (
#     sci_train.xy(outcome="CriticalEvent", dropna=False, fillna=False),
#     sci_test.xy(outcome="CriticalEvent", dropna=False, fillna=False),
# )

In [None]:
# X_train, y_train = SCIData(sci_train.head(1000)).xy(imputation=False, fillna=True, onehot_encoding=False, ordinal_encoding=True)
# #XX, yy = Resampler_RandomUnderSampler(sci_train).factory().fit_resample(X_train, y_train)

In [None]:
# Raise Optuna's log level to INFO for this run (it is set to WARNING at
# import time).
optuna.logging.set_verbosity(optuna.logging.INFO)

# Study specifications for a single estimator across three resampling options
# (None = no resampler). Each item is unpacked below as keyword arguments to
# construct_study.
studies = study_grid(
    estimators=[Estimator_OneClassSVM],
    resamplers=[None, Resampler_SMOTE, Resampler_RandomUnderSampler],
    sci_train=sci_train,
)

# Run every study for two trials and keep each outcome. The loop variable is
# given a real name instead of `_`, which is a throwaway name by convention
# and doubles as IPython's "last output" variable; results are accumulated
# rather than overwritten on every iteration.
study_results = []
for study_kwargs in studies:
    # Each study receives its own freshly wrapped SCIData objects.
    study = construct_study(
        **study_kwargs,
        sci_train=SCIData(sci_train),
        sci_test=SCIData(sci_test),
    )
    study_results.append(
        study(n_trials=2, model_persistence_path='models/test/')
    )
    


In [None]:
# optuna.logging.set_verbosity(optuna.logging.INFO)
# studies = [construct_study(**_, storage='sqlite:///models/studies.db', n_trials=2, sci_train=SCIData(sci_train.head(1000)), sci_test=SCIData(sci_test.head(1000))) for _ in get_studies(sci_train, study_grid)[:5]]
# with parallel_backend("loky", inner_max_num_threads=1):
#             results = Parallel(n_jobs=1)(
#                 delayed(_)(n_trials=2) for _ in studies[:2]
#             )