# Random Forest Optuna Model

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import optuna 
import optuna.visualization as optvis
from sklearn.model_selection import cross_val_score

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import joblib

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [None]:
def opt_plot(study, plot):
    """Return one of the Optuna visualisations for ``study``.

    ``plot`` selects the figure by position:

    0 -> optimization history, 1 -> slice, 2 -> parallel coordinate,
    3 -> contour, 4 -> parameter importances.

    Any other value yields ``None``.
    """
    plotter_names = (
        'plot_optimization_history',
        'plot_slice',
        'plot_parallel_coordinate',
        'plot_contour',
        'plot_param_importances',
    )
    # Compare with ``==`` in positional order; the plotting module is only
    # touched once a selector actually matches.
    for code, plotter_name in enumerate(plotter_names):
        if plot == code:
            return getattr(optvis, plotter_name)(study)
    return None

# Load Data

In [None]:
# Read the training CSV from the Kaggle input directory and report its
# (rows, columns) shape.
train = pd.read_csv(
    '../input/reducing-commercial-aviation-fatalities/train.csv'
)
print(train.shape)

# Check Missing Values

In [None]:
# Missing-value count per column, shown as a single wide row.
train.isnull().sum().to_frame().transpose()

In [None]:
# Share of all rows that falls into each `event` class.
train['event'].value_counts().div(len(train)).to_frame()

# Feature Engineering w/ Montage 2

In [None]:
# Each tuple is a chain of EEG electrode suffixes.  Every consecutive pair
# (a, b) inside a chain becomes a new column named ``a_b`` that holds
# ``eeg_a - eeg_b``.  Chains and pairs are listed in the order in which the
# columns are appended to `train`.
_MONTAGE2_CHAINS = (
    ('fp1', 'f7', 't3', 't5', 'o1'),
    ('fp1', 'f3', 'c3', 'p3', 'o1'),
    ('fz', 'cz', 'pz'),
    ('fp2', 'f8', 't4', 't6', 'o2'),
    ('fp2', 'f4', 'c4', 'p4', 'o2'),
)

for _chain in _MONTAGE2_CHAINS:
    for _first, _second in zip(_chain, _chain[1:]):
        train[f'{_first}_{_second}'] = (
            train[f'eeg_{_first}'] - train[f'eeg_{_second}']
        )

In [None]:
# Column-name groups used to assemble the model inputs.
# The two positional slices are taken by index from `train.columns`.
Original_Features = list(train.columns[4:27])
Other_Features = list(train.columns[24:27])

# The engineered difference channels followed by three explicitly named columns.
Montage2 = [
    'fp1_f7', 'f7_t3', 't3_t5', 't5_o1',
    'fp1_f3', 'f3_c3', 'c3_p3', 'p3_o1',
    'fz_cz', 'cz_pz',
    'fp2_f8', 'f8_t4', 't4_t6', 't6_o2',
    'fp2_f4', 'f4_c4', 'c4_p4', 'p4_o2',
    "ecg", "r", "gsr",
]

In [None]:
# Keep the target aside, then remove it and the non-feature columns from
# `train` in place.
y_train = train['event'].values
train.drop(
    columns=['crew', 'experiment', 'time', 'seat', 'event'],
    inplace=True,
)

In [None]:
# Montage2 explicitly lists "ecg", "r" and "gsr", while Original_Features is
# a positional slice of the raw columns that may contain those same names.
# Selecting `Montage2 + Original_Features` directly would then put the same
# column into the feature matrix more than once (duplicate labels, and the
# duplicated features are over-sampled by `max_features`).  De-duplicate the
# label list while keeping first-seen order.
feature_columns = list(dict.fromkeys(Montage2 + Original_Features))
x_train = train[feature_columns]

print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Stratified split of the row positions into one fit/validation pair.
# NOTE(review): test_size=0.9 means only 10% of the rows are used for
# fitting and 90% for validation -- presumably to keep each trial fast;
# confirm this is intended.
train_idx, valid_idx = train_test_split(
    range(x_train.shape[0]),
    test_size=0.9,
    random_state=1,
    stratify=y_train,
)

print(len(train_idx), len(valid_idx), sep='\n')

# Single predefined split, in the iterable-of-(train, test) form accepted
# by the `cv` argument of cross_val_score.
indices = [(train_idx, valid_idx)]

# Extra Trees (Extremely Randomized Trees) Model

In [None]:
%%time 

def ex_objective(trial):
    """Optuna objective: negative log loss of an ExtraTreesClassifier.

    Samples the forest hyperparameters from ``trial``, evaluates the
    resulting model on the predefined split(s) in ``indices`` and returns
    the mean score.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to draw the hyperparameter values.

    Returns
    -------
    float
        Mean ``neg_log_loss`` over the splits in ``indices``; higher is
        better, so the study must maximise it.
    """
    # Parameter names are kept as-is because they are the keys reported in
    # ``study.best_params`` and shown in the plots.
    n = trial.suggest_int('n_estimators', 20, 300)
    md = trial.suggest_int('max_depth', 2, 556)
    mi = trial.suggest_int('min_inst', 1, 64)
    nf = trial.suggest_int('features', 4, 12)
    cr = trial.suggest_categorical('criterion', ['gini', 'entropy'])

    # Parallelise inside the forest: cross_val_score's own n_jobs only
    # spreads work across CV splits, and `indices` holds a single split, so
    # passing n_jobs there left the fit running on one core.
    clf = ExtraTreesClassifier(
        random_state=1,
        n_estimators=n,
        max_depth=md,
        min_samples_leaf=mi,
        max_features=nf,
        criterion=cr,
        n_jobs=-1,
    )

    scores = cross_val_score(
        clf, x_train, y_train, cv=indices, scoring='neg_log_loss'
    )

    return scores.mean()
    
# The objective returns a negative log loss, so larger values are better
# and the study is set to maximise.
ex_study = optuna.create_study(direction='maximize')
ex_study.optimize(ex_objective, n_trials=40)

# Blank line, then the best score and the hyperparameters that produced it.
print('', ex_study.best_value, ex_study.best_params, sep='\n')

In [None]:
opt_plot(ex_study, plot=0)

In [None]:
opt_plot(ex_study, plot=1)

In [None]:
opt_plot(ex_study, plot=2)

In [None]:
opt_plot(ex_study, plot=3)

In [None]:
opt_plot(ex_study, plot=4)