# 2.0 Model

## Simple setup

In [None]:
# Execute the project's setup notebook before anything else in this notebook.
# NOTE(review): `data_processed_dir`, called in the "Load data" cell, is neither
# defined nor imported in this notebook, so it presumably comes from here — confirm.
%run 0.2-jvs-before_to_start.ipynb

## Import libraries

In [None]:
import janitor
import shap

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_flavor as pf
import seaborn as sns
import xgboost as xgb

from sklearn.model_selection import (
    GridSearchCV,
    train_test_split,
    ShuffleSplit
)

## Set plot defaults

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and deprecated the bare "seaborn" name, which was removed in 3.8.
# Prefer the new name and fall back to the legacy one so this cell runs on
# both old and new Matplotlib releases. If neither is registered, the default
# below makes `plt.style.use` raise its usual, descriptive error.
_seaborn_style = next(
    (name for name in ("seaborn-v0_8", "seaborn") if name in plt.style.available),
    "seaborn-v0_8",
)
plt.style.use(_seaborn_style)

# Apply seaborn's white grid background on top of the Matplotlib style sheet.
sns.set_style("whitegrid")

## Load data

In [None]:
# Read every processed CSV into a dict keyed by the file name without its
# extension. The glob result is sorted so the load/preview order does not
# depend on the filesystem's directory ordering.
titanic_dfs = {
    file.stem: pd.read_csv(filepath_or_buffer=file)
    for file in sorted(data_processed_dir().glob("*.csv"))
}

# Preview the first two rows of each frame, labelled with its name.
for frame_name, frame in titanic_dfs.items():
    display(frame_name, frame.head(2))

# Expose each frame as a notebook-level variable named after its file stem
# (e.g. `titanic_train_df`, which the "Prepare data" cell reads).
# `globals()` is used rather than `locals()` because the Python docs state
# that the mapping returned by `locals()` should not be modified.
globals().update(titanic_dfs)

## Prepare data

In [None]:
def _to_integer_codes(column):
    """Return the integer codes that ``column.factorize()`` assigns to each value."""
    return column.factorize()[0]


# Non-numeric columns that are kept and replaced by integer codes.
_CATEGORICAL_COLUMNS = ["sex", "embarked"]

# Build the modelling table: drop the identifier, keep the numeric columns
# plus the two categorical ones, encode those as integer codes, then drop
# rows that still contain missing values.
# NOTE(review): by default `factorize` encodes missing values as -1, so rows
# whose `sex`/`embarked` was missing are NOT removed by the trailing
# `dropna()` — confirm this is intended.
titanic_proccessed = (
    titanic_train_df
    .remove_columns(["passenger_id"])
    .select_columns([pd.api.types.is_numeric_dtype, *_CATEGORICAL_COLUMNS])
    .transform_columns(
        column_names=_CATEGORICAL_COLUMNS,
        function=_to_integer_codes,
        elementwise=False,
    )
    .dropna()
)

titanic_proccessed

## Modelling

### Splitting data

In [None]:
# Separate the feature matrix from the `survived` target column.
X, y = titanic_proccessed.get_features_targets(target_column_names="survived")

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Five shuffled 80/20 splits for cross-validation.
# NOTE(review): `cv_train` is not used by any cell visible in this notebook.
cv_train = ShuffleSplit(
    n_splits=5,
    test_size=0.2,
    random_state=0,
)

### Create model

In [None]:
# Hyper-parameters for the classifier, kept together so they are easy to tune.
_xgb_params = {
    "objective": "binary:hinge",
    "eval_metric": "error",
    "random_state": 0,
    "use_label_encoder": False,
}

# Build the classifier and fit it on the training split in a single
# assignment, so the cell produces no output.
xgb_model = xgb.XGBClassifier(**_xgb_params).fit(X_train, y_train)

## Model visualization

### XGB

In [None]:
# Plot the fitted model's feature importances; the trailing `;` hides the Axes repr.
xgb.plot_importance(booster=xgb_model);

In [None]:
# Draw a tree of the fitted model on a single, very large (100x100 inch) axes;
# the trailing `;` hides the Axes repr.
fig, axes = plt.subplots(figsize=(100, 100))
xgb.plot_tree(xgb_model, ax=axes);

### SHAP (SHapley Additive exPlanations)

In [None]:
# Build a tree explainer on the fitted model and compute SHAP values
# for the training features.
explainer = shap.TreeExplainer(model=xgb_model)
shap_values = explainer.shap_values(X=X_train)

In [None]:
# Summary plot of the SHAP values computed on the training features.
shap.summary_plot(shap_values, features=X_train)

In [None]:
# Dependence plot for the `age` feature on the training features.
shap.dependence_plot(
    ind="age",
    shap_values=shap_values,
    features=X_train,
)

In [None]:
# Initialise SHAP's JavaScript support; run before the `shap.force_plot` cells below.
shap.initjs()

In [None]:
# Force plot for the first row of the training set.
shap.force_plot(
    explainer.expected_value,
    shap_values[0],
    X_train.iloc[0],
)

In [None]:
# Force plot covering every row of the training set.
shap.force_plot(
    base_value=explainer.expected_value,
    shap_values=shap_values,
    features=X_train,
)