In [1]:
import pandas as pd
from my_libs import lib_tools as pt

# --- Run configuration -------------------------------------------------------
# These two flags are not read by any cell visible here; presumably they gate
# hyperparameter-search cells elsewhere -- TODO confirm.
run_gridSearchCV = True  # True to run hyperparameters optimization with GridSearchCV()
run_optuna = True        # True to run hyperparameters optimization with Optuna

# Dataset pair to load: 'dev' or 'prd'.
run_type = 'dev'
# run_type = 'prd'

# (train, test) pickle paths for every supported run type.
dataset_files = {
    'dev': ('pickles/df-dev-train.pkl', 'pickles/df-dev-test.pkl'),
    'prd': ('pickles/df-prd-train.pkl', 'pickles/df-prd-test.pkl'),
}

# Fail fast with a readable message instead of a NameError further down when
# run_type is mistyped.
if run_type not in dataset_files:
    raise ValueError(
        f"Unknown run_type {run_type!r}; expected one of {sorted(dataset_files)}"
    )
filename_train, filename_test = dataset_files[run_type]

# Leftover alternatives kept for reference (not used by the visible cells):
# classifier_name = 'DecisionTreeClassifier'
# classifier_name = 'RandomForestClassifier'
# classifier_name = 'GradientBoostingClassifier'

# columns = ['catv', 'agg', 'dep', 'col', 'catr', 'catu', 'trajet', 'locp', 'circ', 'situ', 'lum', 'age_cls']

# Load the six data splits returned by the project helper for the selected files.
X_train, y_train, X_test, y_test, X_test_final, y_test_final = pt.get_train_valid_test_data(
    filename_train, filename_test
)

In [None]:
import time
from my_libs.encoder_custom import EncoderCustom

# --- Feature encoding --------------------------------------------------------
# Wall-clock reference used to report how long the encoding step took.
start_time = time.time()

# 'dep' is handed to the encoder as the target-encoded column; every other
# column of X_train is handed over as one-hot encoded.
cols_target_encoded = ['dep']
cols_onehot_encoded = X_train.columns.drop(cols_target_encoded)

encoder = EncoderCustom(
    cols_target_encoded=cols_target_encoded,
    cols_onehot_encoded=cols_onehot_encoded,
)

# The third argument tags the split as 'Train' or 'Test'; presumably the
# encoder only learns its mappings on 'Train' -- TODO confirm in EncoderCustom.
# NOTE(review): each split is overwritten by its encoded version, so re-running
# this cell without re-running the loading cell feeds already-encoded data back in.
X_train, y_train = encoder.transform(X_train, y_train, 'Train')
X_test, y_test = encoder.transform(X_test, y_test, 'Test')
X_test_final, y_test_final = encoder.transform(X_test_final, y_test_final, 'Test')

elapsed_seconds = time.time() - start_time
print("--- Features encoding performed in %s seconds ---" % elapsed_seconds)

# Persist the encoded training split in the current working directory.
X_train.to_pickle('./X_train.pkl')
y_train.to_pickle('./y_train.pkl')

In [None]:
from my_libs.model_evaluator import ModelEvaluator

# --- Model selection and evaluation -------------------------------------------
# Pick the classifier to train by leaving exactly one assignment uncommented.
# model_type = 'DecisionTreeClassifier'
model_type = 'RandomForestClassifier'
# model_type = 'GradientBoostingClassifier'

# Hyperparameters for each supported model type.
best_params_by_model = {
    'DecisionTreeClassifier': {
        'max_depth': 11,
        'criterion': 'gini',
        # Was 'auto', which scikit-learn deprecated in 1.1 and removed in 1.3.
        # For classifiers 'auto' meant sqrt(n_features), so 'sqrt' is equivalent.
        # Assumes ModelEvaluator forwards these params to the scikit-learn
        # estimator -- TODO confirm.
        'max_features': 'sqrt',
        'min_samples_split': 4,
    },
    'RandomForestClassifier': {
        'criterion': 'gini',
        'max_depth': 20,
        'min_samples_split': 2,
        'n_estimators': 50,
    },
    'GradientBoostingClassifier': {
        'learning_rate': 0.22,
        'n_estimators': 51,
    },
}

# Fail fast with a readable message instead of a NameError on best_params when
# model_type is mistyped.
if model_type not in best_params_by_model:
    raise ValueError(
        f"Unknown model_type {model_type!r}; expected one of {sorted(best_params_by_model)}"
    )
best_params = best_params_by_model[model_type]

# The evaluator receives the training split and the *final* test split.
evaluator = ModelEvaluator(
    model_type=model_type,
    params=best_params,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test_final,
    y_test=y_test_final,
)
# evaluate() returns the object that later cells use as `model`.
model = evaluator.evaluate()

In [None]:
from shapash import SmartExplainer
import shap
# --- Model explainability with Shapash ----------------------------------------
shap.initjs()

# Give the targets the same index as the features before handing both to the
# explainer (presumably Shapash matches rows by index -- TODO confirm).
y_test.index = X_test.index

# Only the model is passed; the optional arguments below are left unused:
#   features_dict=house_dict    -- optional parameter
#   preprocessing=encoder       -- optional: compile step can use inverse_transform method
#   postprocessing=postprocess  -- optional: see tutorial postprocessing
xpl = SmartExplainer(model=model)

# NOTE(review): the model was evaluated on X_test_final in the previous cell,
# while the explainer is compiled on X_test -- confirm this is intended.
xpl.compile(
    x=X_test,
    # y_pred=y_pred,  # optional: supply your own predictions (by default: model.predict)
    y_target=y_test,  # optional: allows to display True Values vs Predicted Values
)

# Start the Shapash web app on port 8020, titled after the selected model type.
app = xpl.run_app(title_story=f"{model_type}", port=8020)

In [None]:
# Uncomment and run to stop the Shapash web app started in the previous cell:
# app.kill()