#### Comparing GAM to GLM/GBM using the commission model dataset


Workflow:
- Check / Clean data if needed
- Look at TVH used for Radar models
- Run grid-search of GAM model
- Log time taken to build + run

In [None]:
!pip install interpret

In [None]:
"""
Importing:
    Packages
    Data
"""

import os as os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import base
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

# Load the commission-model export into a DataFrame.
# NOTE(review): the path is absolute and environment-specific — confirm it
# exists before running on another machine.
df = pd.read_csv('/home/jovyan/Optimisation Team Work/Commission Model/Files/CDL_Commission_Data_Radar_Export_100.csv')

# Status output: confirms the load finished and shows the working directory.
print("Packages loaded, dataset read in")
print("Current directory is location in:", os.getcwd())

#newpath = f"{peril_name}_MODEL\\"
#if not os.path.exists(newpath):
#    os.makedirs(newpath)
    
#print(f"New folder called {newpath} created.")

In [None]:
# Lower-case every column label so later lookups can use lower-case names,
# then list the resulting labels one per line.
df.columns = [str.lower(label) for label in df.columns]
for label in df.columns:
    print(label)

In [None]:
# Columns selected from df as model inputs (used to build X_train / X_val).
# Names are lower-case to match df.columns after the lower-casing step above.
feature_list=['policyyearswithintermediary',
'broker_names',
'premiumexclcommissionandipt',
'main_driver_age',
'voluntary_excess',
'marketmodel',
'ws_sector_ra',
'ws_vehicle_group',
'ad_vehicle_group',
'differencetomarketpremium',
'years_owned',
'postal_sector_lookup_xspi_rating_area',
'ad_sector_ra',
'vehicle_group',
'frth_vehicle_group',
'transunionconceptsgsb',
'transunionconceptsodb',
'transunionconceptsycb',
'transunionconceptsqub',
'transunionconceptsjf',
'dist_from_acc_man_co',
'vehicle_value',
'vehicle_age',
'pd_sector_ra',
'transunionconceptsjwb',
'frthsm_sector_ra',
'annual_mileage_banded',
'transunionconceptscsb',
'pi_vehicle_group',
'pd_vehicle_group',
'large_sector_ra',
'transunionconceptstrb',
'vehicle_age_at_purchase',
'detailed_occupation_type',
'transunionconceptsef',
'large_vehicle_group',
'rating_area',
'add_driver_age_banded',
'youngest_additional_driver_age_difference',
'pi_sector_ra',
'tv_region',
'opt_main_employment_type',
'pdr_code',
'ad_occupation_group',
'duration',
'transunionconceptsmdb',
'main_employer_business_type',
'opt_main_marital_status',
'transunionconceptsbf',
'main_occupation_type',
'transunionconceptsw',
'class_of_use',
'frth_occupation_group',
'ws_occupation_group',
'main_driver_access_to_other_vehicles',
'garaged',
'cancellation_prediction',
'_2nd_car_flag_unisex',
'additional_driver_access_to_other_vehicles',
'latest_non_fault_accident_claim',
'proposer_homeowner',
'ncd_allowed_unisex',
'ncd_protected',
'additional_driver_other_vehicles_owned',
'opt_years_owned',
'vehicle_keeper',
'vehicle_owner',
'transunionconceptsad',
'latest_fault_accident_claim',
'youngest_additional_driver_experience',
'latest_conviction',
'latest_windscreen_claim',
'cover',
'tot_fault_claims_in_last_5_years',
'tot_windscreen_claims',
'ncd_earned',
'tot_non_fault_accident_claims',
'most_severe_conviction',
'transunionconceptsoe',
'tot_convictions',
'tot_theft_claims',
'tot_vandalism_claims',
'large_occupation_group',
'main_driving_experience_years',
'tot_fault_accident_claims',
'tot_fire_claims',
'transunionconceptsgc',
'main_uk_residency',
'pi_occupation_group',
'pd_occupation_group'
]

In [None]:
# Display the 'training80' column; the next cell uses it to split the rows
# into training and validation sets.
df['training80']

In [None]:
# Rows flagged True in 'training80' form the training set; every other row
# (anything that does not compare equal to True) goes to validation.
df_train = df.loc[df['training80'].eq(True)]
df_val = df.loc[df['training80'].ne(True)]

# Notebook display: (rows, columns) of the training set.
df_train.shape

In [None]:
# Model inputs are the feature_list columns; the target is 'modelresponse'.
X_train, y_train = df_train[feature_list], df_train['modelresponse']
X_val, y_val = df_val[feature_list], df_val['modelresponse']

In [None]:
from useful_functions import gini

In [None]:
!pip install optuna

In [None]:
import optuna

def objective(trial):
    """Optuna objective: fit an EBM with sampled hyperparameters and score it.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_val`` and ``y_val``.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Mean squared error of the fitted model's predictions on ``X_val``
        measured against ``y_val``.
    """
    # Imported locally so this cell runs on its own: the estimator was never
    # imported in the notebook, and mean_squared_error was only imported in a
    # later cell, so the first trial failed with NameError.
    from interpret.glassbox import ExplainableBoostingRegressor
    from sklearn.metrics import mean_squared_error

    # 2. Suggest values of the hyperparameters using a trial object.
    # Single-choice categoricals keep the fixed settings recorded in the
    # trial's params alongside the ones that are actually searched.
    param = {
        'validation_size': trial.suggest_categorical('validation_size', [0.25]),
        'max_bins': trial.suggest_categorical("max_bins", [256, 512, 1024, 2048]),
        'objective': 'rmse',
        'cyclic_progress': trial.suggest_categorical('cyclic_progress', [0.0]),
        'inner_bags': trial.suggest_categorical('inner_bags', [50]),
        'outer_bags': trial.suggest_categorical('outer_bags', [50]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.6, 0.4, 0.2, 0.1, 0.05, 0.01]),
        'interactions': trial.suggest_categorical('interactions', [0, 5, 10]),
        'greedy_ratio': trial.suggest_categorical('greedy_ratio', [0.0, 0.25, 0.5, 0.75, 1]),
        'smoothing_rounds': trial.suggest_categorical('smoothing_rounds', [0, 50, 100, 500, 1000]),
    }

    ebm = ExplainableBoostingRegressor(**param)
    ebm.fit(X_train, y_train)

    val_preds = ebm.predict(X_val)
    return mean_squared_error(y_val, val_preds)



# 3. Build a study that minimises the objective using random sampling, then
#    evaluate 52 trials.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.RandomSampler(),
)
study.optimize(objective, n_trials=52)

In [None]:
from sklearn.metrics import mean_squared_error

def detailed_objective(trial):
    """Refit an EBM for ``trial`` and report train and validation metrics.

    Uses the same hyperparameter definitions as the tuning objective, so it
    can be called with ``study.best_trial`` to get a fuller picture of that
    trial. Reads the notebook-level ``X_train``, ``y_train``, ``X_val`` and
    ``y_val``, plus the ``gini`` helper imported from ``useful_functions``.

    Args:
        trial: Optuna trial object supplying the hyperparameter values.

    Returns:
        Tuple ``(train_gini, val_gini, train_mse, val_mse)``.
    """
    # Imported locally: the estimator was never imported in the notebook, so
    # calling this function failed with NameError. mean_squared_error is
    # imported here as well so the function does not rely on cell order.
    from interpret.glassbox import ExplainableBoostingRegressor
    from sklearn.metrics import mean_squared_error

    param = {
        'validation_size': trial.suggest_categorical('validation_size', [0.25]),
        'max_bins': trial.suggest_categorical("max_bins", [256, 512, 1024, 2048]),
        'objective': 'rmse',
        'cyclic_progress': trial.suggest_categorical('cyclic_progress', [0.0]),
        'inner_bags': trial.suggest_categorical('inner_bags', [50]),
        'outer_bags': trial.suggest_categorical('outer_bags', [50]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.6, 0.4, 0.2, 0.1, 0.05, 0.01]),
        'interactions': trial.suggest_categorical('interactions', [0, 5, 10]),
        'greedy_ratio': trial.suggest_categorical('greedy_ratio', [0.0, 0.25, 0.5, 0.75, 1]),
        'smoothing_rounds': trial.suggest_categorical('smoothing_rounds', [0, 50, 100, 500, 1000]),
    }

    ebm = ExplainableBoostingRegressor(**param)
    ebm.fit(X_train, y_train)

    train_preds = ebm.predict(X_train)
    val_preds = ebm.predict(X_val)

    # gini comes from the project-local useful_functions module; it is called
    # as gini(actual, predicted) — presumably a ranking metric, confirm there.
    train_gini = gini(y_train, train_preds)
    val_gini = gini(y_val, val_preds)
    train_mse = mean_squared_error(y_train, train_preds)
    val_mse = mean_squared_error(y_val, val_preds)

    return train_gini, val_gini, train_mse, val_mse

##### Check the best trial result for more detailed information

In [None]:
# Refit with the best trial's hyperparameters and show the returned
# (train_gini, val_gini, train_mse, val_mse) tuple.
detailed_objective(study.best_trial)

In [None]:
def build_top_ebm(trial):
    """Fit and return an EBM using the hyperparameters held by ``trial``.

    Reads the notebook-level ``X_train`` and ``y_train``.

    Args:
        trial: Optuna trial object supplying the hyperparameter values
            (for example ``study.best_trial``).

    Returns:
        The fitted ``ExplainableBoostingRegressor``.
    """
    # Imported locally: the estimator was never imported in the notebook, so
    # calling this function failed with NameError.
    from interpret.glassbox import ExplainableBoostingRegressor

    # 2. Suggest values of the hyperparameters using a trial object.
    param = {
        'validation_size': trial.suggest_categorical('validation_size', [0.25]),
        'max_bins': trial.suggest_categorical("max_bins", [256, 512, 1024, 2048]),
        'objective': 'rmse',
        'cyclic_progress': trial.suggest_categorical('cyclic_progress', [0.0]),
        'inner_bags': trial.suggest_categorical('inner_bags', [50]),
        'outer_bags': trial.suggest_categorical('outer_bags', [50]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.6, 0.4, 0.2, 0.1, 0.05, 0.01]),
        'interactions': trial.suggest_categorical('interactions', [0, 5, 10]),
        'greedy_ratio': trial.suggest_categorical('greedy_ratio', [0.0, 0.25, 0.5, 0.75, 1]),
        'smoothing_rounds': trial.suggest_categorical('smoothing_rounds', [0, 50, 100, 500, 1000]),
    }

    ebm = ExplainableBoostingRegressor(**param)
    ebm.fit(X_train, y_train)
    return ebm

# Fit the final model on the training data with the best trial's hyperparameters.
best_ebm = build_top_ebm(study.best_trial)

In [None]:
from interpret import show
from interpret.provider import InlineProvider
from interpret import set_visualize_provider

# Use the inline provider for interpret's visualisations.
set_visualize_provider(InlineProvider())

# Build the global explanation object for the fitted model and display it.
ebm_global = best_ebm.explain_global(name='EBM')
show(ebm_global)

In [None]:
from interpret import preserve

# Save the global explanation to an HTML file (relative file name, so
# presumably written to the current working directory — confirm).
preserve(ebm_global, file_name='best_model_export.html')

In [None]:
# Print the built-in help for the explanation object to see what it offers.
help(ebm_global)