## 1. Import libraries

In [36]:
!pip install hyperopt -q

In [37]:
import numpy as np
import pandas as pd
import sklearn
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
# packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

## 2. Read the data

In [2]:
path = r"C:\Users\cocod\OneDrive\Desktop\Credit_Risk_Modelling\data\train.csv"
train = pd.read_csv(path)
train.head()

Unnamed: 0,pct_tl_open_l6m,pct_tl_closed_l6m,tot_tl_closed_l12m,pct_tl_closed_l12m,tot_missed_pmnt,cc_tl,home_tl,pl_tl,secured_tl,unsecured_tl,other_tl,age_oldest_tl,age_newest_tl,time_since_recent_payment,max_recent_level_of_deliq,num_deliq_6_12mts,num_times_60p_dpd,num_std_12mts,num_sub,num_sub_6mts,num_sub_12mts,num_dbt,num_dbt_12mts,num_lss,recent_level_of_deliq,cc_enq_l12m,pl_enq_l12m,time_since_recent_enq,enq_l3m,netmonthlyincome,time_with_curr_empr,cc_flag,pl_flag,pct_pl_enq_l6m_of_ever,pct_cc_enq_l6m_of_ever,hl_flag,gl_flag,maritalstatus_Married,maritalstatus_Single,gender_F,gender_M,last_prod_enq2_AL,last_prod_enq2_CC,last_prod_enq2_ConsumerLoan,last_prod_enq2_HL,last_prod_enq2_PL,last_prod_enq2_others,first_prod_enq2_AL,first_prod_enq2_CC,first_prod_enq2_ConsumerLoan,first_prod_enq2_HL,first_prod_enq2_PL,first_prod_enq2_others,approved_flag
0,0.0,0.0,0,0.0,0,0,0,0,1,0,0,37,37,645,0,0,0,0,0,0,0,0,0,0,0,0,0,1100,0,30000,128,0,0,0.0,0.0,0,0,False,True,False,True,False,False,False,False,False,True,False,False,False,False,False,True,P2
1,1.0,0.0,0,0.0,0,0,0,0,1,0,0,4,4,43,0,0,0,0,0,0,0,0,0,0,0,0,0,69,2,50000,38,0,0,0.0,0.0,0,0,False,True,False,True,False,False,True,False,False,False,True,False,False,False,False,False,P2
2,0.0,0.5,2,0.5,0,0,0,0,2,2,1,42,18,73,25,1,0,0,0,0,0,0,0,0,7,0,0,555,0,14500,58,0,0,0.0,0.0,0,0,True,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,P2
3,0.0,0.0,1,0.333,0,0,0,0,0,3,2,10,8,62,32,3,1,0,0,0,0,0,0,0,1,2,0,1,1,22000,120,0,0,0.0,0.0,0,0,False,True,False,True,False,False,True,False,False,False,False,False,False,False,False,True,P3
4,0.4,0.0,0,0.0,2,0,0,0,4,1,0,38,2,420,0,0,0,0,0,0,0,0,0,0,0,0,0,59,1,15000,191,0,0,0.0,0.0,1,0,True,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,P2


In [10]:
X_train = train.drop(columns="approved_flag")
y_train = train.approved_flag.copy()

In [11]:
path = r"C:\Users\cocod\OneDrive\Desktop\Credit_Risk_Modelling\data\test.csv"
test = pd.read_csv(path)
X_test = test.drop(columns="approved_flag")
y_test = test.approved_flag.copy()

In [12]:
path = r"C:\Users\cocod\OneDrive\Desktop\Credit_Risk_Modelling\data\val.csv"
val = pd.read_csv(path)
X_val = val.drop(columns="approved_flag")
y_val = val.approved_flag.copy()

In [13]:
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

(26920, 53) (26920,)
(6731, 53) (6731,)
(8413, 53) (8413,)


## 3. Data processing

### 3.1 Random forest

In [14]:
rf_classifier = RandomForestClassifier(n_estimators=200, random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

In [15]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.5f}")

Accuracy: 0.76239


In [17]:
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

In [18]:
for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision:{precision[i]}")
    print(f"Recall:{recall[i]}")
    print(f"F1 Score:{f1_score[i]}")
    print()

Class p1:
Precision:0.8367103694874851
Recall:0.6923076923076923
F1 Score:0.7576902320561252

Class p2:
Precision:0.7929522846063058
Recall:0.9322101090188305
F1 Score:0.8569606413994169

Class p3:
Precision:0.4276629570747218
Recall:0.2030188679245283
F1 Score:0.27533265097236437

Class p4:
Precision:0.7297830374753451
Recall:0.7191448007774538
F1 Score:0.7244248653940284



### 3.2 XGBoost

In [19]:
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

In [20]:
xgb_classifier = xgb.XGBClassifier(objective="multi:softmax", num_class=4)

In [21]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

In [22]:
xgb_classifier.fit(X_train, y_train_encoded)
y_pred_xgb = xgb_classifier.predict(X_test)

In [23]:
y_test_encoded = label_encoder.fit_transform(y_test)

In [24]:
accuracy_xgb = accuracy_score(y_test_encoded, y_pred_xgb)

In [25]:
print(f"Accuracy through XG-Boost: {accuracy_xgb:.5f}")

Accuracy through XG-Boost: 0.77404


In [26]:
precision_xgb, recall_xgb, f1_score_xgb, _ = precision_recall_fscore_support(y_test_encoded, y_pred_xgb)

In [27]:
for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision:{precision_xgb[i]}")
    print(f"Recall:{recall_xgb[i]}")
    print(f"F1 Score:{f1_score_xgb[i]}")
    print()

Class p1:
Precision:0.825668449197861
Recall:0.7613412228796844
F1 Score:0.7922011287839917

Class p2:
Precision:0.8222063548732595
Recall:0.9129831516352824
F1 Score:0.8652202498356345

Class p3:
Precision:0.4528521536670547
Recall:0.29358490566037737
F1 Score:0.35622710622710624

Class p4:
Precision:0.7325467059980334
Recall:0.7240038872691934
F1 Score:0.7282502443792767



### 3.3 DecisionTreeClassifier

In [28]:
from sklearn.tree import DecisionTreeClassifier

In [29]:
dt_classifier = DecisionTreeClassifier(random_state=42)

In [30]:
dt_classifier.fit(X_train, y_train_encoded)
y_pred_dt = dt_classifier.predict(X_test)

In [31]:
accuracy_dt = accuracy_score(y_test_encoded, y_pred_dt)

In [32]:
print(f"Accuracy through Decision Trees: {accuracy_dt:.5f}")

Accuracy through Decision Trees: 0.69702


In [33]:
precision_dt, recall_dt, f1_score_dt, _ = precision_recall_fscore_support(y_test_encoded, y_pred_dt)

In [34]:
for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision:{precision_dt[i]}")
    print(f"Recall:{recall_dt[i]}")
    print(f"F1 Score:{f1_score_dt[i]}")
    print()

Class p1:
Precision:0.7238883143743536
Recall:0.6903353057199211
F1 Score:0.7067137809187279

Class p2:
Precision:0.8046298144083017
Recall:0.7992071357779981
F1 Score:0.801909307875895

Class p3:
Precision:0.3325958702064897
Recall:0.34037735849056605
F1 Score:0.33644162625885865

Class p4:
Precision:0.6311399443929564
Recall:0.6618075801749271
F1 Score:0.6461100569259962



* We can see XGBoost is better so we can fine tune it

## 4. Hyperparameter tuning for XGBoost

### 4.1 Define the hyperparameter grid

In [41]:
param_grid = {
    "colsample_bytree": [0.1, 0.3, 0.5, 0.7, 0.9],
    "learning_rate": [0.001, 0.01, 0.1, 1],
    "max_depth": [3, 5, 8, 10],
    "alpha": [1, 10, 100],
    "n_estimators": [10, 50, 100]
}

index = 0

answers_grid = {
    'combination': [],
    'train_accuracy': [],
    'val_accuracy': [],
    'colsample_bytree': [],
    'learning_rate': [],
    'max_depth': [],
    'alpha': [],
    'n_estimators': []
}

In [43]:
# Loop through each combination of hyperparameters
for colsample_bytree in param_grid['colsample_bytree']:
    for learning_rate in param_grid['learning_rate']:
        for max_depth in param_grid['max_depth']:
            for alpha in param_grid['alpha']:
                for n_estimators in param_grid['n_estimators']:
                    
                    index = index + 1
                    
                    # Define and train the XGBoost model
                    model = xgb.XGBClassifier(objective='multi:softmax',
                                             num_class=4,
                                             learning_rate=learning_rate,
                                             max_depth=max_depth,
                                             alpha=alpha,
                                             n_estimators=n_estimators)
                    
                    label_encoder = LabelEncoder()
                    y_train_encoded = label_encoder.fit_transform(y_train)
                    y_val_encoded = label_encoder.fit_transform(y_val)
                    
                    model.fit(X_train, y_train_encoded)
                    
                    # Predict on training and validation sets
                    y_pred_train = model.predict(X_train)
                    y_pred_val = model.predict(X_val)
                    
                    # calculate train and validation results
                    train_accuracy = accuracy_score(y_train_encoded, y_pred_train)
                    val_accuracy = accuracy_score(y_val_encoded, y_pred_val)
                    
                    
                    # include into the list
                    answers_grid['combination'].append(index)
                    answers_grid['train_accuracy'].append(train_accuracy)
                    answers_grid['val_accuracy'].append(val_accuracy)
                    answers_grid['colsample_bytree'].append(colsample_bytree)
                    answers_grid['learning_rate'].append(learning_rate)
                    answers_grid['max_depth'].append(max_depth)
                    answers_grid['alpha'].append(alpha)
                    answers_grid['n_estimators'].append(n_estimators)
                    
                    # print results for this combination
                    print(f"Combination {index}")
                    print(f"colsample_bytree: {colsample_bytree} learning_rate: {learning_rate}, max_depth: {max_depth}, alpha: {alpha}, n_estimators: {n_estimators}")
                    print(f"Train Accuracy: {train_accuracy:.2f}")
                    print(f"Val Accuracy: {val_accuracy:.2f}")
                    print("-" * 30)
        

Combination 31
colsample_bytree: 0.1 learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 10
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 32
colsample_bytree: 0.1 learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 33
colsample_bytree: 0.1 learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 34
colsample_bytree: 0.1 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 35
colsample_bytree: 0.1 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 36
colsample_bytree: 0.1 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71


Combination 79
colsample_bytree: 0.1 learning_rate: 0.01, max_depth: 5, alpha: 10, n_estimators: 10
Train Accuracy: 0.75
Val Accuracy: 0.74
------------------------------
Combination 80
colsample_bytree: 0.1 learning_rate: 0.01, max_depth: 5, alpha: 10, n_estimators: 50
Train Accuracy: 0.76
Val Accuracy: 0.75
------------------------------
Combination 81
colsample_bytree: 0.1 learning_rate: 0.01, max_depth: 5, alpha: 10, n_estimators: 100
Train Accuracy: 0.76
Val Accuracy: 0.75
------------------------------
Combination 82
colsample_bytree: 0.1 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 10
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 83
colsample_bytree: 0.1 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 50
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 84
colsample_bytree: 0.1 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 100
Train Accuracy: 0.74
Val Accuracy: 0.73


Combination 127
colsample_bytree: 0.1 learning_rate: 0.1, max_depth: 8, alpha: 100, n_estimators: 10
Train Accuracy: 0.75
Val Accuracy: 0.74
------------------------------
Combination 128
colsample_bytree: 0.1 learning_rate: 0.1, max_depth: 8, alpha: 100, n_estimators: 50
Train Accuracy: 0.77
Val Accuracy: 0.76
------------------------------
Combination 129
colsample_bytree: 0.1 learning_rate: 0.1, max_depth: 8, alpha: 100, n_estimators: 100
Train Accuracy: 0.78
Val Accuracy: 0.77
------------------------------
Combination 130
colsample_bytree: 0.1 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 10
Train Accuracy: 0.84
Val Accuracy: 0.77
------------------------------
Combination 131
colsample_bytree: 0.1 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 50
Train Accuracy: 0.90
Val Accuracy: 0.78
------------------------------
Combination 132
colsample_bytree: 0.1 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 100
Train Accuracy: 0.95
Val Accuracy: 0.78


Combination 176
colsample_bytree: 0.3 learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 177
colsample_bytree: 0.3 learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 178
colsample_bytree: 0.3 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 179
colsample_bytree: 0.3 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 180
colsample_bytree: 0.3 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 181
colsample_bytree: 0.3 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 10
Train Accuracy: 0.70
Val Accurac

Combination 224
colsample_bytree: 0.3 learning_rate: 0.01, max_depth: 5, alpha: 10, n_estimators: 50
Train Accuracy: 0.76
Val Accuracy: 0.75
------------------------------
Combination 225
colsample_bytree: 0.3 learning_rate: 0.01, max_depth: 5, alpha: 10, n_estimators: 100
Train Accuracy: 0.76
Val Accuracy: 0.75
------------------------------
Combination 226
colsample_bytree: 0.3 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 10
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 227
colsample_bytree: 0.3 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 50
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 228
colsample_bytree: 0.3 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 100
Train Accuracy: 0.74
Val Accuracy: 0.73
------------------------------
Combination 229
colsample_bytree: 0.3 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 

Combination 272
colsample_bytree: 0.3 learning_rate: 0.1, max_depth: 8, alpha: 100, n_estimators: 50
Train Accuracy: 0.77
Val Accuracy: 0.76
------------------------------
Combination 273
colsample_bytree: 0.3 learning_rate: 0.1, max_depth: 8, alpha: 100, n_estimators: 100
Train Accuracy: 0.78
Val Accuracy: 0.77
------------------------------
Combination 274
colsample_bytree: 0.3 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 10
Train Accuracy: 0.84
Val Accuracy: 0.77
------------------------------
Combination 275
colsample_bytree: 0.3 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 50
Train Accuracy: 0.90
Val Accuracy: 0.78
------------------------------
Combination 276
colsample_bytree: 0.3 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 100
Train Accuracy: 0.95
Val Accuracy: 0.78
------------------------------
Combination 277
colsample_bytree: 0.3 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 0.77


Combination 321
colsample_bytree: 0.5 learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 322
colsample_bytree: 0.5 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 323
colsample_bytree: 0.5 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 324
colsample_bytree: 0.5 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 325
colsample_bytree: 0.5 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 10
Train Accuracy: 0.70
Val Accuracy: 0.70
------------------------------
Combination 326
colsample_bytree: 0.5 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 50
Train Accuracy: 0.70
Val Accur

Combination 369
colsample_bytree: 0.5 learning_rate: 0.01, max_depth: 5, alpha: 10, n_estimators: 100
Train Accuracy: 0.76
Val Accuracy: 0.75
------------------------------
Combination 370
colsample_bytree: 0.5 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 10
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 371
colsample_bytree: 0.5 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 50
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 372
colsample_bytree: 0.5 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 100
Train Accuracy: 0.74
Val Accuracy: 0.73
------------------------------
Combination 373
colsample_bytree: 0.5 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 374
colsample_bytree: 0.5 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 50
Train Accuracy: 0.80
Val Accuracy: 0

Combination 417
colsample_bytree: 0.5 learning_rate: 0.1, max_depth: 8, alpha: 100, n_estimators: 100
Train Accuracy: 0.78
Val Accuracy: 0.77
------------------------------
Combination 418
colsample_bytree: 0.5 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 10
Train Accuracy: 0.84
Val Accuracy: 0.77
------------------------------
Combination 419
colsample_bytree: 0.5 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 50
Train Accuracy: 0.90
Val Accuracy: 0.78
------------------------------
Combination 420
colsample_bytree: 0.5 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 100
Train Accuracy: 0.95
Val Accuracy: 0.78
------------------------------
Combination 421
colsample_bytree: 0.5 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 422
colsample_bytree: 0.5 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 50
Train Accuracy: 0.82
Val Accuracy: 0.78


Combination 466
colsample_bytree: 0.7 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 467
colsample_bytree: 0.7 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 468
colsample_bytree: 0.7 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 469
colsample_bytree: 0.7 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 10
Train Accuracy: 0.70
Val Accuracy: 0.70
------------------------------
Combination 470
colsample_bytree: 0.7 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 50
Train Accuracy: 0.70
Val Accuracy: 0.70
------------------------------
Combination 471
colsample_bytree: 0.7 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 100
Train Accuracy: 0.70
Val Acc

Combination 514
colsample_bytree: 0.7 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 10
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 515
colsample_bytree: 0.7 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 50
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 516
colsample_bytree: 0.7 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 100
Train Accuracy: 0.74
Val Accuracy: 0.73
------------------------------
Combination 517
colsample_bytree: 0.7 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 518
colsample_bytree: 0.7 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 50
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 519
colsample_bytree: 0.7 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 100
Train Accuracy: 0.81
Val Accuracy: 0.

Combination 562
colsample_bytree: 0.7 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 10
Train Accuracy: 0.84
Val Accuracy: 0.77
------------------------------
Combination 563
colsample_bytree: 0.7 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 50
Train Accuracy: 0.90
Val Accuracy: 0.78
------------------------------
Combination 564
colsample_bytree: 0.7 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 100
Train Accuracy: 0.95
Val Accuracy: 0.78
------------------------------
Combination 565
colsample_bytree: 0.7 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 566
colsample_bytree: 0.7 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 50
Train Accuracy: 0.82
Val Accuracy: 0.78
------------------------------
Combination 567
colsample_bytree: 0.7 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 100
Train Accuracy: 0.85
Val Accuracy: 0.78


Combination 611
colsample_bytree: 0.9 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 612
colsample_bytree: 0.9 learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train Accuracy: 0.71
Val Accuracy: 0.71
------------------------------
Combination 613
colsample_bytree: 0.9 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 10
Train Accuracy: 0.70
Val Accuracy: 0.70
------------------------------
Combination 614
colsample_bytree: 0.9 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 50
Train Accuracy: 0.70
Val Accuracy: 0.70
------------------------------
Combination 615
colsample_bytree: 0.9 learning_rate: 0.001, max_depth: 3, alpha: 100, n_estimators: 100
Train Accuracy: 0.70
Val Accuracy: 0.70
------------------------------
Combination 616
colsample_bytree: 0.9 learning_rate: 0.001, max_depth: 5, alpha: 1, n_estimators: 10
Train Accuracy: 0.75
Val Accu

Combination 659
colsample_bytree: 0.9 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 50
Train Accuracy: 0.73
Val Accuracy: 0.72
------------------------------
Combination 660
colsample_bytree: 0.9 learning_rate: 0.01, max_depth: 5, alpha: 100, n_estimators: 100
Train Accuracy: 0.74
Val Accuracy: 0.73
------------------------------
Combination 661
colsample_bytree: 0.9 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 662
colsample_bytree: 0.9 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 50
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 663
colsample_bytree: 0.9 learning_rate: 0.01, max_depth: 8, alpha: 1, n_estimators: 100
Train Accuracy: 0.81
Val Accuracy: 0.77
------------------------------
Combination 664
colsample_bytree: 0.9 learning_rate: 0.01, max_depth: 8, alpha: 10, n_estimators: 10
Train Accuracy: 0.78
Val Accuracy: 0.7

Combination 707
colsample_bytree: 0.9 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 50
Train Accuracy: 0.90
Val Accuracy: 0.78
------------------------------
Combination 708
colsample_bytree: 0.9 learning_rate: 0.1, max_depth: 10, alpha: 1, n_estimators: 100
Train Accuracy: 0.95
Val Accuracy: 0.78
------------------------------
Combination 709
colsample_bytree: 0.9 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 10
Train Accuracy: 0.80
Val Accuracy: 0.77
------------------------------
Combination 710
colsample_bytree: 0.9 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 50
Train Accuracy: 0.82
Val Accuracy: 0.78
------------------------------
Combination 711
colsample_bytree: 0.9 learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 100
Train Accuracy: 0.85
Val Accuracy: 0.78
------------------------------
Combination 712
colsample_bytree: 0.9 learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 10
Train Accuracy: 0.75
Val Accuracy: 0.7

In [44]:
hyperparameters_df = pd.DataFrame.from_dict(answers_grid)
hyperparameters_df.to_csv("hyperparameters.csv", index=False)

In [45]:
hyperparameters_df.head()

Unnamed: 0,combination,train_accuracy,val_accuracy,colsample_bytree,learning_rate,max_depth,alpha,n_estimators
0,1,0.0,0.706879,0.1,0.001,3,1,10
1,2,0.0,0.707919,0.1,0.001,3,1,50
2,3,0.0,0.711336,0.1,0.001,3,1,100
3,4,0.0,0.706581,0.1,0.001,3,10,10
4,5,0.0,0.710593,0.1,0.001,3,10,50


In [47]:
hyperparameters_df.train_accuracy.max()

np.float64(0.999925705794948)

### 4.2 Final model with the tuned hyperparameters

In [58]:
model = xgb.XGBClassifier(objective='multi:softmax',
                                             num_class=4,
                                             col_sample_bytree=0.9,
                                             learning_rate=1,
                                             max_depth=3,
                                             alpha=10,
                                             n_estimators=100)

model.fit(X_train, y_train_encoded)

In [59]:
y_pred = model.predict(X_test)

In [60]:
accuracy_final = accuracy_score(y_test_encoded, y_pred)
print(f"Accuracy Final: {accuracy_final}")

Accuracy Final: 0.7793890407702365
