In [1]:
 import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_validate
from sklearn.preprocessing import LabelEncoder

import xgboost

# Seed constant for reproducible runs.
SEED: int = 42

# Plot styling: white-grid background, then switch the colour cycle to "Paired".
sns.set_theme(style="whitegrid")
sns.set_palette("Paired")

In [2]:
# Cleaned training features, indexed by building_id.
train_data = pd.read_csv(
    "../../data/processed/train_data_cleaned.csv",
    index_col="building_id",
)

# Training target: only the damage_grade column is read, indexed by building_id.
# NOTE(review): later cells slice features and labels by position, which assumes
# both files list buildings in the same order -- TODO confirm.
y_train = pd.read_csv(
    "../../data/processed/train_labels.csv",
    usecols=["building_id", "damage_grade"],
    index_col="building_id",
)

In [3]:
# Re-code damage_grade as consecutive integers starting at 0. The fitted encoder
# is kept so predictions can be mapped back with `encoder.inverse_transform`.
# NOTE(review): presumably done because XGBClassifier wants 0-based class ids -- confirm.
encoder = LabelEncoder()
train_labels = encoder.fit_transform(y_train.loc[:, "damage_grade"].to_numpy())

# Grid Search on XGBoost

In [4]:
from sklearn.model_selection import GridSearchCV

In [5]:
# Hyper-parameter grid explored by the grid search.
#
# `colsample_bytree` is the fraction of columns sampled per tree and must lie in
# (0, 1]. The earlier values 1.1, 1.2 and 1.3 are outside that range, so every
# candidate using them failed to fit and only wasted search time; they are removed.
#
# The grid is exhaustive: 9 * 9 * 13 * 6 = 6,318 combinations, each of which is
# refit once per cross-validation fold. Trim the lists if the run is too slow.
xgboost_grid = {
    "n_estimators": [60, 70, 80, 90, 100, 110, 120, 130, 140],
    "max_depth": [14, 16, 18, 20, 22, 24, 26, 28, 30],
    "learning_rate": [
        0.001, 0.005, 0.006, 0.007, 0.008, 0.009,
        0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.2,
    ],
    "colsample_bytree": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
}

In [6]:
# Base classifier whose hyper-parameters are tuned by the grid search.
# `random_state` uses the notebook-wide SEED constant (42) instead of repeating
# the literal, so the seed is changed in one place only.
# n_jobs=-1 asks XGBoost to use all available threads for each fit.
model = xgboost.XGBClassifier(
    random_state=SEED,
    n_jobs=-1,
)

In [7]:
# Exhaustive search over `xgboost_grid`; `cv` and `scoring` keep the library defaults.
# Progress reporting is a constructor option of GridSearchCV: `verbose=2` prints a
# line per fitted candidate and fold, so a long search is not silent.
search = GridSearchCV(
    estimator=model,
    param_grid=xgboost_grid,
    verbose=2,
)

In [None]:
%%time 

search.fit(train_data[:10000], train_labels[:10000], verbose=2)