In [5]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import TargetEncoder

dataset_cleaned = pd.read_csv('datasets/dataset_cleaned.csv')
dataset = pd.get_dummies(dataset_cleaned, columns=['STATE'], dtype=int)
dataset.head()

dataset['STATE'] = TargetEncoder(smooth='auto').fit_transform(pd.DataFrame(dataset_cleaned['STATE']), dataset_cleaned['TOTAL_FIRE_SIZE'])


X = dataset.drop('TOTAL_FIRE_SIZE', axis=1)
y = dataset['TOTAL_FIRE_SIZE']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the XGBoost classifier
model = xgb.XGBRegressor(objective='reg:squarederror')
model.fit(X_train, y_train)

def calc_log_clamped_score(y_true, y_pred):
    """
    Implements: mean( min( |log(y_pred / y_true)|, 10 ) )
    """
    eps = 1e-15
    y_pred = np.maximum(y_pred, eps)
    y_true = np.maximum(y_true, eps)

    log_errors = np.abs(np.log(y_pred / y_true))
    log_errors_clamped = np.minimum(log_errors, 10.0)
    return np.mean(log_errors_clamped)


# Make predictions and calculate mean squared error
y_pred = model.predict(X_test)
score = calc_log_clamped_score(y_test, y_pred)
print("Score:", score)

Score: 3.5180669229924977
