<a href="https://colab.research.google.com/github/prodramp/wildfire/blob/main/ml/ca_wildfire_ml_xgboost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import xgboost as xbg
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import pandas as pd
import plotly.express as px

In [2]:
train_source = "https://github.com/prodramp/wildfire/raw/main/california-data/ca_fire_train.csv.zip"
valid_source = "https://github.com/prodramp/wildfire/raw/main/california-data/ca_fire_valid.csv.zip"
test_source = "https://github.com/prodramp/wildfire/raw/main/california-data/ca_fire_test.csv.zip"

In [3]:
train = pd.read_csv(train_source)
valid = pd.read_csv(valid_source)
test = pd.read_csv(test_source)

In [4]:
features = [
    'latitude', 'longitude', 'month',
    'fire_cnt_before', 'fire_before',
    'fire_cnt_last_year', 'fire_last_year',
    'fire_cnt_last_year_same_month', 'fire_last_year_same_month'
]

In [5]:
train.columns

Index(['latitude', 'longitude', 'year', 'month', 'fire_count', 'fire',
       'fire_cnt_before', 'fire_before', 'fire_cnt_last_year',
       'fire_last_year', 'fire_cnt_last_year_same_month',
       'fire_last_year_same_month'],
      dtype='object')

In [6]:
xgbc = XGBClassifier(max_depth=10,colsample_bytree=0.8,learning_rate=0.02,n_estimators=100,nthread=-1)

In [None]:
# Option one - without validation dataset
xgbc.fit(train[features],train.fire, verbose=True)

In [7]:
# Option two - With validation dataset
eval_set = [(valid[features], valid.fire)]
xgbc.fit(train[features],train.fire, eval_set=eval_set, verbose=True)

[0]	validation_0-error:0.071106
[1]	validation_0-error:0.066273
[2]	validation_0-error:0.066434
[3]	validation_0-error:0.066188
[4]	validation_0-error:0.066426
[5]	validation_0-error:0.066494
[6]	validation_0-error:0.066358
[7]	validation_0-error:0.065714
[8]	validation_0-error:0.06562
[9]	validation_0-error:0.065637
[10]	validation_0-error:0.065612
[11]	validation_0-error:0.066078
[12]	validation_0-error:0.06562
[13]	validation_0-error:0.065536
[14]	validation_0-error:0.06551
[15]	validation_0-error:0.065205
[16]	validation_0-error:0.065374
[17]	validation_0-error:0.065154
[18]	validation_0-error:0.064908
[19]	validation_0-error:0.065307
[20]	validation_0-error:0.065035
[21]	validation_0-error:0.065324
[22]	validation_0-error:0.065273
[23]	validation_0-error:0.065264
[24]	validation_0-error:0.06501
[25]	validation_0-error:0.065256
[26]	validation_0-error:0.065374
[27]	validation_0-error:0.0654
[28]	validation_0-error:0.065425
[29]	validation_0-error:0.065408
[30]	validation_0-error:0.

XGBClassifier(colsample_bytree=0.8, learning_rate=0.02, max_depth=10,
              nthread=-1)

In [None]:
# Optional - Cross Validation Score 
scores = cross_val_score(xgbc, train[features],train.fire, cv=5)

In [None]:
print("Mean cross-validation score: %.5f" % scores.mean())

In [8]:
test_predictions = xgbc.predict(test[features])

In [9]:
test_auc = metrics.roc_auc_score(test.fire, test_predictions)
test_auc

0.7801654363270638

In [10]:
fpr, tpr, thr = metrics.roc_curve(test.fire, test_predictions)
px.line(pd.DataFrame(dict(FPR=fpr, TPR=tpr)),
        x='FPR', y='TPR', title='Wildfire Hotspot model performance for 2022')
