<a href="https://colab.research.google.com/github/prodramp/wildfire/blob/main/ml/ca_wildfire_ml_lightgbm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import lightgbm as lgb
from sklearn import metrics
import pandas as pd
import plotly.express as px

In [2]:
train_source = r"C:\Users\HP\Downloads\wildfire-main\california-data\ca_fire_train.csv.zip"
valid_source = r"C:\Users\HP\Downloads\wildfire-main\california-data\ca_fire_valid.csv.zip"
test_source = r"C:\Users\HP\Downloads\wildfire-main\california-data\ca_fire_test.csv.zip"

In [3]:
train = pd.read_csv(train_source)
valid = pd.read_csv(valid_source)
test = pd.read_csv(test_source)

In [4]:
train.shape

(1071252, 12)

In [5]:
valid.shape

(117936, 12)

In [6]:
test.shape

(14742, 12)

In [7]:
features = [
    'latitude', 'longitude', 'month',
    'fire_cnt_before', 'fire_before',
    'fire_cnt_last_year', 'fire_last_year',
    'fire_cnt_last_year_same_month', 'fire_last_year_same_month'
]

In [8]:
train.columns

Index(['latitude', 'longitude', 'year', 'month', 'fire_count', 'fire',
       'fire_cnt_before', 'fire_before', 'fire_cnt_last_year',
       'fire_last_year', 'fire_cnt_last_year_same_month',
       'fire_last_year_same_month'],
      dtype='object')

In [9]:
train_data = lgb.Dataset(train[features], label=train.fire)
valid_data = lgb.Dataset(valid[features], label=valid.fire)

In [10]:
parameters = {'num_leaves': 10, 'max_depth': 8, 'objective': 'binary', 'metric': 'auc'}
num_round = 500

In [11]:
model = lgb.train(parameters, train_data, num_round, valid_sets=[valid_data])

[LightGBM] [Info] Number of positive: 45684, number of negative: 1025568
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.034758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 1071252, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.042645 -> initscore=-3.111254
[LightGBM] [Info] Start training from score -3.111254


In [12]:
test_predictions = model.predict(test[features])

In [13]:
test_auc = metrics.roc_auc_score(test.fire, test_predictions)
test_auc

0.9723113492972817

In [14]:
fpr, tpr, thr = metrics.roc_curve(test.fire, test_predictions)
px.line(pd.DataFrame(dict(FPR=fpr, TPR=tpr)),
        x='FPR', y='TPR', title='Wildfire Hotspot model performance for 2022')
