# Wild Fire Challenge
## Team DeapMind
Data Preparation


- Import the relevant libraries.
- Load the CSV file into a dataframe.
- Parse the acquisition date into separate year, month, and day columns.


In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
import random
InteractiveShell.ast_node_interactivity = "all"
import os
from datetime import datetime
import matplotlib.pyplot as plt
#import plotly.express as px
import glob
from tqdm import tqdm

# import plotly.figure_factory as ff
# import plotly.express as px


In [3]:
# Load the compressed fire-detection CSV; `acq_date` is parsed as datetime so
# that the `.dt` accessor can be used on it in the feature-engineering step.
aus_fires = pd.read_csv(
    './data/australia_1_1.csv.gz',
    parse_dates=['acq_date'],
)
aus_fires.shape
aus_fires.head()

(1445364, 15)

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,-14.9764,145.2801,320.6,2.0,1.4,2015-01-01,104,Terra,MODIS,24,6.2,294.8,25.0,D,0
1,-15.8931,136.6094,324.4,1.2,1.1,2015-01-01,104,Terra,MODIS,28,6.2,302.3,11.6,D,0
2,-18.5115,139.5995,331.8,1.0,1.0,2015-01-01,105,Terra,MODIS,37,6.2,305.0,19.1,D,0
3,-19.0015,121.9994,326.9,2.9,1.6,2015-01-01,243,Terra,MODIS,37,6.2,300.0,46.9,D,0
4,-18.0765,122.69,314.5,3.2,1.7,2015-01-01,243,Terra,MODIS,25,6.2,292.5,42.0,D,0


# Feature Engineering

<ul>
<li>Estimate the fire area by multiplying the along-scan value (<code>scan</code>) by the along-track value (<code>track</code>).</li>
<li>Keep only the relevant columns of the dataframe.</li>
<li>Count the fire detections per location and day, and merge the count back into the dataframe.</li>
<li>Estimate how critical each fire is from the confidence value and the fire count:
<ul>
<li>0 - No fire</li>
<li>1 - Low risk areas</li>
<li>2 - Medium risk areas</li>
<li>3 - High risk areas</li>
<li>4 - Critical areas</li>
</ul>
</li>
<li>Round the float values, converting them to int where applicable.</li>
</ul>



In [4]:
# Split the acquisition date into calendar parts; these are used as grouping
# keys when detections are counted per location and day.
acq_date = aus_fires['acq_date']
aus_fires['year'] = acq_date.dt.year
aus_fires['month'] = acq_date.dt.month
aus_fires['day'] = acq_date.dt.day

# Per-detection derived features: scan * track as an area estimate, and the
# mean of the two brightness columns.
scan, track = aus_fires['scan'], aus_fires['track']
aus_fires['est_fire_area'] = scan * track
brightness_sum = aus_fires['brightness'] + aus_fires['bright_t31']
aus_fires['est_brightness'] = brightness_sum / 2

# Round coordinates to one decimal so nearby detections share a key.
for coord in ('latitude', 'longitude'):
    aus_fires[coord] = aus_fires[coord].round(1)

aus_fires.shape
aus_fires.head()

(1445364, 20)

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,year,month,day,est_fire_area,est_brightness
0,-15.0,145.3,320.6,2.0,1.4,2015-01-01,104,Terra,MODIS,24,6.2,294.8,25.0,D,0,2015,1,1,2.8,307.7
1,-15.9,136.6,324.4,1.2,1.1,2015-01-01,104,Terra,MODIS,28,6.2,302.3,11.6,D,0,2015,1,1,1.32,313.35
2,-18.5,139.6,331.8,1.0,1.0,2015-01-01,105,Terra,MODIS,37,6.2,305.0,19.1,D,0,2015,1,1,1.0,318.4
3,-19.0,122.0,326.9,2.9,1.6,2015-01-01,243,Terra,MODIS,37,6.2,300.0,46.9,D,0,2015,1,1,4.64,313.45
4,-18.1,122.7,314.5,3.2,1.7,2015-01-01,243,Terra,MODIS,25,6.2,292.5,42.0,D,0,2015,1,1,5.44,303.5


In [5]:
# Keep only the columns used downstream; `.copy()` detaches the result from
# `aus_fires` so later edits do not touch the raw frame.
fire_columns = [
    'latitude', 'longitude', 'year', 'month', 'day',
    'confidence', 'est_fire_area', 'est_brightness', 'frp', 'type', 'daynight',
]
fires = aus_fires.loc[:, fire_columns].copy()

fires.shape
fires.head(10)
fires.nunique()

(1445364, 11)

Unnamed: 0,latitude,longitude,year,month,day,confidence,est_fire_area,est_brightness,frp,type,daynight
0,-15.0,145.3,2015,1,1,24,2.8,307.7,25.0,0,D
1,-15.9,136.6,2015,1,1,28,1.32,313.35,11.6,0,D
2,-18.5,139.6,2015,1,1,37,1.0,318.4,19.1,0,D
3,-19.0,122.0,2015,1,1,37,4.64,313.45,46.9,0,D
4,-18.1,122.7,2015,1,1,25,5.44,303.5,42.0,0,D
5,-18.1,122.7,2015,1,1,31,5.44,303.3,39.2,0,D
6,-17.1,122.3,2015,1,1,0,4.64,305.3,36.1,0,D
7,-22.0,116.4,2015,1,1,100,1.43,346.1,117.1,0,D
8,-22.0,116.4,2015,1,1,28,1.43,335.55,35.9,0,D
9,-22.0,116.4,2015,1,1,66,1.43,332.85,26.2,0,D


latitude            328
longitude           403
year                  6
month                12
day                  31
confidence          101
est_fire_area        47
est_brightness     4327
frp               14934
type                  3
daynight              2
dtype: int64

In [6]:
# Number of detections sharing the same rounded location and calendar day.
cell_day_keys = ['latitude', 'longitude', 'year', 'month', 'day']
count = fires.groupby(cell_day_keys).size().reset_index(name='fire_count')
count
# Attach the per-cell count to every individual detection row.
fire_copy = fires.merge(count, how='outer', on=cell_day_keys)
fire_copy['type']

Unnamed: 0,latitude,longitude,year,month,day,fire_count
0,-43.5,146.2,2018,3,23,6
1,-43.5,146.8,2020,3,27,1
2,-43.5,146.9,2020,3,18,1
3,-43.4,146.9,2015,4,14,2
4,-43.4,146.9,2016,4,24,3
...,...,...,...,...,...,...
428766,-9.4,142.7,2020,11,12,1
428767,-9.3,142.3,2020,11,10,1
428768,-9.2,142.2,2018,10,13,1
428769,-9.2,142.2,2018,11,23,1


0          0
1          0
2          0
3          0
4          0
          ..
1445359    0
1445360    0
1445361    0
1445362    2
1445363    2
Name: type, Length: 1445364, dtype: int64

In [7]:
def fire_happend(fire_copy): # here fire_copy == row
    """Map one detection row to a risk level between 0 and 4.

    Parameters
    ----------
    fire_copy : mapping-like row
        Must provide the keys 'confidence', 'fire_count' and 'type'.

    Returns
    -------
    int
        0 when 'type' is not 0, when confidence is below 0, or when
        fire_count is below 1. Otherwise the confidence band
        (1 for 0-39, 2 for 40-69, 3 for 70 and above), raised by one when
        more than one detection shares the row's location and day.
    """
    confidence = fire_copy['confidence']
    fire_count = fire_copy['fire_count']
    fire_type = fire_copy['type']

    # Only rows with type 0 are scored (presumably vegetation fires in the
    # source data -- TODO confirm against the data dictionary).
    if not (fire_type == 0):
        return 0

    if confidence >= 70:
        band = 3
    elif confidence >= 40:
        band = 2
    elif confidence >= 0:
        band = 1
    else:
        return 0

    if fire_count > 1:
        return band + 1
    if fire_count == 1:
        return band
    return 0

# Score every detection row; axis=1 passes one row at a time to fire_happend.
fire_copy['est_fire_happend'] = fire_copy.apply(fire_happend, axis=1)
fire_copy

Unnamed: 0,latitude,longitude,year,month,day,confidence,est_fire_area,est_brightness,frp,type,daynight,fire_count,est_fire_happend
0,-15.0,145.3,2015,1,1,24,2.80,307.70,25.0,0,D,1,1
1,-15.9,136.6,2015,1,1,28,1.32,313.35,11.6,0,D,1,1
2,-18.5,139.6,2015,1,1,37,1.00,318.40,19.1,0,D,1,1
3,-19.0,122.0,2015,1,1,37,4.64,313.45,46.9,0,D,1,1
4,-18.1,122.7,2015,1,1,25,5.44,303.50,42.0,0,D,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1445359,-28.3,122.3,2020,12,31,31,3.30,299.50,32.8,0,N,3,2
1445360,-28.3,122.3,2020,12,31,65,1.32,306.00,20.6,0,N,3,3
1445361,-28.3,122.3,2020,12,31,100,1.32,321.10,69.0,0,N,3,4
1445362,-30.9,121.5,2020,12,31,73,1.32,300.20,10.3,2,N,2,0


In [8]:
# Collapse to one row per (location, day) by averaging the numeric columns.
# Averaging `est_fire_happend` can produce fractional risk levels.
group_keys = ['latitude', 'longitude', 'year', 'month', 'day']
averaged_cols = ['fire_count', 'confidence', 'frp', 'est_fire_area',
                 'est_brightness', 'est_fire_happend']
fire_copy = fire_copy.groupby(group_keys)[averaged_cols].mean().reset_index()

In [9]:
# Inspect the aggregated frame and its shape (one row per location and day).
fire_copy

fire_copy.shape

Unnamed: 0,latitude,longitude,year,month,day,fire_count,confidence,frp,est_fire_area,est_brightness,est_fire_happend
0,-43.5,146.2,2018,3,23,6.0,56.000000,92.700000,5.433333,306.166667,3.000000
1,-43.5,146.8,2020,3,27,1.0,0.000000,33.600000,1.210000,306.100000,1.000000
2,-43.5,146.9,2020,3,18,1.0,100.000000,0.000000,1.680000,327.850000,3.000000
3,-43.4,146.9,2015,4,14,2.0,84.000000,14.750000,1.000000,297.025000,4.000000
4,-43.4,146.9,2016,4,24,3.0,64.333333,135.966667,3.196667,309.383333,3.333333
...,...,...,...,...,...,...,...,...,...,...,...
428766,-9.4,142.7,2020,11,12,1.0,0.000000,19.500000,2.340000,306.550000,1.000000
428767,-9.3,142.3,2020,11,10,1.0,0.000000,8.500000,1.100000,308.400000,1.000000
428768,-9.2,142.2,2018,10,13,1.0,38.000000,8.800000,1.000000,312.350000,1.000000
428769,-9.2,142.2,2018,11,23,1.0,37.000000,9.800000,1.100000,310.300000,1.000000


(428771, 11)

In [10]:
# Group means are floats: restore integer dtype for the label, the confidence
# score and the detection count ...
for int_col in ('est_fire_happend', 'confidence', 'fire_count'):
    fire_copy[int_col] = fire_copy[int_col].round().astype(int)

# ... and keep one decimal place for the continuous measurements.
for float_col in ('est_fire_area', 'est_brightness', 'frp'):
    fire_copy[float_col] = fire_copy[float_col].round(1)

fire_copy.head(20)

Unnamed: 0,latitude,longitude,year,month,day,fire_count,confidence,frp,est_fire_area,est_brightness,est_fire_happend
0,-43.5,146.2,2018,3,23,6,56,92.7,5.4,306.2,3
1,-43.5,146.8,2020,3,27,1,0,33.6,1.2,306.1,1
2,-43.5,146.9,2020,3,18,1,100,0.0,1.7,327.8,3
3,-43.4,146.9,2015,4,14,2,84,14.8,1.0,297.0,4
4,-43.4,146.9,2016,4,24,3,64,136.0,3.2,309.4,3
5,-43.4,146.9,2016,4,25,2,66,45.7,2.9,301.8,3
6,-43.4,146.9,2017,5,19,1,88,74.0,1.3,315.2,3
7,-43.4,146.9,2018,4,4,3,79,148.4,1.3,325.8,3
8,-43.4,146.9,2018,4,20,1,28,15.3,1.3,298.4,1
9,-43.4,146.9,2018,5,1,2,69,100.4,2.9,311.6,4


In [11]:
# Distinct values per column after aggregation and rounding.
fire_copy.nunique()

latitude             328
longitude            403
year                   6
month                 12
day                   31
fire_count           110
confidence           101
frp                 5289
est_fire_area         87
est_brightness       939
est_fire_happend       5
dtype: int64

In [12]:
# Order rows by location (latitude first, then longitude).
sort_keys = ['latitude', 'longitude']
fire_copy = fire_copy.sort_values(by=sort_keys)
fire_copy["year"]

0         2018
1         2020
2         2020
3         2015
4         2016
          ... 
428766    2020
428767    2020
428768    2018
428769    2018
428770    2018
Name: year, Length: 428771, dtype: int64

# Model Training

<ul>
    <li>Split the dataset into train, validation, and test sets.</li>
    <li><b>Import the LightGBM model.</b>
        Why LightGBM? LightGBM can handle a large amount of data, uses less memory, supports parallel and GPU learning, and offers good accuracy with fast, efficient training. These properties make LightGBM a good fit for this task.
        </li>
    <li><b>Set the hyperparameters.</b> A multiclass classification objective is used.</li>
    <li>Train the model.</li>
    <li>Evaluate the model on the test data (one-vs-rest ROC AUC) and save the model.</li>
    
</ul>

In [13]:
# Time-based split: years before 2019 train, 2019 validates, 2020 tests.
fire_year = fire_copy['year']
train = fire_copy.loc[fire_year < 2019].dropna()
valid = fire_copy.loc[(fire_year >= 2019) & (fire_year < 2020)]
test = fire_copy.loc[fire_year == 2020]

# Persist each split next to the notebook, without the index column.
for split_frame, csv_name in (
    (train, 'australia_fire_train.csv'),
    (valid, 'australia_fire_valid.csv'),
    (test, 'australia_fire_test.csv'),
):
    split_frame.to_csv(csv_name, index=False)

In [14]:
!pip install lightgbm

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [15]:
# Row/column counts of the three splits.
train.shape
test.shape
valid.shape

(303652, 11)

(49128, 11)

(75991, 11)

In [16]:
import lightgbm as lgb
from sklearn import metrics

In [17]:
# Model input columns, grouped by kind. The order matches the original list.
# NOTE(review): the target `est_fire_happend` is computed in `fire_happend`
# from `confidence`, `fire_count` and `type`, so `confidence` and `fire_count`
# carry most of the label information -- confirm this leakage is intended.
features = (
    ['latitude', 'longitude']                     # location
    + ['year', 'month', 'day']                    # acquisition date
    + ['est_fire_area', 'est_brightness', 'frp']  # fire measurements
    + ['fire_count', 'confidence']                # label-defining inputs
)

In [18]:
# Check the training columns against the `features` list.
train.columns

Index(['latitude', 'longitude', 'year', 'month', 'day', 'fire_count',
       'confidence', 'frp', 'est_fire_area', 'est_brightness',
       'est_fire_happend'],
      dtype='object')

In [19]:
# Wrap the train/validation splits as LightGBM datasets, labelled with the
# 0-4 risk level.
train_data = lgb.Dataset(data=train[features], label=train['est_fire_happend'])
valid_data = lgb.Dataset(data=valid[features], label=valid['est_fire_happend'])

In [20]:
# LightGBM settings for a 5-class problem (risk levels 0-4), scored on the
# validation set with multiclass log-loss.
parameters = dict(
    num_leaves=10,
    max_depth=10,
    objective='multiclass',
    num_class=5,
    metric='multi_logloss',
    learning_rate=0.03,
    boosting_type='gbdt',
)
# Upper bound on boosting rounds.
num_round = 500

In [21]:
# Train with early stopping on the validation set.
# The `early_stopping_rounds` / `verbose_eval` keyword arguments were removed
# from `lgb.train` in LightGBM 4.0; the callbacks below are the supported
# equivalent: stop after 20 rounds without improvement, log every 50 rounds.
model = lgb.train(
    parameters,
    train_data,
    num_boost_round=num_round,
    valid_sets=[valid_data],
    callbacks=[
        lgb.early_stopping(stopping_rounds=20),
        lgb.log_evaluation(period=50),
    ],
)



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1298
[LightGBM] [Info] Number of data points in the train set: 303652, number of used features: 10
[LightGBM] [Info] Start training from score -4.658439
[LightGBM] [Info] Start training from score -2.891888
[LightGBM] [Info] Start training from score -1.404546
[LightGBM] [Info] Start training from score -1.043334
[LightGBM] [Info] Start training from score -1.086810
Training until validation scores don't improve for 20 rounds
[50]	valid_0's multi_logloss: 0.282063
[100]	valid_0's multi_logloss: 0.204394
[150]	valid_0's multi_logloss: 0.188961
[200]	valid_0's multi_logloss: 0.181956
[250]	valid_0's multi_logloss: 0.177837
Early stopping, best iteration is:
[268]	valid_0's multi_logloss: 0.176812


In [22]:
# Per-class probabilities for the 2020 test split: one row per test row and
# one column per risk level (0-4).
test_predictions = model.predict(test[features])


In [23]:
# One-vs-rest ROC AUC of the predicted class probabilities on the test split.
test_auc = metrics.roc_auc_score(
    y_true=test['est_fire_happend'],
    y_score=test_predictions,
    multi_class='ovr',
)
test_auc

0.9877411141564709

In [24]:
# Persist the trained booster to a text file in the working directory.
model.save_model('lightgbm_model_latest.txt')

<lightgbm.basic.Booster at 0x7f69357aa760>

In [25]:
# Inspect the raw per-class probability array.
test_predictions


array([[2.45729594e-02, 9.72054288e-01, 2.36147775e-03, 7.96428880e-04,
        2.14845803e-04],
       [2.99115998e-02, 1.15122279e-04, 1.34243350e-03, 9.68534101e-01,
        9.67438797e-05],
       [1.94044737e-02, 2.42410392e-02, 5.78471793e-01, 3.76780999e-01,
        1.10169577e-03],
       ...,
       [1.73627100e-03, 2.85265230e-04, 9.97476504e-01, 4.57291134e-04,
        4.46683642e-05],
       [3.32642586e-03, 9.95602358e-01, 8.52716196e-04, 1.56051120e-04,
        6.24488275e-05],
       [5.00605891e-03, 9.93810952e-01, 9.67102395e-04, 1.53890933e-04,
        6.19955359e-05]])

In [26]:
# Work on a copy of the test split so predictions can be attached without
# modifying `test`.
sample = test.copy()
sample

Unnamed: 0,latitude,longitude,year,month,day,fire_count,confidence,frp,est_fire_area,est_brightness,est_fire_happend
1,-43.5,146.8,2020,3,27,1,0,33.6,1.2,306.1,1
2,-43.5,146.9,2020,3,18,1,100,0.0,1.7,327.8,3
13,-43.4,146.9,2020,3,27,2,32,95.8,1.3,320.6,2
14,-43.4,146.9,2020,3,28,4,75,91.0,4.2,309.4,4
87,-43.2,147.9,2020,4,9,1,0,12.4,1.2,300.0,1
...,...,...,...,...,...,...,...,...,...,...,...
428727,-10.2,142.3,2020,9,22,2,72,13.1,1.3,312.3,4
428752,-10.1,142.2,2020,10,2,1,70,14.7,1.0,314.7,3
428761,-9.4,142.6,2020,10,24,1,41,11.7,1.3,310.2,2
428766,-9.4,142.7,2020,11,12,1,0,19.5,2.3,306.6,1


In [27]:
# Re-run the prediction (same call as the earlier predict cell) and confirm
# that the probability array has one row per test row.
test.shape
test_predictions = model.predict(test[features])
test_predictions.shape
test

(49128, 11)

(49128, 5)

Unnamed: 0,latitude,longitude,year,month,day,fire_count,confidence,frp,est_fire_area,est_brightness,est_fire_happend
1,-43.5,146.8,2020,3,27,1,0,33.6,1.2,306.1,1
2,-43.5,146.9,2020,3,18,1,100,0.0,1.7,327.8,3
13,-43.4,146.9,2020,3,27,2,32,95.8,1.3,320.6,2
14,-43.4,146.9,2020,3,28,4,75,91.0,4.2,309.4,4
87,-43.2,147.9,2020,4,9,1,0,12.4,1.2,300.0,1
...,...,...,...,...,...,...,...,...,...,...,...
428727,-10.2,142.3,2020,9,22,2,72,13.1,1.3,312.3,4
428752,-10.1,142.2,2020,10,2,1,70,14.7,1.0,314.7,3
428761,-9.4,142.6,2020,10,24,1,41,11.7,1.3,310.2,2
428766,-9.4,142.7,2020,11,12,1,0,19.5,2.3,306.6,1


In [28]:
# Probabilities as a DataFrame: columns 0-4 are the risk levels, and rows are
# in the same order as `test` (default RangeIndex, not test's index).
prediction = pd.DataFrame(test_predictions)
prediction

Unnamed: 0,0,1,2,3,4
0,0.024573,0.972054,0.002361,0.000796,0.000215
1,0.029912,0.000115,0.001342,0.968534,0.000097
2,0.019404,0.024241,0.578472,0.376781,0.001102
3,0.001452,0.000278,0.000645,0.037544,0.960081
4,0.022063,0.974574,0.002809,0.000405,0.000150
...,...,...,...,...,...
49123,0.000132,0.000049,0.000475,0.004936,0.994407
49124,0.003469,0.000057,0.000659,0.995704,0.000111
49125,0.001736,0.000285,0.997477,0.000457,0.000045
49126,0.003326,0.995602,0.000853,0.000156,0.000062


In [29]:
# Turn each class probability into a 0/1 flag by rounding to the nearest
# integer. A row in which no class reaches 0.5 becomes all zeros.
for class_idx in range(5):
    prediction[class_idx] = prediction[class_idx].round().astype(int)
prediction


Unnamed: 0,0,1,2,3,4
0,0,1,0,0,0
1,0,0,0,1,0
2,0,0,1,0,0
3,0,0,0,0,1
4,0,1,0,0,0
...,...,...,...,...,...
49123,0,0,0,0,1
49124,0,0,0,1,0
49125,0,0,1,0,0
49126,0,1,0,0,0


In [30]:
def prediction_handle(prediction): #prediction = row
    """Return the class whose flag equals 1 in a row of per-class flags.

    Parameters
    ----------
    prediction : indexable row
        Entries 0-4 hold the flag for risk levels 0-4.

    Returns
    -------
    int
        The lowest index in 0-4 whose entry equals 1, or 0 when no entry
        equals 1.
    """
    for class_idx in range(5):
        if prediction[class_idx] == 1:
            return class_idx
    # No class was flagged: fall back to class 0.
    return 0
    
      
    

In [31]:
# Predicted class = index of the highest probability in each row, taken from
# the raw `test_predictions` array (same row order as `prediction`).
# Scanning the rounded 0/1 columns for the first 1 labels a row as class 0
# whenever no single class reaches 0.5 (e.g. 0.45 / 0.35 / 0.20), even if
# class 0 is the least likely; argmax always picks the most likely class.
prediction['prediction'] = np.argmax(test_predictions, axis=1)
prediction

Unnamed: 0,0,1,2,3,4,prediction
0,0,1,0,0,0,1
1,0,0,0,1,0,3
2,0,0,1,0,0,2
3,0,0,0,0,1,4
4,0,1,0,0,0,1
...,...,...,...,...,...,...
49123,0,0,0,0,1,4
49124,0,0,0,1,0,3
49125,0,0,1,0,0,2
49126,0,1,0,0,0,1


In [32]:
# Re-display the copy of the test split before predictions are attached.
sample

Unnamed: 0,latitude,longitude,year,month,day,fire_count,confidence,frp,est_fire_area,est_brightness,est_fire_happend
1,-43.5,146.8,2020,3,27,1,0,33.6,1.2,306.1,1
2,-43.5,146.9,2020,3,18,1,100,0.0,1.7,327.8,3
13,-43.4,146.9,2020,3,27,2,32,95.8,1.3,320.6,2
14,-43.4,146.9,2020,3,28,4,75,91.0,4.2,309.4,4
87,-43.2,147.9,2020,4,9,1,0,12.4,1.2,300.0,1
...,...,...,...,...,...,...,...,...,...,...,...
428727,-10.2,142.3,2020,9,22,2,72,13.1,1.3,312.3,4
428752,-10.1,142.2,2020,10,2,1,70,14.7,1.0,314.7,3
428761,-9.4,142.6,2020,10,24,1,41,11.7,1.3,310.2,2
428766,-9.4,142.7,2020,11,12,1,0,19.5,2.3,306.6,1


In [33]:
# Predicted class per test row, as a Series indexed 0..n-1.
pred = prediction['prediction']

In [34]:
# Inspect the predicted classes.
pred

0        1
1        3
2        2
3        4
4        1
        ..
49123    4
49124    3
49125    2
49126    1
49127    1
Name: prediction, Length: 49128, dtype: int64

In [35]:
# `sample` still carries the index of the test split, not 0..n-1.
sample

Unnamed: 0,latitude,longitude,year,month,day,fire_count,confidence,frp,est_fire_area,est_brightness,est_fire_happend
1,-43.5,146.8,2020,3,27,1,0,33.6,1.2,306.1,1
2,-43.5,146.9,2020,3,18,1,100,0.0,1.7,327.8,3
13,-43.4,146.9,2020,3,27,2,32,95.8,1.3,320.6,2
14,-43.4,146.9,2020,3,28,4,75,91.0,4.2,309.4,4
87,-43.2,147.9,2020,4,9,1,0,12.4,1.2,300.0,1
...,...,...,...,...,...,...,...,...,...,...,...
428727,-10.2,142.3,2020,9,22,2,72,13.1,1.3,312.3,4
428752,-10.1,142.2,2020,10,2,1,70,14.7,1.0,314.7,3
428761,-9.4,142.6,2020,10,24,1,41,11.7,1.3,310.2,2
428766,-9.4,142.7,2020,11,12,1,0,19.5,2.3,306.6,1


In [36]:
# Assign by position via `.values`: `sample` keeps the test split's original
# row labels while `prediction` uses a fresh 0..n-1 index, so label-based
# alignment would mismatch rows.
sample['pred2'] = prediction['prediction'].values

In [37]:
# Compare the label `est_fire_happend` with the predicted class `pred2`.
sample

Unnamed: 0,latitude,longitude,year,month,day,fire_count,confidence,frp,est_fire_area,est_brightness,est_fire_happend,pred2
1,-43.5,146.8,2020,3,27,1,0,33.6,1.2,306.1,1,1
2,-43.5,146.9,2020,3,18,1,100,0.0,1.7,327.8,3,3
13,-43.4,146.9,2020,3,27,2,32,95.8,1.3,320.6,2,2
14,-43.4,146.9,2020,3,28,4,75,91.0,4.2,309.4,4,4
87,-43.2,147.9,2020,4,9,1,0,12.4,1.2,300.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
428727,-10.2,142.3,2020,9,22,2,72,13.1,1.3,312.3,4,4
428752,-10.1,142.2,2020,10,2,1,70,14.7,1.0,314.7,3,3
428761,-9.4,142.6,2020,10,24,1,41,11.7,1.3,310.2,2,2
428766,-9.4,142.7,2020,11,12,1,0,19.5,2.3,306.6,1,1
