# Creating wildfire dataset for any country using global satellite data #
## We are using country USA and state California as our target country and region in this example ##

## Resources: ##
    - https://medium.com/ibm-data-ai/predicting-australian-wildfires-with-weather-forecast-data-8d1cc983c863
    - https://github.com/Call-for-Code/Spot-Challenge-Wildfires
    - https://h2o.ai/wildfire/
    - https://github.com/h2oai/challenge-wildfires/blob/main/notebook/DataPreparation.ipynb
    - https://github.com/mapbox/mapboxgl-jupyter
    - https://www.bigendiandata.com/2017-06-27-Mapping_in_Jupyter/

![modpas-nasa](https://github.com/prodramp/wildfire/blob/main/images/modaps-nasa.png?raw=true)

## Data Collection ##
- Please Visit https://firms.modaps.eosdis.nasa.gov/download/ to download both MODIS (2000-2020) and VIIRS (2012-2020) datasets based on each country wildfire data
  - MODIS - https://firms.modaps.eosdis.nasa.gov/country/
  - VIIRS - https://firms.modaps.eosdis.nasa.gov/country/
  - Active Fire (24 Hours, 48 Hours, 7 days)-  https://firms.modaps.eosdis.nasa.gov/usfs/active_fire/


<div style="max-width:400px;">
    <img src="https://github.com/prodramp/wildfire/blob/main/images/disclaimer.png?raw=true" size="400px"/>
</div>

In [1]:
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
import random
InteractiveShell.ast_node_interactivity = "all"
import os
from datetime import datetime
import plotly.express as px
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
!ls -l ../

total 8
-rw-r--r--   1 avkash  staff  1044 Mar 26 15:53 README.md
drwxr-xr-x@ 19 avkash  staff   608 Mar 27 08:03 [1m[36mdataset[m[m
drwxr-xr-x   6 avkash  staff   192 Mar 26 08:03 [1m[36mimages[m[m
drwxr-xr-x  10 avkash  staff   320 Mar 27 08:20 [1m[36mml[m[m
drwxr-xr-x@  5 avkash  staff   160 Mar 25 15:32 [1m[36mwildfire-data[m[m
drwxr-xr-x   9 avkash  staff   288 Mar 26 14:35 [1m[36mwildfire-data-streamlit[m[m
drwxr-xr-x@ 24 avkash  staff   768 Mar 25 15:29 [1m[36mzips_modis[m[m
drwxr-xr-x@ 13 avkash  staff   416 Mar 25 15:26 [1m[36mzips_viirs[m[m


In [3]:
! ls -lh ../wildfire-data

total 0
drwxr-xr-x@ 24 avkash  staff   768B Mar 25 15:28 [1m[36mmodis[m[m
drwxr-xr-x@ 13 avkash  staff   416B Mar 25 15:26 [1m[36mviirs-snpp[m[m


In [4]:
! ls -lh ../wildfire-data/modis

total 0
drwxr-xr-x@ 165 avkash  staff   5.2K Feb 12  2019 [1m[36m2000[m[m
drwxr-xr-x@ 199 avkash  staff   6.2K Nov 16 09:06 [1m[36m2001[m[m
drwxr-xr-x@ 201 avkash  staff   6.3K Nov 16 09:06 [1m[36m2002[m[m
drwxr-xr-x@ 206 avkash  staff   6.4K Nov 16 09:06 [1m[36m2003[m[m
drwxr-xr-x@ 204 avkash  staff   6.4K Nov 16 09:06 [1m[36m2004[m[m
drwxr-xr-x@ 210 avkash  staff   6.6K Nov 16 09:06 [1m[36m2005[m[m
drwxr-xr-x@ 204 avkash  staff   6.4K Nov 16 09:06 [1m[36m2006[m[m
drwxr-xr-x@ 210 avkash  staff   6.6K Nov 16 09:06 [1m[36m2007[m[m
drwxr-xr-x@ 207 avkash  staff   6.5K Nov 16 09:06 [1m[36m2008[m[m
drwxr-xr-x@ 207 avkash  staff   6.5K Nov 16 09:06 [1m[36m2009[m[m
drwxr-xr-x@ 205 avkash  staff   6.4K Nov 16 09:06 [1m[36m2010[m[m
drwxr-xr-x@ 203 avkash  staff   6.3K Nov 16 09:06 [1m[36m2011[m[m
drwxr-xr-x@ 206 avkash  staff   6.4K Nov 16 09:06 [1m[36m2012[m[m
drwxr-xr-x@ 209 avkash  staff   6.5K Nov 16 09:06 [1m[36m2013[m[

In [5]:
! ls -lh ../wildfire-data/viirs-snpp

total 0
drwxr-xr-x@ 222 avkash  staff   6.9K Nov 16 09:04 [1m[36m2012[m[m
drwxr-xr-x@ 225 avkash  staff   7.0K Nov 16 09:04 [1m[36m2013[m[m
drwxr-xr-x@ 216 avkash  staff   6.8K Nov 16 09:04 [1m[36m2014[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Nov 16 09:04 [1m[36m2015[m[m
drwxr-xr-x@ 222 avkash  staff   6.9K Nov 16 09:04 [1m[36m2016[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Nov 16 09:04 [1m[36m2017[m[m
drwxr-xr-x@ 221 avkash  staff   6.9K Nov 16 09:04 [1m[36m2018[m[m
drwxr-xr-x@ 219 avkash  staff   6.8K Nov 16 09:04 [1m[36m2019[m[m
drwxr-xr-x@ 220 avkash  staff   6.9K Nov 16 09:04 [1m[36m2020[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Jan 27 05:18 [1m[36m2021[m[m


In [126]:
all_csv_files = glob.glob('../wildfire-data/**/*.csv', recursive=True)

In [127]:
all_csv_files

['../wildfire-data/MODIS-01012022-03252022-United_States.csv',
 '../wildfire-data/VIIRS-SUOMI-01012022-03252022-United_States.csv',
 '../wildfire-data/VIIRS-J1-01012022-03252022-United_States.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Reunion.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Barbados.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Greenland.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Burundi.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Nepal.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Moldova.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Papua_New_Guinea.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Cook_Islands.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Belgium.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Ethiopia.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Cuba.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Democratic_Republic_of_

In [128]:
len(all_csv_files)

6434

In [129]:
type(all_csv_files)

list

In [130]:
all_us_files = list(filter(lambda k: 'United_States' in k, all_csv_files))

In [131]:
all_us_files

['../wildfire-data/MODIS-01012022-03252022-United_States.csv',
 '../wildfire-data/VIIRS-SUOMI-01012022-03252022-United_States.csv',
 '../wildfire-data/VIIRS-J1-01012022-03252022-United_States.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_United_States.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_United_States.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States_Minor_Outlying_Islands.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States_Minor_Outlying_Islands.csv',
 '../wildfire-data/viirs-snpp/2017/viirs-snpp_2017_United_States.csv',
 

In [132]:
all_us_files = list(filter(lambda k: 'United_States.csv' in k, all_csv_files))

In [133]:
all_us_files

['../wildfire-data/MODIS-01012022-03252022-United_States.csv',
 '../wildfire-data/VIIRS-SUOMI-01012022-03252022-United_States.csv',
 '../wildfire-data/VIIRS-J1-01012022-03252022-United_States.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_United_States.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_United_States.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States.csv',
 '../wildfire-data/viirs-snpp/2017/viirs-snpp_2017_United_States.csv',
 '../wildfire-data/viirs-snpp/2019/viirs-snpp_2019_United_States.csv',
 '../wildfire-data/viirs-snpp/2021/viirs-snpp_2021_United_States.csv',
 '../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_United_States.csv',
 '../wildfire-data/viirs-snpp/2018/viirs-snpp_2018_United_States.csv',
 '../wildfire-data/viirs-snpp/2016/viirs-snpp_2016_United_States.csv',
 '../wildfire-data/modis/2013/modis_2013_United_States.csv',
 '../wildfire-data/modis/2014/m

In [134]:
df_viirs__su_2022 = pd.read_csv('../wildfire-data/VIIRS-SUOMI-01012022-03252022-United_States.csv')

In [135]:
df_viirs__su_2022

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,33.44813,-88.56963,309.21,0.48,0.40,2022-01-01,726,N,VIIRS,n,2.0NRT,290.56,1.52,N
1,32.60958,-82.26559,302.85,0.41,0.37,2022-01-01,726,N,VIIRS,n,2.0NRT,286.26,0.76,N
2,33.99229,-88.47186,317.08,0.49,0.40,2022-01-01,726,N,VIIRS,n,2.0NRT,284.51,1.85,N
3,33.99599,-88.47121,319.58,0.49,0.40,2022-01-01,726,N,VIIRS,n,2.0NRT,284.82,1.85,N
4,33.01337,-79.88333,306.75,0.50,0.41,2022-01-01,726,N,VIIRS,n,2.0NRT,288.94,1.42,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87924,19.41079,-155.27417,367.00,0.49,0.65,2022-03-25,2248,N,VIIRS,h,2.0NRT,325.02,251.28,D
87925,19.40383,-155.27739,367.00,0.49,0.65,2022-03-25,2248,N,VIIRS,h,2.0NRT,321.21,124.47,D
87926,19.40281,-155.28198,367.00,0.49,0.65,2022-03-25,2248,N,VIIRS,h,2.0NRT,327.60,106.01,D
87927,19.40777,-155.28777,343.11,0.49,0.65,2022-03-25,2248,N,VIIRS,n,2.0NRT,301.96,107.44,D


In [136]:
df_viirs_j1_2022 = pd.read_csv('../wildfire-data/VIIRS-J1-01012022-03252022-United_States.csv')

In [137]:
df_viirs_j1_2022

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,35.59524,-79.47195,297.12,0.54,0.51,2022-01-01,631,1,VIIRS,n,2.0NRT,267.70,0.76,N
1,28.29930,-82.19985,303.28,0.33,0.55,2022-01-01,633,1,VIIRS,n,2.0NRT,287.66,0.15,N
2,28.40821,-82.21400,298.01,0.33,0.55,2022-01-01,633,1,VIIRS,n,2.0NRT,287.89,0.25,N
3,28.70181,-82.04972,306.11,0.32,0.55,2022-01-01,633,1,VIIRS,n,2.0NRT,288.52,0.51,N
4,31.15173,-87.98898,304.21,0.77,0.78,2022-01-01,633,1,VIIRS,n,2.0NRT,287.80,1.96,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89694,19.41052,-155.28645,352.58,0.39,0.36,2022-03-25,2337,1,VIIRS,n,2.0NRT,314.70,136.79,D
89695,19.40829,-155.27849,367.00,0.39,0.36,2022-03-25,2337,1,VIIRS,h,2.0NRT,345.27,57.33,D
89696,19.40110,-155.28111,348.82,0.39,0.36,2022-03-25,2337,1,VIIRS,n,2.0NRT,316.51,50.50,D
89697,19.40443,-155.28163,351.30,0.39,0.36,2022-03-25,2337,1,VIIRS,l,2.0NRT,338.97,50.50,D


In [138]:
df_modis_2022 = pd.read_csv('../wildfire-data/MODIS-01012022-03252022-United_States.csv')

In [139]:
df_modis_2022

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,19.41736,-155.27985,333.19,1.39,1.17,2022-01-01,1223,Aqua,MODIS,100,6.1NRT,290.13,57.45,N
1,19.39973,-155.27629,301.55,1.40,1.17,2022-01-01,1223,Aqua,MODIS,28,6.1NRT,286.69,9.62,N
2,19.40192,-155.28940,309.17,1.39,1.17,2022-01-01,1223,Aqua,MODIS,77,6.1NRT,286.69,17.35,N
3,19.41009,-155.27425,391.88,1.40,1.17,2022-01-01,1223,Aqua,MODIS,100,6.1NRT,299.67,340.62,N
4,19.41229,-155.28754,420.54,1.39,1.17,2022-01-01,1223,Aqua,MODIS,100,6.1NRT,298.81,651.82,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26458,19.40477,-155.29367,409.81,1.00,1.00,2022-03-25,2346,Aqua,MODIS,100,6.1NRT,325.46,301.59,D
26459,19.41502,-155.28574,327.75,1.00,1.00,2022-03-25,2346,Aqua,MODIS,55,6.1NRT,311.22,14.56,D
26460,19.39722,-155.28285,372.80,1.00,1.00,2022-03-25,2346,Aqua,MODIS,100,6.1NRT,315.68,111.31,D
26461,19.39587,-155.29224,339.47,1.00,1.00,2022-03-25,2346,Aqua,MODIS,89,6.1NRT,311.52,26.62,D


In [140]:
df_viirs = pd.read_csv('../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_United_States.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [141]:
df_viirs = pd.read_csv('../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_United_States.csv', low_memory=False)

In [142]:
df_viirs

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type
0,46.730213,-92.075386,303.31,0.56,0.52,2020-01-01,727,N,VIIRS,n,1,264.04,1.43,N,3
1,46.729069,-92.077477,301.76,0.56,0.52,2020-01-01,727,N,VIIRS,n,1,263.05,1.27,N,2
2,40.721516,-84.126244,323.46,0.39,0.36,2020-01-01,728,N,VIIRS,n,1,268.47,1.80,N,2
3,41.639717,-87.130882,300.05,0.45,0.39,2020-01-01,728,N,VIIRS,n,1,268.17,1.23,N,3
4,41.619049,-87.328514,297.25,0.46,0.39,2020-01-01,728,N,VIIRS,n,1,268.34,0.82,N,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
672445,19.409372,-155.295761,332.17,0.49,0.49,2020-12-31,2306,N,VIIRS,n,1,290.31,3.08,1,0
672446,19.402180,-155.285187,367.00,0.49,0.49,2020-12-31,2306,N,VIIRS,l,1,286.75,26.13,1,0
672447,19.404821,-155.271347,350.90,0.49,0.49,2020-12-31,2306,N,VIIRS,n,1,288.63,67.31,1,0
672448,19.399593,-155.298752,326.91,0.49,0.49,2020-12-31,2306,N,VIIRS,n,1,285.83,4.16,1,0


In [143]:
df_viirs['satellite'].unique()

array(['N'], dtype=object)

In [144]:
df_viirs['instrument'].unique()

array(['VIIRS'], dtype=object)

In [145]:
type(df_viirs.confidence[0])

str

In [146]:
isinstance(df_viirs.confidence[0], str)

True

In [147]:
df_viirs.confidence.unique()

array(['n', 'h', 'l'], dtype=object)

In [148]:
df_modis = pd.read_csv('../wildfire-data/modis/2020/modis_2020_United_States.csv')

In [149]:
df_modis

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,31.8349,-83.0561,306.7,1.4,1.2,2020-01-01,324,Terra,MODIS,69,6.03,277.6,18.9,N,0
1,36.3562,-76.8050,300.2,1.2,1.1,2020-01-01,325,Terra,MODIS,20,6.03,278.9,10.2,N,0
2,33.4178,-110.8616,321.5,1.0,1.0,2020-01-01,920,Aqua,MODIS,100,6.03,271.8,24.9,N,2
3,41.4810,-90.8294,310.0,1.0,1.0,2020-01-01,1704,Terra,MODIS,69,6.03,276.5,13.8,D,0
4,38.6973,-90.1281,311.7,1.1,1.0,2020-01-01,1705,Terra,MODIS,54,6.03,282.3,14.1,D,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153843,38.7099,-122.8631,316.1,1.1,1.1,2020-12-31,2131,Aqua,MODIS,76,6.03,285.8,18.6,D,0
153844,39.7121,-122.2936,300.4,1.2,1.1,2020-12-31,2131,Aqua,MODIS,35,6.03,286.2,5.6,D,0
153845,37.7988,-121.1489,301.3,1.3,1.1,2020-12-31,2131,Aqua,MODIS,46,6.03,281.4,8.6,D,0
153846,39.1593,-122.9303,308.7,1.1,1.1,2020-12-31,2131,Aqua,MODIS,67,6.03,283.3,13.1,D,0


In [150]:
df_modis['satellite'].unique()

array(['Terra', 'Aqua'], dtype=object)

In [151]:
df_modis['instrument'].unique()

array(['MODIS'], dtype=object)

In [152]:
type(df_modis['confidence'][0])

numpy.int64

In [153]:
df_modis['confidence'].unique()

array([ 69,  20, 100,  54,  38,  60,   0,  70,  78,  46,  57,  83,  82,
        74,  79,  86,  53,  72,  41,  35,  73,  66,  84,  59,  55,  31,
        45,  50,  51,  47,  71,  63,  24,  36,  48,  80,  62,  49,  65,
        75,  67,  52,  58,  56,  93,  94,  64,  68,  61,  85,  90,  81,
        40,  88,  42,  97,  96,  33,  91,  87,  77,  76,  39,  44,  95,
        37,  89,  98,  30,  99,  21,  92,  29,  16,  17,  26,  32,  34,
        43,  25,  22,  19,  28,   9,  27,  23,  15,  13,  14,  18,  11,
        10,   8,  12,   6,   4,   7,   3,   5,   2,   1])

In [154]:
# Taken this idea from the H2O.ai wild fire competition github repo and changed it meet the local need
rows = []
fire_df = []
row_sum = 0
for f in tqdm(all_us_files):
    df = pd.read_csv(f, parse_dates=['acq_time'], low_memory=False) 
    csv_name = f.split('/')[-1]
    row = [
        f, csv_name, df.shape[0], df.shape[1], df.acq_date.min(), df.acq_date.max(),
        df.satellite.unique(), df.instrument.max(), df.version.max(),
        df.latitude.nunique(), df.longitude.nunique(),
        df.confidence.nunique(), df.satellite.nunique(), df.acq_date.nunique()
    ]
    if isinstance(df.confidence[0], str):
        df.confidence = df.confidence.replace({'l': 0, 'n': 50, 'h': 100})
    rows.append(row)
    row_sum = row_sum + df.shape[0]
    fire_df.append(df)
cols = [
    'path', 'csv', 'rows', 'cols', 'start', 'end',
    'satellite', 'instrument', 'version',
    'lats', 'lons', 'confs', 'sats', 'days'
]
filestats = pd.DataFrame(rows, columns=cols)
filestats.sort_values(by=['start', 'instrument'])
print("Total Rows: " + str(row_sum))

100%|███████████████████████████████████████████████████| 34/34 [00:12<00:00,  2.71it/s]


Unnamed: 0,path,csv,rows,cols,start,end,satellite,instrument,version,lats,lons,confs,sats,days
21,../wildfire-data/modis/2000/modis_2000_United_...,modis_2000_United_States.csv,3781,15,2000-11-01,2000-12-31,[Terra],MODIS,6.2,3720,3712,96,1,61
18,../wildfire-data/modis/2001/modis_2001_United_...,modis_2001_United_States.csv,44941,15,2001-01-01,2001-12-31,[Terra],MODIS,6.2,37881,40596,101,1,347
31,../wildfire-data/modis/2002/modis_2002_United_...,modis_2002_United_States.csv,79715,15,2002-01-01,2002-12-31,"[Terra, Aqua]",MODIS,6.2,61851,68094,101,2,355
32,../wildfire-data/modis/2003/modis_2003_United_...,modis_2003_United_States.csv,114471,15,2003-01-01,2003-12-31,"[Terra, Aqua]",MODIS,6.2,81630,93777,101,2,365
33,../wildfire-data/modis/2004/modis_2004_United_...,modis_2004_United_States.csv,158385,15,2004-01-01,2004-12-31,"[Terra, Aqua]",MODIS,6.2,96626,121487,101,2,365
30,../wildfire-data/modis/2005/modis_2005_United_...,modis_2005_United_States.csv,171160,15,2005-01-01,2005-12-31,"[Terra, Aqua]",MODIS,6.2,110486,135665,101,2,365
19,../wildfire-data/modis/2006/modis_2006_United_...,modis_2006_United_States.csv,126737,15,2006-01-01,2006-12-31,"[Terra, Aqua]",MODIS,6.2,88916,103617,101,2,365
20,../wildfire-data/modis/2007/modis_2007_United_...,modis_2007_United_States.csv,142420,15,2007-01-01,2007-12-31,"[Terra, Aqua]",MODIS,6.2,96108,109694,101,2,365
17,../wildfire-data/modis/2008/modis_2008_United_...,modis_2008_United_States.csv,119797,15,2008-01-01,2008-12-31,"[Terra, Aqua]",MODIS,6.2,83666,96357,101,2,366
22,../wildfire-data/modis/2009/modis_2009_United_...,modis_2009_United_States.csv,115174,15,2009-01-01,2009-12-31,"[Terra, Aqua]",MODIS,6.2,83904,96681,101,2,365


Total Rows: 7884687


In [156]:
## DO NOT DELETE - Help us to make sure we have 2022 data
## Note without 2022 data we had the following records count 
##  (7680596, 17)

In [157]:
fire_df

[       latitude  longitude  brightness  scan  track    acq_date acq_time  \
 0      19.41736 -155.27985      333.19  1.39   1.17  2022-01-01     1223   
 1      19.39973 -155.27629      301.55  1.40   1.17  2022-01-01     1223   
 2      19.40192 -155.28940      309.17  1.39   1.17  2022-01-01     1223   
 3      19.41009 -155.27425      391.88  1.40   1.17  2022-01-01     1223   
 4      19.41229 -155.28754      420.54  1.39   1.17  2022-01-01     1223   
 ...         ...        ...         ...   ...    ...         ...      ...   
 26458  19.40477 -155.29367      409.81  1.00   1.00  2022-03-25     2346   
 26459  19.41502 -155.28574      327.75  1.00   1.00  2022-03-25     2346   
 26460  19.39722 -155.28285      372.80  1.00   1.00  2022-03-25     2346   
 26461  19.39587 -155.29224      339.47  1.00   1.00  2022-03-25     2346   
 26462  19.40612 -155.28429      452.11  1.00   1.00  2022-03-25     2346   
 
       satellite instrument  confidence version  bright_t31     frp daynig

In [158]:
us_fire_df = pd.concat(fire_df)
us_fire_df.shape

(7884687, 17)

In [159]:
us_fire_df

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,bright_ti4,bright_ti5,type
0,19.41736,-155.27985,333.19,1.39,1.17,2022-01-01,1223,Aqua,MODIS,100,6.1NRT,290.13,57.45,N,,,
1,19.39973,-155.27629,301.55,1.40,1.17,2022-01-01,1223,Aqua,MODIS,28,6.1NRT,286.69,9.62,N,,,
2,19.40192,-155.28940,309.17,1.39,1.17,2022-01-01,1223,Aqua,MODIS,77,6.1NRT,286.69,17.35,N,,,
3,19.41009,-155.27425,391.88,1.40,1.17,2022-01-01,1223,Aqua,MODIS,100,6.1NRT,299.67,340.62,N,,,
4,19.41229,-155.28754,420.54,1.39,1.17,2022-01-01,1223,Aqua,MODIS,100,6.1NRT,298.81,651.82,N,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158380,34.16460,-100.13540,336.30,1.60,1.30,2004-12-31,2013,Aqua,MODIS,90,6.2,290.40,73.70,D,,,0.0
158381,34.16480,-100.12830,345.50,1.60,1.30,2004-12-31,2013,Aqua,MODIS,94,6.2,289.70,108.20,D,,,0.0
158382,34.16630,-100.11770,315.70,1.60,1.30,2004-12-31,2013,Aqua,MODIS,75,6.2,287.80,25.10,D,,,0.0
158383,34.16300,-100.14590,322.90,1.60,1.30,2004-12-31,2013,Aqua,MODIS,82,6.2,288.70,41.00,D,,,0.0


In [160]:
us_fire_df['acq_date'] = pd.to_datetime(us_fire_df['acq_date'])

In [161]:
us_fire_df['acq_date'].min()

Timestamp('2000-11-01 00:00:00')

## Now we have USA wildfire data until March 25th 2022

In [163]:
us_fire_df['acq_date'].max()

Timestamp('2022-03-25 00:00:00')

In [164]:
us_fire_df = us_fire_df.sort_values(by=['acq_date', 'acq_time'])

In [165]:
us_fire_df

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,bright_ti4,bright_ti5,type
0,38.54220,-78.30470,304.80,2.8,1.6,2000-11-01,0250,Terra,MODIS,23,6.2,280.90,40.30,N,,,0.0
1,38.55630,-78.30840,309.40,2.8,1.6,2000-11-01,0250,Terra,MODIS,70,6.2,280.40,54.50,N,,,0.0
2,38.54510,-78.31070,309.90,2.8,1.6,2000-11-01,0250,Terra,MODIS,79,6.2,280.70,58.80,N,,,0.0
3,38.55860,-78.31700,302.30,2.8,1.6,2000-11-01,0250,Terra,MODIS,45,6.2,279.80,36.00,N,,,0.0
4,31.33930,-89.91240,304.90,1.0,1.0,2000-11-01,0427,Terra,MODIS,62,6.2,287.50,8.50,N,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26458,19.40477,-155.29367,409.81,1.0,1.0,2022-03-25,2346,Aqua,MODIS,100,6.1NRT,325.46,301.59,D,,,
26459,19.41502,-155.28574,327.75,1.0,1.0,2022-03-25,2346,Aqua,MODIS,55,6.1NRT,311.22,14.56,D,,,
26460,19.39722,-155.28285,372.80,1.0,1.0,2022-03-25,2346,Aqua,MODIS,100,6.1NRT,315.68,111.31,D,,,
26461,19.39587,-155.29224,339.47,1.0,1.0,2022-03-25,2346,Aqua,MODIS,89,6.1NRT,311.52,26.62,D,,,


In [166]:
us_fire_df['confidence'].nunique()

101

In [167]:
us_fire_df['confidence'].unique()

array([ 23,  70,  79,  45,  62,  75,  74,  54,  69,  77,  72,  49,  36,
        43,  52,  21,  63,  57,  68,  67,   0,  56,  96,  98, 100,  95,
        42,  73,  91,  92,  41,  26,  53,  88,  81,  51,  61,  59,  60,
        76,  71,  46,  47,  82,  50,  65,  83,  55,  22,  58,  97,  90,
        87,  86,  66,  34,  84,  78,  44,  80,  89,  29,  85,  37,  24,
        64,  94,  48,  31,  18,  32,  93,  33,  39,  19,  10,  30,   4,
        13,   2,  99,  40,  28,  20,  38,  27,  35,  17,   7,  16,  14,
         9,   8,  15,  12,  25,  11,   6,   5,   3,   1])

# Brightness #
- brightness: Channel 21/22 brightness temperature of the fire pixel measured in Kelvin.

In [168]:
us_fire_df['brightness'].describe()

count    2.645844e+06
mean     3.258432e+02
std      2.353386e+01
min      2.079300e+02
25%      3.104000e+02
50%      3.197000e+02
75%      3.342900e+02
max      5.070000e+02
Name: brightness, dtype: float64

## Satellite ##

In [169]:
us_fire_df['satellite'].unique()

array(['Terra', 'Aqua', 'N', 1], dtype=object)

In [170]:
us_fire_df['instrument'].unique()

array(['MODIS', 'VIIRS'], dtype=object)

## bright_t31 ##
- Channel 31 brightness temperature of the fire pixel measured in Kelvin.

In [171]:
us_fire_df['bright_t31'].describe()

count    2.645844e+06
mean     2.949861e+02
std      9.768978e+00
min      2.216200e+02
25%      2.894900e+02
50%      2.948000e+02
75%      3.001000e+02
max      4.001000e+02
Name: bright_t31, dtype: float64

In [172]:
us_fire_df['bright_t31'].unique()

array([280.9 , 280.4 , 280.7 , ..., 345.27, 338.97, 325.46])

## frp ##
- Fire Radiative Power depicts the pixel-integrated fire radiative power in MW (megawatts).

In [173]:
us_fire_df['frp'].describe()

count    7.884687e+06
mean     3.002932e+01
std      1.238268e+02
min     -5.920000e+01
25%      2.560000e+00
50%      7.220000e+00
75%      2.090000e+01
max      1.614640e+04
Name: frp, dtype: float64

## type ##
- Inferred hot spot type
  - 0 = presumed vegetation fire
  - 1 = active volcano
  - 2 = other static land source
  - 3 = offshore

In [174]:
us_fire_df['type'].unique()

array([ 0.,  2.,  3.,  1., nan])

## Confidence ##
- This value is based on a collection of intermediate algorithm quantities used in the detection process. 
- It is intended to help users gauge the quality of individual hotspot/fire pixels. 
- Confidence estimates range between 0 and 100% and are assigned one of the three fire classes
  - low-confidence fire
  - nominal-confidence fire
  - high-confidence fire).

In [175]:
us_fire_df['confidence'].unique()

array([ 23,  70,  79,  45,  62,  75,  74,  54,  69,  77,  72,  49,  36,
        43,  52,  21,  63,  57,  68,  67,   0,  56,  96,  98, 100,  95,
        42,  73,  91,  92,  41,  26,  53,  88,  81,  51,  61,  59,  60,
        76,  71,  46,  47,  82,  50,  65,  83,  55,  22,  58,  97,  90,
        87,  86,  66,  34,  84,  78,  44,  80,  89,  29,  85,  37,  24,
        64,  94,  48,  31,  18,  32,  93,  33,  39,  19,  10,  30,   4,
        13,   2,  99,  40,  28,  20,  38,  27,  35,  17,   7,  16,  14,
         9,   8,  15,  12,  25,  11,   6,   5,   3,   1])

In [176]:
us_fire_df.columns

Index(['latitude', 'longitude', 'brightness', 'scan', 'track', 'acq_date',
       'acq_time', 'satellite', 'instrument', 'confidence', 'version',
       'bright_t31', 'frp', 'daynight', 'bright_ti4', 'bright_ti5', 'type'],
      dtype='object')

In [189]:
us_fire_df.to_csv('usa_daily_fire_2000_march25-2022-raw.csv', index=False)

## Extracting few main feautures from the dataset ## 

In [177]:
daily_fires_df = us_fire_df.groupby(
            ['latitude', 'longitude', 'acq_date', 'satellite', 'instrument']).confidence.max().reset_index()

In [179]:
## Note: Do not delete
## Without 2022 data we have rows/column count as below 
## 7680586 rows × 6 columns

In [59]:
### Note: If you have 'frp', 'type', 'bright_t31' columns to the mix, the 2021 data will be removed  ###
#daily_fires_df = us_fire_df.groupby(
#             ['latitude', 'longitude', 'acq_date', 'satellite', 'instrument', 'frp', 'type', 'bright_t31']).confidence.max().reset_index()

In [180]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,confidence
0,18.919016,-155.676727,2019-10-04,N,VIIRS,50
1,18.919678,-155.680664,2019-10-04,N,VIIRS,50
2,18.921785,-155.672180,2019-10-04,N,VIIRS,50
3,18.922449,-155.676117,2019-10-04,N,VIIRS,50
4,18.923107,-155.680054,2019-10-04,N,VIIRS,50
...,...,...,...,...,...,...
7884672,70.461823,-149.446426,2013-08-01,N,VIIRS,50
7884673,70.462181,-149.440659,2021-07-20,N,VIIRS,50
7884674,70.664764,-159.965317,2017-06-13,N,VIIRS,50
7884675,70.666168,-159.963379,2012-06-15,N,VIIRS,50


In [181]:
daily_fires_df['acq_date'].min()

Timestamp('2000-11-01 00:00:00')

In [182]:
daily_fires_df['acq_date'].max()

Timestamp('2022-03-25 00:00:00')

In [183]:
daily_fires_df['confidence'].unique()

array([ 50,   0,  94,  87, 100,  75,  86,  46,  34,  85,  42,  40,  69,
        23,  74,  60,  55,  56,  53,  70,  47,  82,  27,  25,  66,  72,
        19,  65,  59,  57,  89,  91,  48,  30,  20,  22,  37,  67,  24,
        71,  54,  61,  92,  77,  36,  33,  81,  35,  63,  41,  51,  90,
        45,  98,  32,  21,  58,  78,  68,  38,  95,  73,  29,  44,  99,
        97,  96,  49,  43,  28,  80,  84,  83,  17,   7,  64,  62,  79,
         6,  14,  18,  76,  16,  88,  39,  93,  15,  52,  12,   8,  31,
        13,  11,  26,   5,   9,   2,  10,   4,   3,   1])

In [184]:
daily_fires_df = daily_fires_df[daily_fires_df.confidence >= 50]

In [185]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,confidence
0,18.919016,-155.676727,2019-10-04,N,VIIRS,50
1,18.919678,-155.680664,2019-10-04,N,VIIRS,50
2,18.921785,-155.672180,2019-10-04,N,VIIRS,50
3,18.922449,-155.676117,2019-10-04,N,VIIRS,50
4,18.923107,-155.680054,2019-10-04,N,VIIRS,50
...,...,...,...,...,...,...
7884672,70.461823,-149.446426,2013-08-01,N,VIIRS,50
7884673,70.462181,-149.440659,2021-07-20,N,VIIRS,50
7884674,70.664764,-159.965317,2017-06-13,N,VIIRS,50
7884675,70.666168,-159.963379,2012-06-15,N,VIIRS,50


In [186]:
daily_fires_df['confidence'].unique()

array([ 50,  94,  87, 100,  75,  86,  85,  69,  74,  60,  55,  56,  53,
        70,  82,  66,  72,  65,  59,  57,  89,  91,  67,  71,  54,  61,
        92,  77,  81,  63,  51,  90,  98,  58,  78,  68,  95,  73,  99,
        97,  96,  80,  84,  83,  64,  62,  79,  76,  88,  93,  52])

In [187]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,confidence
0,18.919016,-155.676727,2019-10-04,N,VIIRS,50
1,18.919678,-155.680664,2019-10-04,N,VIIRS,50
2,18.921785,-155.672180,2019-10-04,N,VIIRS,50
3,18.922449,-155.676117,2019-10-04,N,VIIRS,50
4,18.923107,-155.680054,2019-10-04,N,VIIRS,50
...,...,...,...,...,...,...
7884672,70.461823,-149.446426,2013-08-01,N,VIIRS,50
7884673,70.462181,-149.440659,2021-07-20,N,VIIRS,50
7884674,70.664764,-159.965317,2017-06-13,N,VIIRS,50
7884675,70.666168,-159.963379,2012-06-15,N,VIIRS,50


In [190]:
##daily_fires_df.to_csv('usa_daily_fire_2000_2021.csv.gz', index=False, compression='gzip')
daily_fires_df.to_csv('usa_daily_fire_2000_march25-2022-grouped.csv', index=False)

In [191]:
!ls -lah

total 3493048
drwxr-xr-x  17 avkash  staff   544B Mar 27 09:09 [1m[36m.[m[m
drwxr-xr-x  13 avkash  staff   416B Mar 26 22:49 [1m[36m..[m[m
-rw-r--r--@  1 avkash  staff   6.0K Mar 27 08:52 .DS_Store
drwxr-xr-x   7 avkash  staff   224B Mar 27 08:54 [1m[36m.ipynb_checkpoints[m[m
-rw-r--r--   1 avkash  staff    15K Mar 27 08:30 Untitled.ipynb
-rw-r--r--   1 avkash  staff   1.3K Mar 27 08:56 Untitled1.ipynb
-rw-r--r--   1 avkash  staff    54M Mar 27 08:35 ca_daily_fire_2000_2021.csv
-rw-r--r--   1 avkash  staff   3.8M Mar 27 08:36 ca_fire_test.csv
-rw-r--r--   1 avkash  staff    60M Mar 27 08:36 ca_fire_train.csv
-rw-r--r--   1 avkash  staff   7.5M Mar 27 08:36 ca_fire_valid.csv
-rw-r--r--   1 avkash  staff   9.1M Mar 27 08:38 feature-engineering-ca-wildfire-data.ipynb
-rw-r--r--   1 avkash  staff    31K Mar 27 08:37 ml-ca-wildfire-ml-ready-data.ipynb
-rw-r--r--   1 avkash  staff   289M Mar 27 08:29 usa_daily_fire_2000_2021.csv
-rw-r--r--   1 avkash  staff   296M M

In [192]:
daily_fires_df['acq_date'].min()

Timestamp('2000-11-01 00:00:00')

In [193]:
daily_fires_df['acq_date'].max()

Timestamp('2022-03-25 00:00:00')

### California longitude and latitude range ###
- https://www.netstate.com/states/geography/ca_geography.htm
- Longitude: 114° 8' W to 124° 24' W
- Latitude: 32° 30' N to 42° N

In [194]:
CA_LATTITUDE_RANGE = (32, 42)
# We have to change [114, 124] to range as [-124, -114]
CA_LONGITUDE_RANGE = (-124, -114)

In [195]:
ca_daily_fire = daily_fires_df[
        (daily_fires_df.latitude > CA_LATTITUDE_RANGE[0]) & (daily_fires_df.latitude < CA_LATTITUDE_RANGE[1])]

In [196]:
print(ca_daily_fire.shape)
ca_daily_fire.sample(10)

(3621926, 6)


Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,confidence
2764706,34.859035,-86.248192,2012-05-02,N,VIIRS,50
2327018,33.799152,-94.019424,2019-10-12,N,VIIRS,50
4697620,40.18013,-123.056488,2017-10-18,N,VIIRS,50
3330695,36.428822,-82.93972,2020-12-06,N,VIIRS,50
2156085,33.4303,-82.4591,2018-03-22,Terra,MODIS,76
3566919,37.1217,-96.3397,2015-04-02,Terra,MODIS,72
4220862,38.873562,-119.527534,2013-07-05,N,VIIRS,100
5020413,41.000854,-123.467621,2013-08-19,N,VIIRS,100
4724316,40.246071,-93.845245,2021-04-01,N,VIIRS,50
1852874,32.802681,-94.856544,2017-08-10,N,VIIRS,50


In [197]:
ca_daily_fire = ca_daily_fire[
    (ca_daily_fire.longitude > CA_LONGITUDE_RANGE[0]) & (ca_daily_fire.longitude < CA_LONGITUDE_RANGE[1])]

In [198]:
## Note: Do not delete
## Untill DEc 31, 2021 we have the following rows and cols 
## 1102649 rows × 6 columns

In [199]:
ca_daily_fire

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,confidence
1671402,32.353340,-114.768260,2022-03-23,Aqua,MODIS,80
1673606,32.358820,-114.762730,2022-03-24,N,VIIRS,50
1673796,32.359240,-114.765390,2022-03-24,N,VIIRS,50
1674135,32.360030,-114.759670,2022-03-23,N,VIIRS,50
1674268,32.360360,-114.759350,2022-03-23,N,VIIRS,50
...,...,...,...,...,...,...
5476988,41.999947,-120.655807,2012-08-14,N,VIIRS,50
5476989,41.999950,-123.621155,2020-09-20,N,VIIRS,50
5476990,41.999958,-122.592186,2018-07-08,N,VIIRS,50
5476991,41.999969,-120.648338,2012-08-14,N,VIIRS,50


In [200]:
ca_daily_fire['acq_date'].min()

Timestamp('2000-11-01 00:00:00')

In [201]:
ca_daily_fire['acq_date'].max()

Timestamp('2022-03-25 00:00:00')

In [202]:
ca_daily_fire.describe()

Unnamed: 0,latitude,longitude,confidence
count,1116861.0,1116861.0,1116861.0
mean,38.59871,-120.6445,60.10158
std,2.377151,2.234325,18.51666
min,32.35334,-123.9997,50.0
25%,36.88822,-122.7088,50.0
50%,39.14958,-121.0172,50.0
75%,40.528,-119.1556,58.0
max,41.99997,-114.0001,100.0


In [203]:
ca_daily_fire.dtypes

latitude             float64
longitude            float64
acq_date      datetime64[ns]
satellite             object
instrument            object
confidence             int64
dtype: object

In [204]:
ca_daily_fire['acq_date'] = pd.to_datetime(ca_daily_fire['acq_date'])

In [205]:
ca_daily_fire.dtypes

latitude             float64
longitude            float64
acq_date      datetime64[ns]
satellite             object
instrument            object
confidence             int64
dtype: object

In [206]:
ca_daily_fire

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,confidence
1671402,32.353340,-114.768260,2022-03-23,Aqua,MODIS,80
1673606,32.358820,-114.762730,2022-03-24,N,VIIRS,50
1673796,32.359240,-114.765390,2022-03-24,N,VIIRS,50
1674135,32.360030,-114.759670,2022-03-23,N,VIIRS,50
1674268,32.360360,-114.759350,2022-03-23,N,VIIRS,50
...,...,...,...,...,...,...
5476988,41.999947,-120.655807,2012-08-14,N,VIIRS,50
5476989,41.999950,-123.621155,2020-09-20,N,VIIRS,50
5476990,41.999958,-122.592186,2018-07-08,N,VIIRS,50
5476991,41.999969,-120.648338,2012-08-14,N,VIIRS,50


In [207]:
ca_daily_fire['year'] = ca_daily_fire.acq_date.dt.year

In [208]:
ca_daily_fire['month'] = ca_daily_fire.acq_date.dt.month

In [209]:
ca_daily_fire

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,confidence,year,month
1671402,32.353340,-114.768260,2022-03-23,Aqua,MODIS,80,2022,3
1673606,32.358820,-114.762730,2022-03-24,N,VIIRS,50,2022,3
1673796,32.359240,-114.765390,2022-03-24,N,VIIRS,50,2022,3
1674135,32.360030,-114.759670,2022-03-23,N,VIIRS,50,2022,3
1674268,32.360360,-114.759350,2022-03-23,N,VIIRS,50,2022,3
...,...,...,...,...,...,...,...,...
5476988,41.999947,-120.655807,2012-08-14,N,VIIRS,50,2012,8
5476989,41.999950,-123.621155,2020-09-20,N,VIIRS,50,2020,9
5476990,41.999958,-122.592186,2018-07-08,N,VIIRS,50,2018,7
5476991,41.999969,-120.648338,2012-08-14,N,VIIRS,50,2012,8


In [210]:
min_year = ca_daily_fire['acq_date'].min().year
print(min_year)

2000


In [211]:
max_year = ca_daily_fire['acq_date'].max().year
print(max_year)

2022


In [225]:
max_date = ca_daily_fire['acq_date'].max()
print('{:0>2}'.format(max_date.month))
print('{:0>2}'.format(max_date.day))
print(max_date.year)

03
25
2022


In [226]:
target_file_name = "ca_daily_fire_" + str(min_year) + "_" + '{:0>2}'.format(max_date.month) + '{:0>2}'.format(max_date.day) + str(max_date.year) +  ".csv"
print(target_file_name)

ca_daily_fire_2000_03252022.csv


In [227]:
ca_daily_fire.to_csv(target_file_name, index=False)
#ca_daily_fire.to_csv('ca_daily_fire_2012_2020.csv.gz', index=False, compression='gzip')

In [None]:
!ls -lah