# Creating wildfire dataset for any country using global satellite data #
## We are using country USA and state California as our target country and region in this example ##

## Resources: ##
    - https://medium.com/ibm-data-ai/predicting-australian-wildfires-with-weather-forecast-data-8d1cc983c863
    - https://github.com/Call-for-Code/Spot-Challenge-Wildfires
    - https://h2o.ai/wildfire/
    - https://github.com/h2oai/challenge-wildfires/blob/main/notebook/DataPreparation.ipynb
    - https://github.com/mapbox/mapboxgl-jupyter
    - https://www.bigendiandata.com/2017-06-27-Mapping_in_Jupyter/

![modpas-nasa](https://github.com/prodramp/wildfire/blob/main/images/modaps-nasa.png?raw=true)

## Data Collection ##
- Please Visit https://firms.modaps.eosdis.nasa.gov/download/ to download both MODIS (2000-2020) and VIIRS (2012-2020) datasets based on each country wildfire data
  - MODIS - https://firms.modaps.eosdis.nasa.gov/country/
  - VIIRS - https://firms.modaps.eosdis.nasa.gov/country/
  - Active Fire (24 Hours, 48 Hours, 7 days)-  https://firms.modaps.eosdis.nasa.gov/usfs/active_fire/


<div style="max-width:400px;">
    <img src="https://github.com/prodramp/wildfire/blob/main/images/disclaimer.png?raw=true" size="400px"/>
</div>

In [1]:
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
import random
InteractiveShell.ast_node_interactivity = "all"
import os
from datetime import datetime
import plotly.express as px
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
!ls -l ../

total 8
-rw-r--r--   1 avkash  staff   85 Mar 26 07:52 README.md
drwxr-xr-x@ 11 avkash  staff  352 Mar 26 08:29 [1m[36mdataset[m[m
drwxr-xr-x   6 avkash  staff  192 Mar 26 08:03 [1m[36mimages[m[m
drwxr-xr-x@  5 avkash  staff  160 Mar 25 15:32 [1m[36mwildfire-data[m[m
drwxr-xr-x@ 24 avkash  staff  768 Mar 25 15:29 [1m[36mzips_modis[m[m
drwxr-xr-x@ 13 avkash  staff  416 Mar 25 15:26 [1m[36mzips_viirs[m[m


In [3]:
! ls -lh ../wildfire-data

total 0
drwxr-xr-x@ 24 avkash  staff   768B Mar 25 15:28 [1m[36mmodis[m[m
drwxr-xr-x@ 13 avkash  staff   416B Mar 25 15:26 [1m[36mviirs-snpp[m[m


In [4]:
! ls -lh ../wildfire-data/modis

total 0
drwxr-xr-x@ 165 avkash  staff   5.2K Feb 12  2019 [1m[36m2000[m[m
drwxr-xr-x@ 199 avkash  staff   6.2K Nov 16 09:06 [1m[36m2001[m[m
drwxr-xr-x@ 201 avkash  staff   6.3K Nov 16 09:06 [1m[36m2002[m[m
drwxr-xr-x@ 206 avkash  staff   6.4K Nov 16 09:06 [1m[36m2003[m[m
drwxr-xr-x@ 204 avkash  staff   6.4K Nov 16 09:06 [1m[36m2004[m[m
drwxr-xr-x@ 210 avkash  staff   6.6K Nov 16 09:06 [1m[36m2005[m[m
drwxr-xr-x@ 204 avkash  staff   6.4K Nov 16 09:06 [1m[36m2006[m[m
drwxr-xr-x@ 210 avkash  staff   6.6K Nov 16 09:06 [1m[36m2007[m[m
drwxr-xr-x@ 207 avkash  staff   6.5K Nov 16 09:06 [1m[36m2008[m[m
drwxr-xr-x@ 207 avkash  staff   6.5K Nov 16 09:06 [1m[36m2009[m[m
drwxr-xr-x@ 205 avkash  staff   6.4K Nov 16 09:06 [1m[36m2010[m[m
drwxr-xr-x@ 203 avkash  staff   6.3K Nov 16 09:06 [1m[36m2011[m[m
drwxr-xr-x@ 206 avkash  staff   6.4K Nov 16 09:06 [1m[36m2012[m[m
drwxr-xr-x@ 209 avkash  staff   6.5K Nov 16 09:06 [1m[36m2013[m[

In [5]:
! ls -lh ../wildfire-data/viirs-snpp

total 0
drwxr-xr-x@ 222 avkash  staff   6.9K Nov 16 09:04 [1m[36m2012[m[m
drwxr-xr-x@ 225 avkash  staff   7.0K Nov 16 09:04 [1m[36m2013[m[m
drwxr-xr-x@ 216 avkash  staff   6.8K Nov 16 09:04 [1m[36m2014[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Nov 16 09:04 [1m[36m2015[m[m
drwxr-xr-x@ 222 avkash  staff   6.9K Nov 16 09:04 [1m[36m2016[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Nov 16 09:04 [1m[36m2017[m[m
drwxr-xr-x@ 221 avkash  staff   6.9K Nov 16 09:04 [1m[36m2018[m[m
drwxr-xr-x@ 219 avkash  staff   6.8K Nov 16 09:04 [1m[36m2019[m[m
drwxr-xr-x@ 220 avkash  staff   6.9K Nov 16 09:04 [1m[36m2020[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Jan 27 05:18 [1m[36m2021[m[m


In [6]:
all_csv_files = glob.glob('../wildfire-data/**/*.csv', recursive=True)

In [7]:
all_csv_files

['../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Reunion.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Barbados.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Greenland.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Burundi.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Nepal.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Moldova.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Papua_New_Guinea.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Cook_Islands.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Belgium.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Ethiopia.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Cuba.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Democratic_Republic_of_the_Congo.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Timor-Leste.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Sint_Maarten.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-sn

In [8]:
len(all_csv_files)

6431

In [9]:
type(all_csv_files)

list

In [10]:
all_us_files = list(filter(lambda k: 'United_States' in k, all_csv_files))

In [11]:
all_us_files

['../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_United_States.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_United_States.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States_Minor_Outlying_Islands.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States_Minor_Outlying_Islands.csv',
 '../wildfire-data/viirs-snpp/2017/viirs-snpp_2017_United_States.csv',
 '../wildfire-data/viirs-snpp/2017/viirs-snpp_2017_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-snpp/2019/viirs-snpp_2019_United_States_Virgin_Islands.csv',
 '../wildfire-data/viirs-sn

In [12]:
all_us_files = list(filter(lambda k: 'United_States.csv' in k, all_csv_files))

In [13]:
all_us_files

['../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_United_States.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_United_States.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_United_States.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_United_States.csv',
 '../wildfire-data/viirs-snpp/2017/viirs-snpp_2017_United_States.csv',
 '../wildfire-data/viirs-snpp/2019/viirs-snpp_2019_United_States.csv',
 '../wildfire-data/viirs-snpp/2021/viirs-snpp_2021_United_States.csv',
 '../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_United_States.csv',
 '../wildfire-data/viirs-snpp/2018/viirs-snpp_2018_United_States.csv',
 '../wildfire-data/viirs-snpp/2016/viirs-snpp_2016_United_States.csv',
 '../wildfire-data/modis/2013/modis_2013_United_States.csv',
 '../wildfire-data/modis/2014/modis_2014_United_States.csv',
 '../wildfire-data/modis/2015/modis_2015_United_States.csv',
 '../wildfire-data/modis/2012/modis_2012_United_States.csv',
 '../wildfire-data/modis/2008/modis_2008_Unite

In [14]:
df_viirs = pd.read_csv('../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_United_States.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [15]:
df_viirs = pd.read_csv('../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_United_States.csv', low_memory=False)

In [16]:
df_viirs

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type
0,46.730213,-92.075386,303.31,0.56,0.52,2020-01-01,727,N,VIIRS,n,1,264.04,1.43,N,3
1,46.729069,-92.077477,301.76,0.56,0.52,2020-01-01,727,N,VIIRS,n,1,263.05,1.27,N,2
2,40.721516,-84.126244,323.46,0.39,0.36,2020-01-01,728,N,VIIRS,n,1,268.47,1.80,N,2
3,41.639717,-87.130882,300.05,0.45,0.39,2020-01-01,728,N,VIIRS,n,1,268.17,1.23,N,3
4,41.619049,-87.328514,297.25,0.46,0.39,2020-01-01,728,N,VIIRS,n,1,268.34,0.82,N,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
672445,19.409372,-155.295761,332.17,0.49,0.49,2020-12-31,2306,N,VIIRS,n,1,290.31,3.08,1,0
672446,19.402180,-155.285187,367.00,0.49,0.49,2020-12-31,2306,N,VIIRS,l,1,286.75,26.13,1,0
672447,19.404821,-155.271347,350.90,0.49,0.49,2020-12-31,2306,N,VIIRS,n,1,288.63,67.31,1,0
672448,19.399593,-155.298752,326.91,0.49,0.49,2020-12-31,2306,N,VIIRS,n,1,285.83,4.16,1,0


In [17]:
df_viirs['satellite'].unique()

array(['N'], dtype=object)

In [18]:
df_viirs['instrument'].unique()

array(['VIIRS'], dtype=object)

In [19]:
type(df_viirs.confidence[0])

str

In [20]:
isinstance(df_viirs.confidence[0], str)

True

In [21]:
df_viirs.confidence.unique()

array(['n', 'h', 'l'], dtype=object)

In [22]:
df_modis = pd.read_csv('../wildfire-data/modis/2020/modis_2020_United_States.csv')

In [23]:
df_modis

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,31.8349,-83.0561,306.7,1.4,1.2,2020-01-01,324,Terra,MODIS,69,6.03,277.6,18.9,N,0
1,36.3562,-76.8050,300.2,1.2,1.1,2020-01-01,325,Terra,MODIS,20,6.03,278.9,10.2,N,0
2,33.4178,-110.8616,321.5,1.0,1.0,2020-01-01,920,Aqua,MODIS,100,6.03,271.8,24.9,N,2
3,41.4810,-90.8294,310.0,1.0,1.0,2020-01-01,1704,Terra,MODIS,69,6.03,276.5,13.8,D,0
4,38.6973,-90.1281,311.7,1.1,1.0,2020-01-01,1705,Terra,MODIS,54,6.03,282.3,14.1,D,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153843,38.7099,-122.8631,316.1,1.1,1.1,2020-12-31,2131,Aqua,MODIS,76,6.03,285.8,18.6,D,0
153844,39.7121,-122.2936,300.4,1.2,1.1,2020-12-31,2131,Aqua,MODIS,35,6.03,286.2,5.6,D,0
153845,37.7988,-121.1489,301.3,1.3,1.1,2020-12-31,2131,Aqua,MODIS,46,6.03,281.4,8.6,D,0
153846,39.1593,-122.9303,308.7,1.1,1.1,2020-12-31,2131,Aqua,MODIS,67,6.03,283.3,13.1,D,0


In [24]:
df_modis['satellite'].unique()

array(['Terra', 'Aqua'], dtype=object)

In [25]:
df_modis['instrument'].unique()

array(['MODIS'], dtype=object)

In [26]:
type(df_modis['confidence'][0])

numpy.int64

In [27]:
df_modis['confidence'].unique()

array([ 69,  20, 100,  54,  38,  60,   0,  70,  78,  46,  57,  83,  82,
        74,  79,  86,  53,  72,  41,  35,  73,  66,  84,  59,  55,  31,
        45,  50,  51,  47,  71,  63,  24,  36,  48,  80,  62,  49,  65,
        75,  67,  52,  58,  56,  93,  94,  64,  68,  61,  85,  90,  81,
        40,  88,  42,  97,  96,  33,  91,  87,  77,  76,  39,  44,  95,
        37,  89,  98,  30,  99,  21,  92,  29,  16,  17,  26,  32,  34,
        43,  25,  22,  19,  28,   9,  27,  23,  15,  13,  14,  18,  11,
        10,   8,  12,   6,   4,   7,   3,   5,   2,   1])

In [28]:
# Taken this idea from the H2O.ai wild fire competition github repo and changed it meet the local need
rows = []
fire_df = []
row_sum = 0
for f in tqdm(all_us_files):
    df = pd.read_csv(f, parse_dates=['acq_time'], low_memory=False) 
    csv_name = f.split('/')[-1]
    row = [
        f, csv_name, df.shape[0], df.shape[1], df.acq_date.min(), df.acq_date.max(),
        df.satellite.unique(), df.instrument.max(), df.version.max(),
        df.latitude.nunique(), df.longitude.nunique(),
        df.confidence.nunique(), df.satellite.nunique(), df.acq_date.nunique()
    ]
    if isinstance(df.confidence[0], str):
        df.confidence = df.confidence.replace({'l': 0, 'n': 50, 'h': 100})
    rows.append(row)
    row_sum = row_sum + df.shape[0]
    fire_df.append(df)
cols = [
    'path', 'csv', 'rows', 'cols', 'start', 'end',
    'satellite', 'instrument', 'version',
    'lats', 'lons', 'confs', 'sats', 'days'
]
filestats = pd.DataFrame(rows, columns=cols)
filestats.sort_values(by=['start', 'instrument'])
print("Total Rows: " + str(row_sum))

100%|██████████| 31/31 [00:16<00:00,  1.89it/s]


Unnamed: 0,path,csv,rows,cols,start,end,satellite,instrument,version,lats,lons,confs,sats,days
18,../wildfire-data/modis/2000/modis_2000_United_...,modis_2000_United_States.csv,3781,15,2000-11-01,2000-12-31,[Terra],MODIS,6.2,3720,3712,96,1,61
15,../wildfire-data/modis/2001/modis_2001_United_...,modis_2001_United_States.csv,44941,15,2001-01-01,2001-12-31,[Terra],MODIS,6.2,37881,40596,101,1,347
28,../wildfire-data/modis/2002/modis_2002_United_...,modis_2002_United_States.csv,79715,15,2002-01-01,2002-12-31,"[Terra, Aqua]",MODIS,6.2,61851,68094,101,2,355
29,../wildfire-data/modis/2003/modis_2003_United_...,modis_2003_United_States.csv,114471,15,2003-01-01,2003-12-31,"[Terra, Aqua]",MODIS,6.2,81630,93777,101,2,365
30,../wildfire-data/modis/2004/modis_2004_United_...,modis_2004_United_States.csv,158385,15,2004-01-01,2004-12-31,"[Terra, Aqua]",MODIS,6.2,96626,121487,101,2,365
27,../wildfire-data/modis/2005/modis_2005_United_...,modis_2005_United_States.csv,171160,15,2005-01-01,2005-12-31,"[Terra, Aqua]",MODIS,6.2,110486,135665,101,2,365
16,../wildfire-data/modis/2006/modis_2006_United_...,modis_2006_United_States.csv,126737,15,2006-01-01,2006-12-31,"[Terra, Aqua]",MODIS,6.2,88916,103617,101,2,365
17,../wildfire-data/modis/2007/modis_2007_United_...,modis_2007_United_States.csv,142420,15,2007-01-01,2007-12-31,"[Terra, Aqua]",MODIS,6.2,96108,109694,101,2,365
14,../wildfire-data/modis/2008/modis_2008_United_...,modis_2008_United_States.csv,119797,15,2008-01-01,2008-12-31,"[Terra, Aqua]",MODIS,6.2,83666,96357,101,2,366
19,../wildfire-data/modis/2009/modis_2009_United_...,modis_2009_United_States.csv,115174,15,2009-01-01,2009-12-31,"[Terra, Aqua]",MODIS,6.2,83904,96681,101,2,365


Total Rows: 7680596


In [29]:
fire_df

[         latitude   longitude  bright_ti4  scan  track    acq_date acq_time  \
 0       33.015182  -79.886269      302.35  0.50   0.66  2013-01-01     0613   
 1       28.966951  -81.738235      296.46  0.59   0.70  2013-01-01     0614   
 2       46.731281  -92.076927      302.25  0.46   0.39  2013-01-01     0750   
 3       41.480541  -90.832298      321.17  0.39   0.36  2013-01-01     0752   
 4       33.452900  -82.920937      297.83  0.57   0.52  2013-01-01     0754   
 ...           ...         ...         ...   ...    ...         ...      ...   
 401689  39.545490 -115.925102      327.62  0.39   0.37  2013-12-31     2035   
 401690  38.446827  -96.908493      339.03  0.73   0.76  2013-12-31     2035   
 401691  38.449066  -96.910133      342.29  0.73   0.76  2013-12-31     2035   
 401692  41.492058 -120.704498      335.88  0.52   0.42  2013-12-31     2036   
 401693  43.321976 -123.522713      337.41  0.42   0.45  2013-12-31     2037   
 
        satellite instrument  confiden

In [30]:
us_fire_df = pd.concat(fire_df)
us_fire_df.shape

(7680596, 17)

In [31]:
us_fire_df.sample(10)

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type,brightness,bright_t31
87936,36.719418,-95.34549,367.0,0.42,0.38,2016-03-25,1911,N,VIIRS,100,1.0,338.68,94.73,D,0,,
31025,41.566135,-84.043869,329.68,0.39,0.36,2017-02-14,735,N,VIIRS,50,1.0,256.05,3.24,N,2,,
505025,41.46199,-123.359352,303.63,0.72,0.76,2021-09-06,854,N,VIIRS,50,1.0,290.27,5.27,N,0,,
458505,37.033253,-119.345825,341.85,0.57,0.69,2020-09-14,2154,N,VIIRS,50,1.0,298.13,49.31,1,0,,
515825,47.353004,-120.269852,331.71,0.46,0.39,2015-10-13,2032,N,VIIRS,50,1.0,298.12,5.06,D,0,,
486928,36.469135,-94.034729,333.3,0.38,0.36,2012-10-29,1914,N,VIIRS,50,1.0,287.79,5.18,D,0,,
60584,46.1485,-116.9135,,2.5,1.5,2007-07-17,502,Terra,MODIS,31,6.2,,18.3,N,0,305.3,294.2
172181,41.626892,-87.364723,302.8,0.37,0.58,2016-06-05,658,N,VIIRS,50,1.0,286.09,0.69,N,2,,
78208,40.7026,-123.2027,,1.0,1.0,2008-08-08,614,Terra,MODIS,36,6.2,,6.5,N,0,306.9,291.8
97849,38.037037,-95.355209,346.31,0.52,0.67,2016-04-02,1821,N,VIIRS,50,1.0,294.18,30.07,D,0,,


In [32]:
us_fire_df = us_fire_df.sort_values(by=['acq_date', 'acq_time'])

In [33]:
us_fire_df

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type,brightness,bright_t31
0,38.542200,-78.304700,,2.80,1.60,2000-11-01,0250,Terra,MODIS,23,6.2,,40.30,N,0,304.8,280.9
1,38.556300,-78.308400,,2.80,1.60,2000-11-01,0250,Terra,MODIS,70,6.2,,54.50,N,0,309.4,280.4
2,38.545100,-78.310700,,2.80,1.60,2000-11-01,0250,Terra,MODIS,79,6.2,,58.80,N,0,309.9,280.7
3,38.558600,-78.317000,,2.80,1.60,2000-11-01,0250,Terra,MODIS,45,6.2,,36.00,N,0,302.3,279.8
4,31.339300,-89.912400,,1.00,1.00,2000-11-01,0427,Terra,MODIS,62,6.2,,8.50,N,0,304.9,287.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661053,29.796158,-90.769928,333.09,0.40,0.37,2021-12-31,1900,N,VIIRS,50,1.0,299.65,11.39,D,0,,
661054,29.784075,-91.766235,326.32,0.42,0.38,2021-12-31,1900,N,VIIRS,50,1.0,295.23,3.28,D,0,,
661055,40.144726,-101.530975,342.62,0.51,0.50,2021-12-31,1906,N,VIIRS,50,1.0,277.59,7.29,D,0,,
661056,38.554855,-122.550667,334.54,0.52,0.41,2021-12-31,2042,N,VIIRS,50,1.0,281.30,8.67,D,0,,


In [34]:
us_fire_df['confidence'].nunique()

101

In [35]:
us_fire_df['confidence'].unique()

array([ 23,  70,  79,  45,  62,  75,  74,  54,  69,  77,  72,  49,  36,
        43,  52,  21,  63,  57,  68,  67,   0,  56,  96,  98, 100,  95,
        42,  73,  91,  92,  41,  26,  53,  88,  81,  51,  61,  59,  60,
        76,  71,  46,  47,  82,  50,  65,  83,  55,  22,  58,  97,  90,
        87,  86,  66,  34,  84,  78,  44,  80,  89,  29,  85,  37,  24,
        64,  94,  48,  31,  18,  32,  93,  33,  39,  19,  10,  30,   4,
        13,   2,  99,  40,  28,  20,  38,  27,  35,  17,   7,  16,  14,
         9,   8,  15,  12,  25,  11,   6,   5,   3,   1])

# Brightness #
- brightness: Channel 21/22 brightness temperature of the fire pixel measured in Kelvin.

In [36]:
us_fire_df['brightness'].describe()

count    2.441753e+06
mean     3.255319e+02
std      2.383604e+01
min      3.000000e+02
25%      3.103000e+02
50%      3.190000e+02
75%      3.329000e+02
max      5.070000e+02
Name: brightness, dtype: float64

## Satellite ##

In [37]:
us_fire_df['satellite'].unique()

array(['Terra', 'Aqua', 'N'], dtype=object)

In [38]:
us_fire_df['instrument'].unique()

array(['MODIS', 'VIIRS'], dtype=object)

## bright_t31 ##
- Channel 31 brightness temperature of the fire pixel measured in Kelvin.

In [39]:
us_fire_df['bright_t31'].describe()

count    2.441753e+06
mean     2.953714e+02
std      9.574688e+00
min      2.643000e+02
25%      2.898000e+02
50%      2.950000e+02
75%      3.003000e+02
max      4.001000e+02
Name: bright_t31, dtype: float64

In [40]:
us_fire_df['bright_t31'].unique()

array([280.9, 280.4, 280.7, ..., 381.8, 363.8, 360.9])

## frp ##
- Fire Radiative Power depicts the pixel-integrated fire radiative power in MW (megawatts).

In [41]:
us_fire_df['frp'].describe()

count    7.680596e+06
mean     3.049063e+01
std      1.252634e+02
min     -5.920000e+01
25%      2.560000e+00
50%      7.300000e+00
75%      2.130000e+01
max      1.614640e+04
Name: frp, dtype: float64

## type ##
- Inferred hot spot type
  - 0 = presumed vegetation fire
  - 1 = active volcano
  - 2 = other static land source
  - 3 = offshore

In [42]:
us_fire_df['type'].unique()

array([0, 2, 3, 1])

## Confidence ##
- This value is based on a collection of intermediate algorithm quantities used in the detection process. 
- It is intended to help users gauge the quality of individual hotspot/fire pixels. 
- Confidence estimates range between 0 and 100% and are assigned one of the three fire classes
  - low-confidence fire
  - nominal-confidence fire
  - high-confidence fire).

In [43]:
us_fire_df['confidence'].unique()

array([ 23,  70,  79,  45,  62,  75,  74,  54,  69,  77,  72,  49,  36,
        43,  52,  21,  63,  57,  68,  67,   0,  56,  96,  98, 100,  95,
        42,  73,  91,  92,  41,  26,  53,  88,  81,  51,  61,  59,  60,
        76,  71,  46,  47,  82,  50,  65,  83,  55,  22,  58,  97,  90,
        87,  86,  66,  34,  84,  78,  44,  80,  89,  29,  85,  37,  24,
        64,  94,  48,  31,  18,  32,  93,  33,  39,  19,  10,  30,   4,
        13,   2,  99,  40,  28,  20,  38,  27,  35,  17,   7,  16,  14,
         9,   8,  15,  12,  25,  11,   6,   5,   3,   1])

In [44]:
us_fire_df.columns

Index(['latitude', 'longitude', 'bright_ti4', 'scan', 'track', 'acq_date',
       'acq_time', 'satellite', 'instrument', 'confidence', 'version',
       'bright_ti5', 'frp', 'daynight', 'type', 'brightness', 'bright_t31'],
      dtype='object')

## Extracting few main feautures from the dataset ## 

In [45]:
daily_fires_df = us_fire_df.groupby(
            ['latitude', 'longitude', 'acq_date', 'satellite', 'instrument', 'frp', 'type', 'bright_t31']).confidence.max().reset_index()

In [46]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
0,18.9325,-155.6619,2005-05-24,Aqua,MODIS,12.6,0,296.6,0
1,18.9464,-155.6658,2003-07-18,Aqua,MODIS,426.6,0,313.7,94
2,18.9478,-155.6555,2003-07-18,Aqua,MODIS,1330.3,0,333.7,87
3,18.9492,-155.6451,2003-07-18,Aqua,MODIS,111.8,0,310.0,0
4,18.9557,-155.6673,2003-07-18,Aqua,MODIS,260.6,0,311.4,100
...,...,...,...,...,...,...,...,...,...
2441748,70.3157,-148.7467,2019-09-06,Terra,MODIS,39.0,0,285.5,0
2441749,70.3201,-148.5162,2019-07-23,Aqua,MODIS,6.1,0,287.5,0
2441750,70.3211,-148.5288,2019-09-06,Aqua,MODIS,0.0,0,302.8,100
2441751,70.3270,-158.1143,2002-09-22,Terra,MODIS,90.8,0,268.2,76


In [47]:
daily_fires_df = daily_fires_df[daily_fires_df.confidence >= 50]

In [48]:
daily_fires_df['confidence'].unique()

array([ 94,  87, 100,  75,  86,  85,  69,  74,  60,  55,  56,  53,  70,
        82,  66,  72,  65,  59,  57,  89,  91,  67,  71,  54,  61,  92,
        77,  81,  63,  51,  90,  98,  58,  78,  68,  95,  73,  50,  99,
        97,  96,  80,  84,  83,  64,  62,  79,  76,  88,  93,  52])

In [49]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
1,18.9464,-155.6658,2003-07-18,Aqua,MODIS,426.6,0,313.7,94
2,18.9478,-155.6555,2003-07-18,Aqua,MODIS,1330.3,0,333.7,87
4,18.9557,-155.6673,2003-07-18,Aqua,MODIS,260.6,0,311.4,100
5,18.9571,-155.6570,2003-07-18,Aqua,MODIS,1086.8,0,333.6,100
6,18.9585,-155.6466,2003-07-18,Aqua,MODIS,355.2,0,323.0,94
...,...,...,...,...,...,...,...,...,...
2441743,70.2162,-159.6799,2016-09-09,Terra,MODIS,13.6,0,282.3,58
2441746,70.2836,-149.8928,2003-09-09,Terra,MODIS,39.3,0,273.7,100
2441750,70.3211,-148.5288,2019-09-06,Aqua,MODIS,0.0,0,302.8,100
2441751,70.3270,-158.1143,2002-09-22,Terra,MODIS,90.8,0,268.2,76


In [50]:
daily_fires_df.to_csv('usa_daily_fire_2012_2021.csv.gz', index=False, compression='gzip')

In [66]:
daily_fires_df['acq_date'].min()

'2000-11-01'

In [67]:
daily_fires_df['acq_date'].max()

'2020-12-31'

### California longitude and latitude range ###
- https://www.netstate.com/states/geography/ca_geography.htm
- Longitude: 114° 8' W to 124° 24' W
- Latitude: 32° 30' N to 42° N

In [51]:
CA_LATTITUDE_RANGE = (32, 42)
# We have to change [114, 124] to range as [-124, -114]
CA_LONGITUDE_RANGE = (-124, -114)

In [52]:
ca_daily_fire = daily_fires_df[
        (daily_fires_df.latitude > CA_LATTITUDE_RANGE[0]) & (daily_fires_df.latitude < CA_LATTITUDE_RANGE[1])]

In [53]:
print(ca_daily_fire.shape)
ca_daily_fire.sample(10)

(933973, 9)


Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
743249,33.4887,-80.9967,2011-03-02,Aqua,MODIS,10.3,0,295.1,60
1070129,35.9689,-101.6678,2004-10-18,Aqua,MODIS,21.9,0,301.0,77
1495425,39.9136,-122.6223,2020-08-20,Terra,MODIS,60.2,0,299.4,100
682354,33.1362,-92.7673,2005-07-19,Aqua,MODIS,13.2,0,302.6,77
515047,32.0313,-93.5244,2007-09-29,Aqua,MODIS,46.3,0,301.9,88
1168830,36.7286,-96.4579,2005-04-16,Terra,MODIS,63.0,0,301.1,93
828875,34.0916,-116.9917,2020-09-07,Terra,MODIS,241.8,0,307.1,90
1400694,38.8689,-120.5433,2014-09-17,Terra,MODIS,233.3,0,300.2,84
1121121,36.3317,-118.4163,2020-10-07,Terra,MODIS,13.6,0,284.3,82
1118898,36.315,-96.2712,2019-03-28,Terra,MODIS,12.3,0,286.7,66


In [54]:
ca_daily_fire = ca_daily_fire[
    (ca_daily_fire.longitude > CA_LONGITUDE_RANGE[0]) & (ca_daily_fire.longitude < CA_LONGITUDE_RANGE[1])]

In [55]:
ca_daily_fire

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
576870,32.4646,-114.6906,2011-06-29,Terra,MODIS,91.1,0,315.7,84
578656,32.4768,-114.6785,2011-10-11,Terra,MODIS,96.2,0,313.7,100
581181,32.4937,-114.7856,2013-02-06,Terra,MODIS,26.9,0,296.1,65
581909,32.4987,-114.7985,2010-02-04,Terra,MODIS,22.2,0,282.9,62
582191,32.5006,-114.7929,2009-07-08,Aqua,MODIS,172.0,0,316.4,78
...,...,...,...,...,...,...,...,...,...
1671420,41.9998,-121.0237,2014-11-12,Aqua,MODIS,24.6,0,274.4,77
1671421,41.9998,-120.7090,2012-08-13,Terra,MODIS,74.1,0,310.7,97
1671422,41.9998,-114.9167,2007-07-20,Aqua,MODIS,35.7,0,321.8,91
1671429,41.9999,-123.5675,2018-07-30,Terra,MODIS,18.2,0,295.8,62


In [56]:
ca_daily_fire.describe()

Unnamed: 0,latitude,longitude,frp,type,bright_t31,confidence
count,221183.0,221183.0,221183.0,221183.0,221183.0,221183.0
mean,38.218017,-120.263418,137.750609,0.010991,299.375297,83.16249
std,2.536727,2.395738,353.842116,0.150897,12.301854,16.044658
min,32.4646,-123.9997,0.0,0.0,265.3,50.0
25%,36.28005,-122.34545,19.9,0.0,291.5,70.0
50%,38.6603,-120.4959,43.1,0.0,297.6,86.0
75%,40.3195,-118.7763,110.1,0.0,305.6,100.0
max,41.9999,-114.0001,11944.2,3.0,400.1,100.0


In [57]:
ca_daily_fire['acq_date'] = pd.to_datetime(ca_daily_fire['acq_date'])

In [58]:
ca_daily_fire

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
576870,32.4646,-114.6906,2011-06-29,Terra,MODIS,91.1,0,315.7,84
578656,32.4768,-114.6785,2011-10-11,Terra,MODIS,96.2,0,313.7,100
581181,32.4937,-114.7856,2013-02-06,Terra,MODIS,26.9,0,296.1,65
581909,32.4987,-114.7985,2010-02-04,Terra,MODIS,22.2,0,282.9,62
582191,32.5006,-114.7929,2009-07-08,Aqua,MODIS,172.0,0,316.4,78
...,...,...,...,...,...,...,...,...,...
1671420,41.9998,-121.0237,2014-11-12,Aqua,MODIS,24.6,0,274.4,77
1671421,41.9998,-120.7090,2012-08-13,Terra,MODIS,74.1,0,310.7,97
1671422,41.9998,-114.9167,2007-07-20,Aqua,MODIS,35.7,0,321.8,91
1671429,41.9999,-123.5675,2018-07-30,Terra,MODIS,18.2,0,295.8,62


In [59]:
ca_daily_fire['year'] = ca_daily_fire.acq_date.dt.year

In [60]:
ca_daily_fire['month'] = ca_daily_fire.acq_date.dt.month

In [61]:
ca_daily_fire

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence,year,month
576870,32.4646,-114.6906,2011-06-29,Terra,MODIS,91.1,0,315.7,84,2011,6
578656,32.4768,-114.6785,2011-10-11,Terra,MODIS,96.2,0,313.7,100,2011,10
581181,32.4937,-114.7856,2013-02-06,Terra,MODIS,26.9,0,296.1,65,2013,2
581909,32.4987,-114.7985,2010-02-04,Terra,MODIS,22.2,0,282.9,62,2010,2
582191,32.5006,-114.7929,2009-07-08,Aqua,MODIS,172.0,0,316.4,78,2009,7
...,...,...,...,...,...,...,...,...,...,...,...
1671420,41.9998,-121.0237,2014-11-12,Aqua,MODIS,24.6,0,274.4,77,2014,11
1671421,41.9998,-120.7090,2012-08-13,Terra,MODIS,74.1,0,310.7,97,2012,8
1671422,41.9998,-114.9167,2007-07-20,Aqua,MODIS,35.7,0,321.8,91,2007,7
1671429,41.9999,-123.5675,2018-07-30,Terra,MODIS,18.2,0,295.8,62,2018,7


In [62]:
ca_daily_fire['acq_date'].min()

Timestamp('2000-11-01 00:00:00')

In [63]:
ca_daily_fire['acq_date'].max()

Timestamp('2020-12-31 00:00:00')

In [68]:
ca_daily_fire.to_csv('ca_daily_fire_2000_2021.csv', index=False)
#ca_daily_fire.to_csv('ca_daily_fire_2012_2020.csv.gz', index=False, compression='gzip')