# Creating wildfire dataset for any country using global satellite data #
## We are using country Nepal as our target country  in this example ##

## Data Collection ##
- Please Visit https://firms.modaps.eosdis.nasa.gov/download/ to download both MODIS (2000-2020) and VIIRS (2012-2020) datasets based on each country wildfire data
  - MODIS - https://firms.modaps.eosdis.nasa.gov/country/
  - VIIRS - https://firms.modaps.eosdis.nasa.gov/country/
  - Active Fire (24 Hours, 48 Hours, 7 days)-  https://firms.modaps.eosdis.nasa.gov/usfs/active_fire/


<div style="max-width:400px;">
    <img src="https://github.com/prodramp/wildfire/blob/main/images/disclaimer.png?raw=true" size="400px"/>
</div>

In [1]:
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
import random
InteractiveShell.ast_node_interactivity = "all"
import os
from datetime import datetime
import plotly.express as px
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
!ls -l ../

total 8
-rw-r--r--   1 avkash  staff  511 Mar 26 12:49 README.md
drwxr-xr-x@ 15 avkash  staff  480 Mar 26 13:26 [1m[36mdataset[m[m
drwxr-xr-x   6 avkash  staff  192 Mar 26 08:03 [1m[36mimages[m[m
drwxr-xr-x@  5 avkash  staff  160 Mar 25 15:32 [1m[36mwildfire-data[m[m
drwxr-xr-x   8 avkash  staff  256 Mar 26 12:47 [1m[36mwildfire-data-streamlit[m[m
drwxr-xr-x@ 24 avkash  staff  768 Mar 25 15:29 [1m[36mzips_modis[m[m
drwxr-xr-x@ 13 avkash  staff  416 Mar 25 15:26 [1m[36mzips_viirs[m[m


In [3]:
! ls -lh ../wildfire-data

total 0
drwxr-xr-x@ 24 avkash  staff   768B Mar 25 15:28 [1m[36mmodis[m[m
drwxr-xr-x@ 13 avkash  staff   416B Mar 25 15:26 [1m[36mviirs-snpp[m[m


In [4]:
! ls -lh ../wildfire-data/modis

total 0
drwxr-xr-x@ 165 avkash  staff   5.2K Feb 12  2019 [1m[36m2000[m[m
drwxr-xr-x@ 199 avkash  staff   6.2K Nov 16 09:06 [1m[36m2001[m[m
drwxr-xr-x@ 201 avkash  staff   6.3K Nov 16 09:06 [1m[36m2002[m[m
drwxr-xr-x@ 206 avkash  staff   6.4K Nov 16 09:06 [1m[36m2003[m[m
drwxr-xr-x@ 204 avkash  staff   6.4K Nov 16 09:06 [1m[36m2004[m[m
drwxr-xr-x@ 210 avkash  staff   6.6K Nov 16 09:06 [1m[36m2005[m[m
drwxr-xr-x@ 204 avkash  staff   6.4K Nov 16 09:06 [1m[36m2006[m[m
drwxr-xr-x@ 210 avkash  staff   6.6K Nov 16 09:06 [1m[36m2007[m[m
drwxr-xr-x@ 207 avkash  staff   6.5K Nov 16 09:06 [1m[36m2008[m[m
drwxr-xr-x@ 207 avkash  staff   6.5K Nov 16 09:06 [1m[36m2009[m[m
drwxr-xr-x@ 205 avkash  staff   6.4K Nov 16 09:06 [1m[36m2010[m[m
drwxr-xr-x@ 203 avkash  staff   6.3K Nov 16 09:06 [1m[36m2011[m[m
drwxr-xr-x@ 206 avkash  staff   6.4K Nov 16 09:06 [1m[36m2012[m[m
drwxr-xr-x@ 209 avkash  staff   6.5K Nov 16 09:06 [1m[36m2013[m[

In [5]:
! ls -lh ../wildfire-data/viirs-snpp

total 0
drwxr-xr-x@ 222 avkash  staff   6.9K Nov 16 09:04 [1m[36m2012[m[m
drwxr-xr-x@ 225 avkash  staff   7.0K Nov 16 09:04 [1m[36m2013[m[m
drwxr-xr-x@ 216 avkash  staff   6.8K Nov 16 09:04 [1m[36m2014[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Nov 16 09:04 [1m[36m2015[m[m
drwxr-xr-x@ 222 avkash  staff   6.9K Nov 16 09:04 [1m[36m2016[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Nov 16 09:04 [1m[36m2017[m[m
drwxr-xr-x@ 221 avkash  staff   6.9K Nov 16 09:04 [1m[36m2018[m[m
drwxr-xr-x@ 219 avkash  staff   6.8K Nov 16 09:04 [1m[36m2019[m[m
drwxr-xr-x@ 220 avkash  staff   6.9K Nov 16 09:04 [1m[36m2020[m[m
drwxr-xr-x@ 218 avkash  staff   6.8K Jan 27 05:18 [1m[36m2021[m[m


In [6]:
all_csv_files = glob.glob('../wildfire-data/**/*.csv', recursive=True)

In [7]:
all_csv_files

['../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Reunion.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Barbados.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Greenland.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Burundi.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Nepal.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Moldova.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Papua_New_Guinea.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Cook_Islands.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Belgium.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Ethiopia.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Cuba.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Democratic_Republic_of_the_Congo.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Timor-Leste.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Sint_Maarten.csv',
 '../wildfire-data/viirs-snpp/2013/viirs-sn

In [8]:
len(all_csv_files)

6431

In [9]:
type(all_csv_files)

list

In [10]:
all_nepal_files = list(filter(lambda k: 'Nepal' in k, all_csv_files))

In [11]:
all_nepal_files

['../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Nepal.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_Nepal.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_Nepal.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_Nepal.csv',
 '../wildfire-data/viirs-snpp/2017/viirs-snpp_2017_Nepal.csv',
 '../wildfire-data/viirs-snpp/2019/viirs-snpp_2019_Nepal.csv',
 '../wildfire-data/viirs-snpp/2021/viirs-snpp_2021_Nepal.csv',
 '../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_Nepal.csv',
 '../wildfire-data/viirs-snpp/2018/viirs-snpp_2018_Nepal.csv',
 '../wildfire-data/viirs-snpp/2016/viirs-snpp_2016_Nepal.csv',
 '../wildfire-data/modis/2013/modis_2013_Nepal.csv',
 '../wildfire-data/modis/2014/modis_2014_Nepal.csv',
 '../wildfire-data/modis/2015/modis_2015_Nepal.csv',
 '../wildfire-data/modis/2012/modis_2012_Nepal.csv',
 '../wildfire-data/modis/2008/modis_2008_Nepal.csv',
 '../wildfire-data/modis/2001/modis_2001_Nepal.csv',
 '../wildfire-data/modis/2006/modis_2006_Nepal.csv',

In [12]:
all_nepal_files = list(filter(lambda k: 'Nepal.csv' in k, all_csv_files))

In [13]:
all_nepal_files

['../wildfire-data/viirs-snpp/2013/viirs-snpp_2013_Nepal.csv',
 '../wildfire-data/viirs-snpp/2014/viirs-snpp_2014_Nepal.csv',
 '../wildfire-data/viirs-snpp/2015/viirs-snpp_2015_Nepal.csv',
 '../wildfire-data/viirs-snpp/2012/viirs-snpp_2012_Nepal.csv',
 '../wildfire-data/viirs-snpp/2017/viirs-snpp_2017_Nepal.csv',
 '../wildfire-data/viirs-snpp/2019/viirs-snpp_2019_Nepal.csv',
 '../wildfire-data/viirs-snpp/2021/viirs-snpp_2021_Nepal.csv',
 '../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_Nepal.csv',
 '../wildfire-data/viirs-snpp/2018/viirs-snpp_2018_Nepal.csv',
 '../wildfire-data/viirs-snpp/2016/viirs-snpp_2016_Nepal.csv',
 '../wildfire-data/modis/2013/modis_2013_Nepal.csv',
 '../wildfire-data/modis/2014/modis_2014_Nepal.csv',
 '../wildfire-data/modis/2015/modis_2015_Nepal.csv',
 '../wildfire-data/modis/2012/modis_2012_Nepal.csv',
 '../wildfire-data/modis/2008/modis_2008_Nepal.csv',
 '../wildfire-data/modis/2001/modis_2001_Nepal.csv',
 '../wildfire-data/modis/2006/modis_2006_Nepal.csv',

In [14]:
df_viirs = pd.read_csv('../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_Nepal.csv')

In [15]:
df_viirs = pd.read_csv('../wildfire-data/viirs-snpp/2020/viirs-snpp_2020_Nepal.csv', low_memory=False)

In [16]:
df_viirs

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type
0,27.532799,83.409477,295.24,0.39,0.36,2020-01-04,2007,N,VIIRS,n,1,283.93,0.48,N,0
1,29.014025,82.741753,325.50,0.40,0.44,2020-01-11,710,N,VIIRS,l,1,296.50,3.10,D,0
2,28.972002,82.721237,329.05,0.40,0.44,2020-01-11,710,N,VIIRS,n,1,298.24,4.61,D,0
3,28.967470,82.850662,299.74,0.32,0.55,2020-01-11,1935,N,VIIRS,n,1,273.13,0.49,N,0
4,28.964066,82.850983,299.47,0.32,0.55,2020-01-11,1935,N,VIIRS,n,1,273.02,0.78,N,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9245,28.328114,84.929863,329.22,0.48,0.40,2020-12-31,2018,N,VIIRS,n,1,281.11,5.99,0,0
9246,28.331739,84.930809,324.43,0.48,0.40,2020-12-31,2018,N,VIIRS,n,1,279.79,2.50,0,0
9247,29.692017,81.372002,306.27,0.38,0.36,2020-12-31,2018,N,VIIRS,n,1,264.39,1.06,0,0
9248,28.328835,84.926025,327.65,0.48,0.40,2020-12-31,2018,N,VIIRS,n,1,283.64,9.41,0,0


In [17]:
df_viirs['satellite'].unique()

array(['N'], dtype=object)

In [18]:
df_viirs['instrument'].unique()

array(['VIIRS'], dtype=object)

In [19]:
type(df_viirs.confidence[0])

str

In [20]:
isinstance(df_viirs.confidence[0], str)

True

In [21]:
df_viirs.confidence.unique()

array(['n', 'l', 'h'], dtype=object)

In [22]:
df_modis = pd.read_csv('../wildfire-data/modis/2020/modis_2020_Nepal.csv')

In [23]:
df_modis

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,28.5115,81.2492,300.0,1.5,1.2,2020-01-13,734,Aqua,MODIS,24,6.03,288.2,6.7,D,0
1,28.5604,82.7200,305.1,1.2,1.1,2020-01-13,734,Aqua,MODIS,45,6.03,294.2,8.2,D,0
2,26.6773,86.9825,317.9,1.1,1.1,2020-01-15,721,Aqua,MODIS,77,6.03,295.8,14.0,D,0
3,28.3409,84.4350,304.1,1.0,1.0,2020-01-20,740,Aqua,MODIS,58,6.03,282.9,8.3,D,0
4,28.6669,82.7348,307.1,1.5,1.2,2020-01-22,728,Aqua,MODIS,53,6.03,284.5,16.1,D,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,28.3312,84.4264,304.0,3.2,1.7,2020-12-31,817,Aqua,MODIS,57,6.03,292.6,34.2,D,0
1348,28.6429,82.7774,312.2,2.4,1.5,2020-12-31,817,Aqua,MODIS,31,6.03,300.9,32.2,D,0
1349,28.6385,82.7702,309.7,2.4,1.5,2020-12-31,817,Aqua,MODIS,49,6.03,299.0,25.0,D,0
1350,29.8585,81.2090,301.9,2.0,1.4,2020-12-31,817,Aqua,MODIS,36,6.03,283.6,21.1,D,0


In [24]:
df_modis['satellite'].unique()

array(['Aqua', 'Terra'], dtype=object)

In [25]:
df_modis['instrument'].unique()

array(['MODIS'], dtype=object)

In [26]:
type(df_modis['confidence'][0])

numpy.int64

In [27]:
df_modis['confidence'].unique()

array([ 24,  45,  77,  58,  53,  72,  66,  50,  48,  37,  71,  41,  44,
        21,  26,  63,  51,  61,  43,  67,  59,  62,  73,  64,   0,  76,
        47,  54,  65,  69,  38,  29,  52,  60,  82,  32,  74,  42,  55,
        68,  19,  49,  57,  15,  35,  36,  85,  14,  78,  17,  81,  25,
        83,  84,  75,  94,  92,  93,  70,  80,  56,  40,  95,  86,  31,
        39,  22,  79,  12,  27,  90,  10, 100,  33,  30,  34,  89,  46,
        96,  87,  88,  18,  28,  91,   7,  23,  16,  97,  99,  20,  11,
        98,  13])

In [29]:
# Taken this idea from the H2O.ai wild fire competition github repo and changed it meet the local need
rows = []
fire_df = []
row_sum = 0
for f in tqdm(all_nepal_files):
    df = pd.read_csv(f, parse_dates=['acq_time'], low_memory=False) 
    csv_name = f.split('/')[-1]
    row = [
        f, csv_name, df.shape[0], df.shape[1], df.acq_date.min(), df.acq_date.max(),
        df.satellite.unique(), df.instrument.max(), df.version.max(),
        df.latitude.nunique(), df.longitude.nunique(),
        df.confidence.nunique(), df.satellite.nunique(), df.acq_date.nunique()
    ]
    if isinstance(df.confidence[0], str):
        df.confidence = df.confidence.replace({'l': 0, 'n': 50, 'h': 100})
    rows.append(row)
    row_sum = row_sum + df.shape[0]
    fire_df.append(df)
cols = [
    'path', 'csv', 'rows', 'cols', 'start', 'end',
    'satellite', 'instrument', 'version',
    'lats', 'lons', 'confs', 'sats', 'days'
]
filestats = pd.DataFrame(rows, columns=cols)
filestats.sort_values(by=['start', 'instrument'])
print("Total Rows: " + str(row_sum))

100%|█████████████████████████████████████████████████| 31/31 [00:00<00:00, 31.50it/s]


Unnamed: 0,path,csv,rows,cols,start,end,satellite,instrument,version,lats,lons,confs,sats,days
18,../wildfire-data/modis/2000/modis_2000_Nepal.csv,modis_2000_Nepal.csv,98,15,2000-11-01,2000-12-29,[Terra],MODIS,6.2,98,98,57,1,34
15,../wildfire-data/modis/2001/modis_2001_Nepal.csv,modis_2001_Nepal.csv,1238,15,2001-01-02,2001-12-29,[Terra],MODIS,6.2,1207,1222,91,1,139
28,../wildfire-data/modis/2002/modis_2002_Nepal.csv,modis_2002_Nepal.csv,487,15,2002-01-01,2002-12-30,"[Terra, Aqua]",MODIS,6.2,479,480,88,2,121
29,../wildfire-data/modis/2003/modis_2003_Nepal.csv,modis_2003_Nepal.csv,2202,15,2003-01-03,2003-12-31,"[Terra, Aqua]",MODIS,6.2,2091,2144,94,2,210
30,../wildfire-data/modis/2004/modis_2004_Nepal.csv,modis_2004_Nepal.csv,2846,15,2004-01-02,2004-12-30,"[Terra, Aqua]",MODIS,6.2,2682,2772,97,2,178
27,../wildfire-data/modis/2005/modis_2005_Nepal.csv,modis_2005_Nepal.csv,2676,15,2005-01-02,2005-12-31,"[Terra, Aqua]",MODIS,6.2,2540,2602,95,2,203
16,../wildfire-data/modis/2006/modis_2006_Nepal.csv,modis_2006_Nepal.csv,2349,15,2006-01-01,2006-12-31,"[Terra, Aqua]",MODIS,6.2,2218,2296,95,2,178
17,../wildfire-data/modis/2007/modis_2007_Nepal.csv,modis_2007_Nepal.csv,1353,15,2007-01-02,2007-12-31,"[Aqua, Terra]",MODIS,6.2,1306,1332,90,2,172
14,../wildfire-data/modis/2008/modis_2008_Nepal.csv,modis_2008_Nepal.csv,2990,15,2008-01-01,2008-12-31,"[Terra, Aqua]",MODIS,6.2,2792,2896,95,2,190
19,../wildfire-data/modis/2009/modis_2009_Nepal.csv,modis_2009_Nepal.csv,4704,15,2009-01-03,2009-12-31,"[Aqua, Terra]",MODIS,6.2,4287,4508,95,2,209


Total Rows: 355728


In [30]:
fire_df

[        latitude  longitude  bright_ti4  scan  track    acq_date acq_time  \
 0      26.669355  87.058891      351.25  0.39   0.36  2013-01-01     0720   
 1      26.682751  86.993225      332.90  0.39   0.36  2013-01-01     0720   
 2      26.682184  86.989326      346.14  0.39   0.36  2013-01-01     0720   
 3      26.668783  87.054985      327.39  0.39   0.36  2013-01-01     0720   
 4      26.659639  86.992599      329.09  0.39   0.36  2013-01-01     0720   
 ...          ...        ...         ...   ...    ...         ...      ...   
 22660  29.756262  81.242523      309.08  0.57   0.43  2013-12-30     0720   
 22661  29.754240  81.240425      283.43  0.57   0.43  2013-12-30     0720   
 22662  29.759514  81.246223      327.31  0.57   0.43  2013-12-30     0720   
 22663  29.687540  81.122917      356.57  0.38   0.44  2013-12-30     0720   
 22664  26.712027  87.000938      325.60  0.56   0.43  2013-12-31     0700   
 
       satellite instrument  confidence  version  bright_ti5  

In [31]:
nepal_fire_df = pd.concat(fire_df)
nepal_fire_df.shape

(355728, 17)

In [33]:
nepal_fire_df.sample(10)

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type,brightness,bright_t31
15827,27.306908,86.118263,333.89,0.6,0.53,2019-05-06,1923,N,VIIRS,50,1.0,290.46,4.11,N,0,,
4802,29.00433,81.19043,300.61,0.38,0.36,2021-03-05,2018,N,VIIRS,50,1.0,286.68,0.75,N,0,,
12527,27.68804,83.658524,297.81,0.44,0.46,2021-03-23,1942,N,VIIRS,50,1.0,285.25,0.38,N,0,,
289,28.475359,84.944557,342.3,0.57,0.43,2015-02-09,711,N,VIIRS,50,1.0,299.71,7.6,D,0,,
38311,28.239475,81.786629,339.92,0.47,0.48,2016-04-15,1949,N,VIIRS,50,1.0,300.59,2.73,N,0,,
20193,27.586031,83.596649,340.89,0.39,0.36,2016-04-09,736,N,VIIRS,0,1.0,315.82,1.8,D,0,,
15062,27.844988,82.29126,314.8,0.54,0.5,2019-05-05,1942,N,VIIRS,50,1.0,296.49,4.11,N,0,,
7681,28.3984,82.9459,,1.2,1.1,2016-12-20,505,Terra,MODIS,70,6.2,,12.4,D,0,310.2,290.7
2703,27.878843,84.629196,301.39,0.39,0.44,2020-04-05,1942,N,VIIRS,50,1.0,288.98,0.91,N,0,,
363,28.610355,83.693398,313.19,0.68,0.74,2016-01-12,1912,N,VIIRS,50,1.0,265.64,3.62,N,0,,


In [34]:
nepal_fire_df = nepal_fire_df.sort_values(by=['acq_date', 'acq_time'])

In [35]:
nepal_fire_df

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type,brightness,bright_t31
0,27.592100,87.538500,,1.10,1.10,2000-11-01,0500,Terra,MODIS,55,6.2,,6.20,D,0,306.2,292.1
1,29.100600,81.917000,,1.30,1.10,2000-11-06,0518,Terra,MODIS,87,6.2,,43.00,D,0,330.6,293.7
2,30.088100,81.335800,,1.80,1.30,2000-11-07,0601,Terra,MODIS,58,6.2,,21.10,D,0,308.7,291.2
3,30.084100,81.357000,,1.90,1.30,2000-11-07,0601,Terra,MODIS,37,6.2,,17.10,D,0,306.0,287.4
4,30.106000,81.964700,,2.10,1.40,2000-11-08,0506,Terra,MODIS,64,6.2,,41.60,D,0,315.0,282.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60353,27.125383,84.859642,332.21,0.39,0.36,2021-12-25,0724,N,VIIRS,50,1.0,296.39,2.05,D,0,,
60354,27.128704,84.858986,331.20,0.39,0.36,2021-12-25,0724,N,VIIRS,50,1.0,293.93,2.05,D,0,,
60355,28.698627,80.359848,330.97,0.54,0.42,2021-12-25,0724,N,VIIRS,50,1.0,294.96,8.72,D,0,,
60356,28.820669,80.171555,332.00,0.55,0.43,2021-12-25,0724,N,VIIRS,50,1.0,296.71,3.87,D,0,,


In [36]:
nepal_fire_df['confidence'].nunique()

101

In [37]:
nepal_fire_df['confidence'].unique()

array([ 55,  87,  58,  37,  64,  24,  63,  38,  27,   0,  14,  80,  51,
        35,  17,  47,  43,  72,  71,  70,  46,  48,  34,  53,  88,  85,
        67,  82,  30,  68,  79,  21,  69,  45,  95, 100,  25,  56,  36,
        78,  32,  83,  52,  77,  62,  92,  75,  60,  76,  40,  15,  90,
        59,  39,  74,  89,  94,  57,  33,  49,  12,  29,  41,  54,  20,
        23,  61,  96,  65,  86,  97,  42,  84,  73,  99,  66,  31,  50,
        16,  28,  44,  81,  19,   6,  11,  93,  26,  18,  22,  91,  98,
         9,   7,  13,   8,  10,   5,   3,   4,   2,   1])

# Brightness #
- brightness: Channel 21/22 brightness temperature of the fire pixel measured in Kelvin.

In [38]:
nepal_fire_df['brightness'].describe()

count    55011.000000
mean       319.632477
std         11.755701
min        300.000000
25%        310.800000
50%        319.800000
75%        326.000000
max        477.200000
Name: brightness, dtype: float64

## Satellite ##

In [39]:
nepal_fire_df['satellite'].unique()

array(['Terra', 'Aqua', 'N'], dtype=object)

In [40]:
nepal_fire_df['instrument'].unique()

array(['MODIS', 'VIIRS'], dtype=object)

## bright_t31 ##
- Channel 31 brightness temperature of the fire pixel measured in Kelvin.

In [41]:
nepal_fire_df['bright_t31'].describe()

count    55011.000000
mean       300.439956
std          8.388547
min        265.200000
25%        294.000000
50%        301.500000
75%        306.900000
max        328.000000
Name: bright_t31, dtype: float64

In [42]:
nepal_fire_df['bright_t31'].unique()

array([292.1, 293.7, 291.2, 287.4, 282.6, 293.4, 289.1, 293.6, 284.1,
       284.5, 287. , 288.6, 288.8, 287.2, 285.3, 286.8, 294.8, 294. ,
       292.7, 280.4, 289.3, 296.8, 281. , 292.4, 292.3, 288.7, 295.7,
       294.4, 295.6, 288.9, 283. , 286.3, 290.7, 282.8, 282.5, 289.7,
       291. , 293.5, 293.2, 291.9, 296.2, 291.5, 295.4, 296.5, 295.2,
       292.8, 298.2, 287.5, 290.4, 288.2, 297.6, 291.4, 297. , 296. ,
       286.2, 293.8, 296.4, 290.1, 289.8, 290.2, 290.9, 297.4, 293. ,
       291.7, 289.6, 291.8, 294.7, 298.5, 285.9, 297.7, 291.6, 290.5,
       289.4, 287.8, 284. , 286.9, 294.5, 281.4, 287.1, 282.1, 283.5,
       282.2, 293.1, 279.3, 278.2, 284.8, 292. , 288.3, 288.5, 286. ,
       287.7, 287.6, 294.6, 283.1, 285. , 292.9, 285.8, 281.1, 291.1,
       285.7, 285.6, 291.3, 286.7, 273. , 293.3, 302.6, 298.9, 277.8,
       286.4, 297.1, 292.5, 289.5, 302.4, 296.1, 294.3, 281.5, 271. ,
       293.9, 290.8, 292.2, 286.6, 301.8, 298.3, 294.1, 275.9, 295.9,
       290.6, 301.7,

## frp ##
- Fire Radiative Power depicts the pixel-integrated fire radiative power in MW (megawatts).

In [43]:
nepal_fire_df['frp'].describe()

count    355728.000000
mean          6.503891
std          15.557042
min           0.000000
25%           1.380000
50%           3.030000
75%           6.730000
max        2321.200000
Name: frp, dtype: float64

## type ##
- Inferred hot spot type
  - 0 = presumed vegetation fire
  - 1 = active volcano
  - 2 = other static land source
  - 3 = offshore

In [44]:
nepal_fire_df['type'].unique()

array([0, 3, 2])

## Confidence ##
- This value is based on a collection of intermediate algorithm quantities used in the detection process. 
- It is intended to help users gauge the quality of individual hotspot/fire pixels. 
- Confidence estimates range between 0 and 100% and are assigned one of the three fire classes
  - low-confidence fire
  - nominal-confidence fire
  - high-confidence fire).

In [45]:
nepal_fire_df['confidence'].unique()

array([ 55,  87,  58,  37,  64,  24,  63,  38,  27,   0,  14,  80,  51,
        35,  17,  47,  43,  72,  71,  70,  46,  48,  34,  53,  88,  85,
        67,  82,  30,  68,  79,  21,  69,  45,  95, 100,  25,  56,  36,
        78,  32,  83,  52,  77,  62,  92,  75,  60,  76,  40,  15,  90,
        59,  39,  74,  89,  94,  57,  33,  49,  12,  29,  41,  54,  20,
        23,  61,  96,  65,  86,  97,  42,  84,  73,  99,  66,  31,  50,
        16,  28,  44,  81,  19,   6,  11,  93,  26,  18,  22,  91,  98,
         9,   7,  13,   8,  10,   5,   3,   4,   2,   1])

In [None]:
nepal_fire_df.columns

## Extracting few main feautures from the dataset ## 

In [46]:
daily_fires_df = nepal_fire_df.groupby(
            ['latitude', 'longitude', 'acq_date', 'satellite', 'instrument', 'frp', 'type', 'bright_t31']).confidence.max().reset_index()

In [47]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
0,26.3775,87.3273,2020-11-07,Aqua,MODIS,7.0,0,301.3,57
1,26.4124,87.2421,2019-02-04,Aqua,MODIS,6.7,0,297.4,60
2,26.4171,87.5787,2018-03-06,Terra,MODIS,19.3,0,303.1,80
3,26.4265,87.5735,2013-02-26,Aqua,MODIS,4.8,0,302.9,43
4,26.4285,87.2503,2015-03-21,Terra,MODIS,7.0,0,303.1,62
...,...,...,...,...,...,...,...,...,...
55006,30.2113,81.9902,2012-06-29,Aqua,MODIS,10.3,0,306.9,56
55007,30.2321,82.0426,2002-10-22,Aqua,MODIS,13.1,0,286.1,41
55008,30.2455,81.6329,2003-12-14,Aqua,MODIS,8.2,0,289.2,37
55009,30.2470,81.6431,2003-12-14,Aqua,MODIS,10.0,0,286.7,58


In [48]:
daily_fires_df = daily_fires_df[daily_fires_df.confidence >= 50]

In [49]:
daily_fires_df['confidence'].unique()

array([ 57,  60,  80,  62,  81,  64,  63,  78,  56,  95,  73,  69,  58,
        86,  66,  77,  51,  67,  50,  52,  82,  59,  97,  53,  55,  74,
        75,  71,  72,  54,  70,  87,  76,  99,  85,  61,  68,  65,  88,
       100,  84,  79,  83,  92,  89,  94,  90,  91,  98,  96,  93])

In [50]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
0,26.3775,87.3273,2020-11-07,Aqua,MODIS,7.0,0,301.3,57
1,26.4124,87.2421,2019-02-04,Aqua,MODIS,6.7,0,297.4,60
2,26.4171,87.5787,2018-03-06,Terra,MODIS,19.3,0,303.1,80
4,26.4285,87.2503,2015-03-21,Terra,MODIS,7.0,0,303.1,62
5,26.4309,87.2452,2011-04-24,Terra,MODIS,43.0,0,298.4,81
...,...,...,...,...,...,...,...,...,...
55003,30.1793,80.8618,2016-12-16,Terra,MODIS,30.7,0,288.9,86
55004,30.1846,80.8731,2016-12-15,Aqua,MODIS,31.6,0,272.1,56
55005,30.1850,82.0371,2011-10-18,Terra,MODIS,26.2,0,287.2,85
55006,30.2113,81.9902,2012-06-29,Aqua,MODIS,10.3,0,306.9,56


In [51]:
daily_fires_df['acq_date'].min()

'2000-11-01'

In [52]:
daily_fires_df['acq_date'].max()

'2020-12-31'

In [53]:
daily_fires_df.describe()

Unnamed: 0,latitude,longitude,frp,type,bright_t31,confidence
count,40238.0,40238.0,40238.0,40238.0,40238.0,40238.0
mean,28.130197,83.232009,23.558335,0.000249,301.185633,69.580272
std,0.752369,2.100635,38.045217,0.022293,8.231211,12.217158
min,26.3775,80.0315,0.0,0.0,265.2,50.0
25%,27.5344,81.495525,9.5,0.0,294.9,60.0
50%,28.114,82.7318,14.3,0.0,302.1,69.0
75%,28.7229,84.813875,24.5,0.0,307.5,78.0
max,30.247,88.146,2321.2,2.0,328.0,100.0


In [54]:
daily_fires_df['acq_date'] = pd.to_datetime(daily_fires_df['acq_date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_fires_df['acq_date'] = pd.to_datetime(daily_fires_df['acq_date'])


In [55]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence
0,26.3775,87.3273,2020-11-07,Aqua,MODIS,7.0,0,301.3,57
1,26.4124,87.2421,2019-02-04,Aqua,MODIS,6.7,0,297.4,60
2,26.4171,87.5787,2018-03-06,Terra,MODIS,19.3,0,303.1,80
4,26.4285,87.2503,2015-03-21,Terra,MODIS,7.0,0,303.1,62
5,26.4309,87.2452,2011-04-24,Terra,MODIS,43.0,0,298.4,81
...,...,...,...,...,...,...,...,...,...
55003,30.1793,80.8618,2016-12-16,Terra,MODIS,30.7,0,288.9,86
55004,30.1846,80.8731,2016-12-15,Aqua,MODIS,31.6,0,272.1,56
55005,30.1850,82.0371,2011-10-18,Terra,MODIS,26.2,0,287.2,85
55006,30.2113,81.9902,2012-06-29,Aqua,MODIS,10.3,0,306.9,56


In [56]:
daily_fires_df['year'] = daily_fires_df.acq_date.dt.year

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_fires_df['year'] = daily_fires_df.acq_date.dt.year


In [57]:
daily_fires_df['month'] = daily_fires_df.acq_date.dt.month

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_fires_df['month'] = daily_fires_df.acq_date.dt.month


In [58]:
daily_fires_df

Unnamed: 0,latitude,longitude,acq_date,satellite,instrument,frp,type,bright_t31,confidence,year,month
0,26.3775,87.3273,2020-11-07,Aqua,MODIS,7.0,0,301.3,57,2020,11
1,26.4124,87.2421,2019-02-04,Aqua,MODIS,6.7,0,297.4,60,2019,2
2,26.4171,87.5787,2018-03-06,Terra,MODIS,19.3,0,303.1,80,2018,3
4,26.4285,87.2503,2015-03-21,Terra,MODIS,7.0,0,303.1,62,2015,3
5,26.4309,87.2452,2011-04-24,Terra,MODIS,43.0,0,298.4,81,2011,4
...,...,...,...,...,...,...,...,...,...,...,...
55003,30.1793,80.8618,2016-12-16,Terra,MODIS,30.7,0,288.9,86,2016,12
55004,30.1846,80.8731,2016-12-15,Aqua,MODIS,31.6,0,272.1,56,2016,12
55005,30.1850,82.0371,2011-10-18,Terra,MODIS,26.2,0,287.2,85,2011,10
55006,30.2113,81.9902,2012-06-29,Aqua,MODIS,10.3,0,306.9,56,2012,6


In [59]:
daily_fires_df['acq_date'].min()

Timestamp('2000-11-01 00:00:00')

In [60]:
daily_fires_df['acq_date'].max()

Timestamp('2020-12-31 00:00:00')

In [61]:
daily_fires_df.to_csv('nepal_daily_fire_2000_2020.csv', index=False)
#ca_daily_fire.to_csv('ca_daily_fire_2012_2020.csv.gz', index=False, compression='gzip')

In [62]:
!ls -la 

total 378384
drwxr-xr-x@ 16 avkash  staff        512 Mar 26 13:29 [1m[36m.[m[m
drwxr-xr-x  12 avkash  staff        384 Mar 26 12:49 [1m[36m..[m[m
-rw-r--r--@  1 avkash  staff       6148 Mar 26 10:30 .DS_Store
drwxr-xr-x   6 avkash  staff        192 Mar 26 10:03 [1m[36m.ipynb_checkpoints[m[m
-rw-r--r--   1 avkash  staff        623 Mar 26 12:47 README.md
-rw-r--r--   1 avkash  staff   14155994 Mar 26 13:25 ca_daily_fire_2000_2020.csv
-rw-r--r--   1 avkash  staff    3171046 Mar 26 13:25 ca_daily_fire_2012_2020.csv.gz
-rw-r--r--@  1 avkash  staff      25423 Mar 26 11:46 california.png
-rw-r--r--   1 avkash  staff    2455258 Mar 26 13:29 nepal_daily_fire_2000_2020.csv
-rw-r--r--   1 avkash  staff       3823 Mar 26 13:01 show_wildfire_map.ipynb
-rw-r--r--   1 avkash  staff  109378508 Mar 26 13:19 usa_daily_fire_2000_2021.csv
-rw-r--r--   1 avkash  staff   26457612 Mar 26 13:18 usa_daily_fire_2000_2021.csv.gz
-rw-r--r--   1 avkash  staff   26656065 Mar 26 08:32 usa_da