In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import geopandas as gpd
import matplotlib as plt
from shapely.geometry import Point, Polygon
from sklearn.cluster import DBSCAN
from shapely.ops import unary_union
import plotly.graph_objects as go
from shapely.geometry import MultiPoint, box
from plotly.subplots import make_subplots
from datetime import datetime
import seaborn as sns

# fire_nrt_J1V

In [3]:
# Load the fire_nrt_J1V-C2 detections, parse the acquisition date and
# order the rows chronologically (date first, then HHMM acquisition time).
df1 = (
    pd.read_csv('../data/DL_FIRE_J1V-C2_412809/fire_nrt_J1V-C2_412809.csv')
    .assign(acq_date=lambda frame: pd.to_datetime(frame['acq_date']))
    .sort_values(by=['acq_date', 'acq_time'], ascending=True)
)
df1.tail()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
132312,-38.24463,-72.32137,326.92,0.56,0.51,2023-12-31,1804,1,VIIRS,l,2.0NRT,300.19,6.5,D
132313,-40.30667,-73.08473,325.26,0.32,0.55,2023-12-31,1804,1,VIIRS,l,2.0NRT,298.69,1.76,D
132314,-35.12209,-72.02786,331.66,0.5,0.49,2023-12-31,1806,1,VIIRS,n,2.0NRT,297.76,4.3,D
132315,-34.1073,-70.45391,348.43,0.4,0.44,2023-12-31,1806,1,VIIRS,n,2.0NRT,315.07,10.76,D
132316,-30.51879,-70.99274,339.99,0.39,0.44,2023-12-31,1808,1,VIIRS,l,2.0NRT,312.53,1.05,D


# fire_archive_SV and fire_nrt_SV

In [4]:
# Load the fire_archive_SV-C2 detections, parse the acquisition date and
# order the rows chronologically (date first, then HHMM acquisition time).
df2 = (
    pd.read_csv('../data/DL_FIRE_SV-C2_412810/fire_archive_SV-C2_412810.csv')
    .assign(acq_date=lambda frame: pd.to_datetime(frame['acq_date']))
    .sort_values(by=['acq_date', 'acq_time'], ascending=True)
)
df2.tail()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
281406,-27.41534,-70.260925,321.83,0.41,0.45,2022-08-31,518,N,VIIRS,n,1,282.26,1.44,N,2
281407,-36.182384,-70.92466,342.3,0.4,0.6,2022-08-31,1742,N,VIIRS,n,1,292.75,4.4,D,0
281408,-36.180378,-70.924461,330.96,0.4,0.6,2022-08-31,1742,N,VIIRS,n,1,294.31,4.41,D,0
281409,-22.317188,-68.883453,332.83,0.43,0.46,2022-08-31,1748,N,VIIRS,l,1,306.78,4.92,D,2
281410,-18.646139,-69.553741,335.64,0.43,0.46,2022-08-31,1748,N,VIIRS,n,1,306.21,0.65,D,0


In [5]:
# Load the fire_nrt_SV-C2 detections, parse the acquisition date and
# order the rows chronologically (date first, then HHMM acquisition time).
df3 = (
    pd.read_csv('../data/DL_FIRE_SV-C2_412810/fire_nrt_SV-C2_412810.csv')
    .assign(acq_date=lambda frame: pd.to_datetime(frame['acq_date']))
    .sort_values(by=['acq_date', 'acq_time'], ascending=True)
)
df3.tail()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
64452,-34.10617,-70.45506,299.64,0.5,0.49,2024-01-01,616,N,VIIRS,n,2.0NRT,286.24,5.18,N
64453,-39.42022,-71.93173,305.23,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,266.92,4.84,N
64454,-39.42414,-71.93903,306.3,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,262.45,2.3,N
64455,-33.53647,-70.82634,323.98,0.47,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,285.25,2.7,N
64456,-39.41918,-71.94389,295.01,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,265.41,31.99,N


# fire_nrt_M and fire_archive_M

In [6]:
# Page 51 https://modis-fire.umd.edu/files/MODIS_C6_C6.1_Fire_User_Guide_1.0.pdf
def convert_confidence(confidence):
    """Map a numeric confidence value to a single-letter confidence class.

    Parameters
    ----------
    confidence : number
        Numeric detection confidence.

    Returns
    -------
    str
        'h' when ``confidence >= 80``, 'n' when ``30 <= confidence < 80``,
        and 'l' for everything else (including values such as NaN for which
        both comparisons are false).
    """
    # Thresholds are checked from highest to lowest; the first floor the
    # value reaches decides the class.
    for floor, label in ((80, 'h'), (30, 'n')):
        if confidence >= floor:
            return label
    return 'l'

In [7]:
# Load the fire_nrt_M-C61 detections, parse the acquisition date, turn the
# numeric confidence into the l/n/h letter classes used by the other tables,
# and order the rows chronologically.
df4 = (
    pd.read_csv('../data/DL_FIRE_M-C61_412808/fire_nrt_M-C61_412808.csv')
    .assign(
        acq_date=lambda frame: pd.to_datetime(frame['acq_date']),
        confidence=lambda frame: frame['confidence'].apply(convert_confidence),
    )
    .sort_values(by=['acq_date', 'acq_time'], ascending=True)
)
df4.tail()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
15643,-33.43597,-70.7997,327.71,1.13,1.06,2023-12-31,1853,Aqua,MODIS,n,6.1NRT,310.52,7.02,D
15644,-36.42322,-71.96901,316.84,1.39,1.17,2023-12-31,1853,Aqua,MODIS,n,6.1NRT,302.3,9.19,D
15645,-37.124,-72.01332,313.43,1.42,1.18,2023-12-31,1853,Aqua,MODIS,n,6.1NRT,300.73,8.74,D
15646,-39.41642,-71.92625,303.83,1.56,1.23,2024-01-01,557,Aqua,MODIS,n,6.1NRT,270.15,20.22,N
15647,-39.4129,-71.94388,317.72,1.57,1.23,2024-01-01,557,Aqua,MODIS,h,6.1NRT,271.05,39.34,N


In [8]:
# Load the fire_archive_M-C61 detections, parse the acquisition date, turn the
# numeric confidence into the l/n/h letter classes used by the other tables,
# and order the rows chronologically.
df5 = (
    pd.read_csv('../data/DL_FIRE_M-C61_412808/fire_archive_M-C61_412808.csv')
    .assign(
        acq_date=lambda frame: pd.to_datetime(frame['acq_date']),
        confidence=lambda frame: frame['confidence'].apply(convert_confidence),
    )
    .sort_values(by=['acq_date', 'acq_time'], ascending=True)
)
df5.tail()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
86269,-37.2499,-73.3129,309.9,1.4,1.2,2023-01-31,1459,Terra,MODIS,n,6.03,298.2,9.7,D,2
86270,-45.5774,-72.0422,305.2,2.2,1.4,2023-01-31,1501,Terra,MODIS,n,6.03,293.6,14.5,D,0
86271,-38.6721,-72.8112,334.4,2.4,1.5,2023-01-31,1938,Aqua,MODIS,h,6.03,300.5,101.9,D,0
86272,-38.6589,-72.7954,359.6,2.4,1.5,2023-01-31,1938,Aqua,MODIS,h,6.03,300.2,286.1,D,0
86273,-38.6648,-72.8215,330.5,2.4,1.5,2023-01-31,1938,Aqua,MODIS,h,6.03,300.6,83.9,D,0


In [9]:
# Inspect how many df1 detections fall into each confidence class (l / n / h).
# Nothing is filtered here; this only counts rows per class.
df1['confidence'].value_counts()

n    111047
l     12444
h      8826
Name: confidence, dtype: int64

In [10]:
# Count df2 detections per confidence class (l / n / h).
df2['confidence'].value_counts()

n    240988
h     21830
l     18593
Name: confidence, dtype: int64

In [11]:
# Count df5 detections per confidence class, i.e. the letters produced by
# convert_confidence when the table was loaded.
df5['confidence'].value_counts()

n    50927
h    27866
l     7481
Name: confidence, dtype: int64

In [12]:
# Stack the five detection tables row-wise, order them chronologically and
# renumber the index from zero. Tables without a `type` column get NaN there.
merged_df = (
    pd.concat([df1, df2, df3, df4, df5])
    .sort_values(by=['acq_date', 'acq_time'])
    .reset_index(drop=True)
)
merged_df.tail()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
580102,-34.10617,-70.45506,299.64,0.5,0.49,2024-01-01,616,N,VIIRS,n,2.0NRT,286.24,5.18,N,
580103,-39.42022,-71.93173,305.23,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,266.92,4.84,N,
580104,-39.42414,-71.93903,306.3,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,262.45,2.3,N,
580105,-33.53647,-70.82634,323.98,0.47,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,285.25,2.7,N,
580106,-39.41918,-71.94389,295.01,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,265.41,31.99,N,


In [13]:
# Remove rows that are exact duplicates across every column, then renumber
# the index so it is contiguous again.
merged_df = merged_df.drop_duplicates()
merged_df = merged_df.reset_index(drop=True)

In [14]:
# Show the de-duplicated table; the notebook renders only the first and last
# rows of a frame this large.
merged_df

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,-23.820446,-70.320282,301.51,0.74,0.76,2013-01-01,448,N,VIIRS,n,1,285.54,2.38,N,2.0
1,-23.823833,-70.318871,306.90,0.74,0.76,2013-01-01,448,N,VIIRS,n,1,285.80,2.33,N,2.0
2,-26.430983,-69.475632,299.73,0.58,0.70,2013-01-01,448,N,VIIRS,n,1,279.61,2.86,N,2.0
3,-32.760929,-71.476440,309.70,0.52,0.67,2013-01-01,448,N,VIIRS,n,1,285.42,2.50,N,3.0
4,-34.624073,-71.000023,319.97,0.44,0.63,2013-01-01,448,N,VIIRS,n,1,290.28,2.27,N,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580024,-34.106170,-70.455060,299.64,0.50,0.49,2024-01-01,616,N,VIIRS,n,2.0NRT,286.24,5.18,N,
580025,-39.420220,-71.931730,305.23,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,266.92,4.84,N,
580026,-39.424140,-71.939030,306.30,0.48,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,262.45,2.30,N,
580027,-33.536470,-70.826340,323.98,0.47,0.48,2024-01-01,616,N,VIIRS,n,2.0NRT,285.25,2.70,N,


In [15]:
# Column order of the API's CSV header (bright_ti4 / bright_ti5 variant).
# Two header layouts were noted here; presumably both come from the fires
# API -- TODO confirm:
#   country_id,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight
#   country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
api_cols = [
    'country_id',
    'latitude',
    'longitude',
    'bright_ti4',
    'scan',
    'track',
    'acq_date',
    'acq_time',
    'satellite',
    'instrument',
    'confidence',
    'version',
    'bright_ti5',
    'frp',
    'daynight',
]

In [16]:
# Cast the three label columns to str in one pass. After the concat they hold
# values from differently-typed sources (e.g. satellite is 1 in one table and
# 'N' / 'Terra' / 'Aqua' in others), so this gives each column one value type.
merged_df = merged_df.astype({
    'satellite': str,
    'confidence': str,
    'version': str,
})

In [17]:
# Write the merged, de-duplicated table to a Parquet file under ../fires-api/db.
# The satellite / confidence / version columns were cast to str in the previous
# cell, presumably so the mixed-type object columns serialize cleanly -- TODO confirm.
merged_df.to_parquet('../fires-api/db/fires_merged.parquet', index=None)

In [18]:
# Summarize the final schema: dtypes and non-null counts per column. `type` is
# the only column with missing values, since only the archive tables carry it.
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 580029 entries, 0 to 580028
Data columns (total 15 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   latitude    580029 non-null  float64       
 1   longitude   580029 non-null  float64       
 2   brightness  580029 non-null  float64       
 3   scan        580029 non-null  float64       
 4   track       580029 non-null  float64       
 5   acq_date    580029 non-null  datetime64[ns]
 6   acq_time    580029 non-null  int64         
 7   satellite   580029 non-null  object        
 8   instrument  580029 non-null  object        
 9   confidence  580029 non-null  object        
 10  version     580029 non-null  object        
 11  bright_t31  580029 non-null  float64       
 12  frp         580029 non-null  float64       
 13  daynight    580029 non-null  object        
 14  type        367685 non-null  float64       
dtypes: datetime64[ns](1), float64(8), int64(1), object(