# Fire Information Report

Using GeoPandas to show heating up: Satellite imagery shows hotspots in and around northern Thailand.

![](https://github.com/kaopanboonyuen/GISTDA2023/raw/main/img/FIR_logo2.jpeg)

In [None]:
# !pip install folium >> tmp
!pip install geopandas >> tmp

In [None]:
!wget https://github.com/kaopanboonyuen/GISTDA2023/raw/main/dataset/202202_FEB_FIR_SHP.zip

In [None]:
!unzip 202202_FEB_FIR_SHP.zip >> tmp

# Data Sample 

In [191]:
import geopandas as gpd
import warnings
warnings.filterwarnings("ignore")
# Read the shapefile

shapefile_path = "202202_FEB_FIR_SHP/20220214_FIR_SHP/20220214_FIR_SHP.shp"
gdf = gpd.read_file(shapefile_path)

transformed_gdfs = []
transformed_gdfs.append(gdf)

# Convert the GeoDataFrame to a DataFrame
df = gdf.drop(columns="geometry")

# Display the DataFrame
df.head()

Unnamed: 0,NAME,GM_LAYER,GM_TYPE,MAP_NAME,LATITUDE,LONGITUDE,NUMBER,DISTRICT,PROVINCE,PRIORITY,...,POINT_SYMB,FONT_NAME,FONT_SIZE,FONT_COLOR,FONT_STYLE,FONT_CHARS,FONT_WEIGH,POINT_SYM1,FONT_CHAR1,FONT_WEIG1
0,1J,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,Province_FIR_1_3_9_SHP.shp,18.56364,98.31892,1J,Mae Chaem,CHIANG MAI,*,...,Dot - Red,Times New Roman,10.0,"RGB(0,0,0)","OPAQUE,",1.0,400.0,,,
1,1L,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,Province_FIR_1_3_9_SHP.shp,18.48861,98.34169,1L,Mae Chaem,CHIANG MAI,*,...,Dot - Red,Times New Roman,10.0,"RGB(0,0,0)","OPAQUE,",1.0,400.0,,,
2,1K,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,Province_FIR_1_3_9_SHP.shp,18.50445,98.40274,1K,Mae Chaem,CHIANG MAI,*,...,Dot - Red,Times New Roman,10.0,"RGB(0,0,0)","OPAQUE,",1.0,400.0,,,
3,1R,J1_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,Province_FIR_1_3_9_SHP.shp,18.2313,98.42541,1R,Hot,CHIANG MAI,*,...,Dot - Red,Times New Roman,10.0,"RGB(0,0,0)","OPAQUE,",1.0,400.0,,,
4,1N,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,Province_FIR_1_3_9_SHP.shp,18.25719,98.54907,1N,Chomthong,CHIANG MAI,*,...,Dot - Red,Times New Roman,10.0,"RGB(0,0,0)","OPAQUE,",1.0,400.0,,,


# Import Data using GeoPandas: (1 Month: Feb/2022)

In [192]:
import os
import geopandas as gpd
import pandas as pd

# Root directory containing all folders with shapefiles
root_dir = "202202_FEB_FIR_SHP/"

# Initialize an empty list to store GeoDataFrames
gdfs = []

# Recursive function to search for shapefiles and combine them
def search_shapefiles(folder):
    for root, dirs, files in os.walk(folder):
        for file in files:
            if file.endswith(".shp"):
                # Read the shapefile and append to the list
                shapefile_path = os.path.join(root, file)
                gdf = gpd.read_file(shapefile_path)
                if gdf.crs != transformed_gdfs[0].crs:
                  # Transform the CRS to match the first GeoDataFrame
                  gdf = gdf.to_crs(transformed_gdfs[0].crs)

                transformed_gdfs.append(gdf)
                gdfs.append(gdf)
                
        for dir in dirs:
            # Recursively search in subfolders
            search_shapefiles(os.path.join(root, dir))

# Start the search from the root directory
search_shapefiles(root_dir)

# Combine all GeoDataFrames into a single DataFrame
combined_df = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

# Display the combined DataFrame

combined_df.head()

Unnamed: 0,NAME,GM_LAYER,GM_TYPE,LATITUDE,LONGITUDE,BRIGHT_TI4,SCAN,TRACK,ACQ_DATE,ACQ_TIME,...,PIORITY,PROVICE,NUMBER,PROVINC,Piority,KML_STYLE,ELEVATION,DISTRICR,TAGET,TAEGET
0,2A,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,19.35221,97.983,334.65,0.42,0.37,2/18/2022,650,...,,,,,,,,,,
1,11A,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,15.50836,101.16861,336.68,0.53,0.42,2/18/2022,648,...,,,,,,,,,,
2,11B,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,15.41239,101.22559,304.96,0.45,0.39,2/19/2022,1902,...,,,,,,,,,,
3,1C,J1_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,18.82367,98.11649,303.97,0.4,0.6,2/14/2022,1944,...,,,,,,,,,,
4,1D,SUOMI_VIIRS_C2_SouthEast_Asia_24h,Unknown Point Feature,18.37741,98.26976,353.29,0.51,0.41,2/14/2022,624,...,,,,,,,,,,


# Exploratory Data Analysis (EDA)

In [193]:
combined_df.columns

Index(['NAME', 'GM_LAYER', 'GM_TYPE', 'LATITUDE', 'LONGITUDE', 'BRIGHT_TI4',
       'SCAN', 'TRACK', 'ACQ_DATE', 'ACQ_TIME', 'SATELLITE', 'CONFIDENCE',
       'VERSION', 'BRIGHT_TI5', 'FRP', 'DAYNIGHT', 'layer', 'path', 'LAYER_2',
       'MAP_NAME', 'Shape_Leng', 'Shape_Area', 'TAMBON_EN', 'TAMBON_PCO',
       'ADM3_REF', 'ADM3ALT1EN', 'ADM3ALT2EN', 'ADM3ALT1TH', 'ADM3ALT2TH',
       'DISTRIC_EN', 'DISTRIC_PC', 'PROVINC_EN', 'PROVINC_PC', 'NATION_EN',
       'NATION_PCO', 'date', 'validOn', 'validTo', 'PRIORITY', 'TRAGET',
       'AREA', 'POINT_SYMB', 'FONT_NAME', 'FONT_SIZE', 'FONT_COLOR',
       'FONT_STYLE', 'FONT_CHARS', 'FONT_WEIGH', 'geometry', 'POINT_SYM1',
       'FONT_CHAR1', 'FONT_WEIG1', 'PROVING', 'DISTRICT', 'TARGET', 'PROVINCE',
       'LAYER', 'AREA_EN', 'AREA_TH', 'LAT', 'LON', 'TAMBON_TH', 'DISTRIC_TH',
       'PROVINC_TH', 'NATION_TH', 'FONT_PLACE', 'PIORITY', 'PROVICE', 'NUMBER',
       'PROVINC', 'Piority', 'KML_STYLE', 'ELEVATION', 'DISTRICR', 'TAGET',
       'TAEG

In [194]:
combined_df_2 = combined_df[['ACQ_DATE','LATITUDE','LONGITUDE','BRIGHT_TI4','CONFIDENCE',
       'SCAN', 'TRACK','DISTRIC_EN','PROVINC_EN','SATELLITE','TRAGET']]

combined_df_2.head(5)

Unnamed: 0,ACQ_DATE,LATITUDE,LONGITUDE,BRIGHT_TI4,CONFIDENCE,SCAN,TRACK,DISTRIC_EN,PROVINC_EN,SATELLITE,TRAGET
0,2/18/2022,19.35221,97.983,334.65,nominal,0.42,0.37,Mueang Mae Hong Son,Mae Hong Son,N,2A
1,2/18/2022,15.50836,101.16861,336.68,nominal,0.53,0.42,Si Thep,Phetchabun,N,11A
2,2/19/2022,15.41239,101.22559,304.96,nominal,0.45,0.39,Si Thep,Phetchabun,N,11B
3,2/14/2022,18.82367,98.11649,303.97,nominal,0.4,0.6,Mae Chaem,Chiang Mai,1,1C
4,2/14/2022,18.37741,98.26976,353.29,nominal,0.51,0.41,Mae Chaem,Chiang Mai,N,1D


In [195]:
combined_df_2['ACQ_DATE'] = pd.to_datetime(combined_df_2['ACQ_DATE'])

In [196]:
combined_df_2.CONFIDENCE.value_counts()

nominal    2248
high         60
Name: CONFIDENCE, dtype: int64

In [197]:
combined_df_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2708 entries, 0 to 2707
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   ACQ_DATE    2308 non-null   datetime64[ns]
 1   LATITUDE    2308 non-null   float64       
 2   LONGITUDE   2308 non-null   float64       
 3   BRIGHT_TI4  2308 non-null   object        
 4   CONFIDENCE  2308 non-null   object        
 5   SCAN        2168 non-null   float64       
 6   TRACK       2168 non-null   float64       
 7   DISTRIC_EN  1738 non-null   object        
 8   PROVINC_EN  1738 non-null   object        
 9   SATELLITE   2308 non-null   object        
 10  TRAGET      264 non-null    object        
dtypes: datetime64[ns](1), float64(4), object(6)
memory usage: 232.8+ KB


# Convert Data Type

In [198]:
combined_df_2['BRIGHT_TI4'] = combined_df_2['BRIGHT_TI4'].astype(str).astype(float)

# Descriptive Statistics

In [199]:
combined_df_2.describe() # include='all'

Unnamed: 0,LATITUDE,LONGITUDE,BRIGHT_TI4,SCAN,TRACK
count,2308.0,2308.0,2308.0,2168.0,2168.0
mean,18.076143,99.238594,329.93175,0.457057,0.474982
std,1.255834,1.074736,17.896872,0.084335,0.114823
min,15.38064,97.36057,300.01,0.32,0.36
25%,17.12431,98.40138,309.64,0.39,0.37
50%,18.55942,98.87095,335.35,0.44,0.44
75%,19.00445,100.08144,342.11,0.51,0.55
max,20.04135,101.47044,367.0,0.78,0.78


In [200]:
combined_df_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2708 entries, 0 to 2707
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   ACQ_DATE    2308 non-null   datetime64[ns]
 1   LATITUDE    2308 non-null   float64       
 2   LONGITUDE   2308 non-null   float64       
 3   BRIGHT_TI4  2308 non-null   float64       
 4   CONFIDENCE  2308 non-null   object        
 5   SCAN        2168 non-null   float64       
 6   TRACK       2168 non-null   float64       
 7   DISTRIC_EN  1738 non-null   object        
 8   PROVINC_EN  1738 non-null   object        
 9   SATELLITE   2308 non-null   object        
 10  TRAGET      264 non-null    object        
dtypes: datetime64[ns](1), float64(5), object(5)
memory usage: 232.8+ KB


# Clean and Feature Selection

In [201]:
combined_df_2.isnull().sum()

ACQ_DATE       400
LATITUDE       400
LONGITUDE      400
BRIGHT_TI4     400
CONFIDENCE     400
SCAN           540
TRACK          540
DISTRIC_EN     970
PROVINC_EN     970
SATELLITE      400
TRAGET        2444
dtype: int64

In [202]:
combined_df_2.shape

(2708, 11)

# Select Rows Based on column Values (BRIGHT_TI4 & CONFIDENCE)

In [203]:
combined_df_3 = combined_df_2.loc[combined_df_2['BRIGHT_TI4'] >= 200]
combined_df_3 = combined_df_3.loc[(combined_df_3['CONFIDENCE'] == 'nominal') | (combined_df_3['CONFIDENCE'] == 'high')]

In [204]:
combined_df_3.shape

(2308, 11)

In [205]:
combined_df_3 = combined_df_3.dropna()

In [206]:
combined_df_3.isnull().sum()

ACQ_DATE      0
LATITUDE      0
LONGITUDE     0
BRIGHT_TI4    0
CONFIDENCE    0
SCAN          0
TRACK         0
DISTRIC_EN    0
PROVINC_EN    0
SATELLITE     0
TRAGET        0
dtype: int64

In [207]:
combined_df_3['ACQ_DATE'].value_counts()

2022-02-14    118
2022-02-13     86
2022-02-16     32
2022-02-15     10
2022-02-18      8
2022-02-17      8
2022-02-19      2
Name: ACQ_DATE, dtype: int64

# Save .csv file

In [208]:
combined_df_3.to_csv('FIR_SAMPLE_OUTPUT.csv')

# Data Visualization using Folium (Whole Month)

In [209]:
import pandas as pd
import folium

# Create a DataFrame from the dataset
df = combined_df_3

# Convert 'ACQ_DATE' column to datetime
df['ACQ_DATE'] = pd.to_datetime(df['ACQ_DATE'])

# Create a map centered on Thailand
m = folium.Map(location=[13.736717, 100.523186], zoom_start=6)

# Add markers for each data point
for index, row in df.iterrows():
    folium.Marker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        popup=f"Date: {row['ACQ_DATE'].strftime('%m/%d/%Y')}<br>Target: {row['TRAGET']}",
        icon=folium.Icon(color='red')
    ).add_to(m)

# Display the map
m

# Data Visualization using Folium (1 Day)

In [210]:
import pandas as pd
import folium

# Create a DataFrame from the dataset
df = combined_df_3

# Convert 'ACQ_DATE' column to datetime
df['ACQ_DATE'] = pd.to_datetime(df['ACQ_DATE'])

# Set the target date for filtering
target_date = pd.to_datetime('2/13/2022') # '2/13/2022', '2/14/2022', '2/15/2022', '2/16/2022'

# Create a map centered on Thailand
m = folium.Map(location=[13.736717, 100.523186], zoom_start=6)

# Add markers for each data point matching the target date
filtered_df = df[df['ACQ_DATE'] == target_date]
for index, row in filtered_df.iterrows():
    folium.Marker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        popup=f"Date: {row['ACQ_DATE'].strftime('%m/%d/%Y')}<br>Target: {row['TRAGET']}",
        icon=folium.Icon(color='red')
    ).add_to(m)

# Display the map
m
