In [28]:
import pandas as pd
import geopandas as gpd
import numpy as np
import h5py
from scipy.ndimage import label
import matplotlib.pyplot as plt
from shapely.geometry import Point
from scipy.ndimage import label


In [29]:
# Read the fire target table out of the 2017/20170701.h5 file
fire_file_path = "../../../Calabria_dataset/InputReteGood/Target/2017/20170701.h5"

# Slicing with [:] copies the dataset into memory, so the table stays usable once the file is closed
with h5py.File(fire_file_path, mode="r") as h5_file:
    fire_values_table = h5_file["values/table"][:]


In [None]:
# Pull the two fields used below out of the fire table:
# the value block (flattened to 1-D) and the flat `index` field
fire_results = fire_values_table["values_block_0"].flatten()
index_values = fire_values_table["index"]


In [31]:
# Read the grid attributes table from the same HDF5 file
with h5py.File(fire_file_path, mode="r") as h5_file:
    attributes_table = h5_file["attributes/table"][:]

# Field 0 of each record is the attribute name (bytes); field 1 is a sequence
# whose first element is taken as the attribute value.
attribute_names = [record[0].decode() for record in attributes_table]
attribute_values = [record[1][0] for record in attributes_table]
attributes_dict = {name: value for name, value in zip(attribute_names, attribute_values)}

# Grid dimensions are cast to int; the remaining attributes keep their stored type
ncols, nrows = (int(attributes_dict[key]) for key in ("ncols", "nrows"))
cellsize, xllcorner, yllcorner = (
    attributes_dict[key] for key in ("cellsize", "xllcorner", "yllcorner")
)


In [32]:
# Convert each flat index into a (row, column) position:
# the remainder by ncols is the column, the integer quotient is the row
col_coords = index_values % ncols
row_coords = index_values // ncols


In [33]:
# Create 2D fire grid: 1 where a record has fire value 1, 0 everywhere else
fire_grid = np.zeros((nrows, ncols), dtype=int)

# Mark every burning cell with a single vectorized assignment instead of a
# Python-level loop over all records.
is_burning = fire_results == 1
fire_grid[row_coords[is_burning], col_coords[is_burning]] = 1

fire_grid

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [34]:
# Group touching fire cells into numbered clusters.
# An all-ones 3x3 structuring element means diagonal neighbours also count as connected.
structure = np.array([[1] * 3 for _ in range(3)])

# label() returns the labelled grid (0 = background) and the number of clusters found
labeled_fire_grid, num_fires = label(fire_grid, structure=structure)
labeled_fire_grid

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)

In [35]:
# Extract fire cluster coordinates: one (cluster_id, row, col, x, y) tuple per labelled cell.
# np.argwhere lists the (row, col) of every labelled cell in row-major order, so only
# fire cells are visited instead of looping over the whole nrows x ncols grid.
# .tolist() turns the indices into plain Python ints for the coordinate arithmetic.
fire_cluster_coords = []

for row, col in np.argwhere(labeled_fire_grid > 0).tolist():
    cluster_id = labeled_fire_grid[row, col]
    x = xllcorner + (col * cellsize)
    # The last row maps to yllcorner, so y decreases as the row index grows
    y = yllcorner + ((nrows - 1 - row) * cellsize)
    fire_cluster_coords.append((cluster_id, row, col, x, y))

fire_cluster_coords

[(np.int32(1), 248, 199, np.float32(569815.4), np.float32(4423679.0)),
 (np.int32(1), 248, 200, np.float32(569915.4), np.float32(4423679.0)),
 (np.int32(1), 249, 194, np.float32(569315.4), np.float32(4423579.0)),
 (np.int32(1), 249, 195, np.float32(569415.4), np.float32(4423579.0)),
 (np.int32(1), 249, 196, np.float32(569515.4), np.float32(4423579.0)),
 (np.int32(1), 249, 197, np.float32(569615.4), np.float32(4423579.0)),
 (np.int32(1), 249, 198, np.float32(569715.4), np.float32(4423579.0)),
 (np.int32(1), 249, 199, np.float32(569815.4), np.float32(4423579.0)),
 (np.int32(1), 249, 200, np.float32(569915.4), np.float32(4423579.0)),
 (np.int32(1), 249, 201, np.float32(570015.4), np.float32(4423579.0)),
 (np.int32(1), 250, 194, np.float32(569315.4), np.float32(4423479.0)),
 (np.int32(1), 250, 195, np.float32(569415.4), np.float32(4423479.0)),
 (np.int32(1), 250, 196, np.float32(569515.4), np.float32(4423479.0)),
 (np.int32(1), 250, 197, np.float32(569615.4), np.float32(4423479.0)),
 (np.i

In [36]:
# Tabulate the cluster records: one row per fire cell
df_fire_clusters = pd.DataFrame(
    data=fire_cluster_coords,
    columns=["Cluster_ID", "Row", "Column", "X_Coord", "Y_Coord"],
)

df_fire_clusters

Unnamed: 0,Cluster_ID,Row,Column,X_Coord,Y_Coord
0,1,248,199,569815.375,4423679.0
1,1,248,200,569915.375,4423679.0
2,1,249,194,569315.375,4423579.0
3,1,249,195,569415.375,4423579.0
4,1,249,196,569515.375,4423579.0
...,...,...,...,...,...
138,11,2202,154,565315.375,4228279.0
139,11,2203,151,565015.375,4228179.0
140,11,2203,152,565115.375,4228179.0
141,11,2203,153,565215.375,4228179.0


In [37]:
# Read the precomputed table that assigns a Zone_ID to grid cells
df_cell_zones = pd.read_parquet(path="../cell_zones.parquet")
df_cell_zones

Unnamed: 0,Row,Column,X_Coord,Y_Coord,geometry,Zone_ID
0,74,179,567815.375,4196779.0,b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0\x0eT!A\...,8.0
1,74,180,567915.375,4196779.0,b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0\xd6T!A\...,8.0
2,74,181,568015.375,4196779.0,b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0\x9eU!A\...,8.0
3,74,182,568115.375,4196779.0,b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0fV!A\x00...,8.0
4,74,183,568215.375,4196779.0,b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0.W!A\x00...,8.0
...,...,...,...,...,...,...
1507648,2550,712,621115.375,4444379.0,"b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0v\xf4""A\...",5.0
1507649,2550,713,621215.375,4444379.0,"b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0>\xf5""A\...",5.0
1507650,2551,711,621015.375,4444479.0,b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0\xae\xf3...,5.0
1507651,2551,712,621115.375,4444479.0,"b'\x01\x01\x00\x00\x00\x00\x00\x00\xc0v\xf4""A\...",5.0


In [38]:
# Snap Y to whole units and store it as float64; presumably this is so it compares equal to
# the Y_Coord keys of df_cell_zones in the coordinate merge -- TODO confirm.
# round(0) already yields integer-valued floats, so no further rounding is needed after the cast.
df_fire_clusters["Y_Coord"] = df_fire_clusters["Y_Coord"].round(0).astype(float)


In [39]:
# Attach a Zone_ID to every fire cell by matching on its coordinates.
# A left join keeps fire cells with no matching zone cell (their Zone_ID becomes NaN).
df_fire_clusters_zones = pd.merge(
    df_fire_clusters,
    df_cell_zones[["X_Coord", "Y_Coord", "Zone_ID"]],
    how="left",
    on=["X_Coord", "Y_Coord"],
)

df_fire_clusters_zones

Unnamed: 0,Cluster_ID,Row,Column,X_Coord,Y_Coord,Zone_ID
0,1,248,199,569815.375,4423679.0,1.0
1,1,248,200,569915.375,4423679.0,1.0
2,1,249,194,569315.375,4423579.0,1.0
3,1,249,195,569415.375,4423579.0,1.0
4,1,249,196,569515.375,4423579.0,1.0
...,...,...,...,...,...,...
138,11,2202,154,565315.375,4228279.0,4.0
139,11,2203,151,565015.375,4228179.0,4.0
140,11,2203,152,565115.375,4228179.0,4.0
141,11,2203,153,565215.375,4228179.0,4.0


In [40]:
# Dominant Zone_ID: the most frequent zone among the cells of each cluster.
def _dominant_zone(zone_ids):
    """Return the most frequent non-null value of ``zone_ids``, or NaN if there is none.

    ``Series.mode()`` ignores NaN by default, so a cluster whose cells all lack a
    zone yields an empty result; indexing it with ``.iloc[0]`` would raise IndexError.
    When several values tie, the first (smallest) mode is returned.
    """
    modes = zone_ids.mode()
    return modes.iloc[0] if not modes.empty else np.nan


df_dominant_zones = (
    df_fire_clusters_zones.groupby("Cluster_ID")["Zone_ID"]
    .agg(_dominant_zone)
    .reset_index()
    .rename(columns={"Zone_ID": "Dominant_Zone_ID"})
)

df_dominant_zones


Unnamed: 0,Cluster_ID,Dominant_Zone_ID
0,1,1.0
1,2,5.0
2,3,2.0
3,4,2.0
4,5,2.0
5,6,3.0
6,7,7.0
7,8,3.0
8,9,3.0
9,10,4.0


In [41]:
# Replace each cell's own Zone_ID with the dominant zone of the cluster it belongs to
df_fire_clusters_zones = (
    df_fire_clusters_zones
    .drop(columns=["Zone_ID"])
    .merge(df_dominant_zones, on="Cluster_ID", how="left")
    .rename(columns={"Dominant_Zone_ID": "Zone_ID"})
)

df_fire_clusters_zones

Unnamed: 0,Cluster_ID,Row,Column,X_Coord,Y_Coord,Zone_ID
0,1,248,199,569815.375,4423679.0,1.0
1,1,248,200,569915.375,4423679.0,1.0
2,1,249,194,569315.375,4423579.0,1.0
3,1,249,195,569415.375,4423579.0,1.0
4,1,249,196,569515.375,4423579.0,1.0
...,...,...,...,...,...,...
138,11,2202,154,565315.375,4228279.0,4.0
139,11,2203,151,565015.375,4228179.0,4.0
140,11,2203,152,565115.375,4228179.0,4.0
141,11,2203,153,565215.375,4228179.0,4.0


In [42]:
# Every distinct Zone_ID present in the cell/zone table
all_zones = pd.DataFrame({"Zone_ID": df_cell_zones["Zone_ID"].unique()})

# Reduce to one row per (cluster, zone) pair, then count the clusters in each zone
df_fire_counts = df_fire_clusters_zones.loc[:, ["Cluster_ID", "Zone_ID"]].drop_duplicates()
fires_per_zone = (
    df_fire_counts
    .groupby("Zone_ID")
    .size()
    .reset_index(name="Num_Fires")
)

fires_per_zone


Unnamed: 0,Zone_ID,Num_Fires
0,1.0,1
1,2.0,3
2,3.0,3
3,4.0,2
4,5.0,1
5,7.0,1


In [44]:
# Left-join the counts onto the complete zone list; zones without any cluster get 0 fires
df_zone_fire_counts = (
    all_zones
    .merge(fires_per_zone, on="Zone_ID", how="left")
    .assign(Num_Fires=lambda frame: frame["Num_Fires"].fillna(0).astype(int))
    .sort_values("Zone_ID")
    .reset_index(drop=True)
)

df_zone_fire_counts

Unnamed: 0,Zone_ID,Num_Fires
0,1.0,1
1,2.0,3
2,3.0,3
3,4.0,2
4,5.0,1
5,6.0,0
6,7.0,1
7,8.0,0


In [51]:
# Climate data is read from a separate HDF5 file, using the same two table paths as the fire file
climate_file_path = "../../../Calabria_dataset/InputReteGood/Climatic/2017/20170701.h5"

# Both tables are copied into memory before the file is closed
with h5py.File(climate_file_path, mode="r") as h5_file:
    attributes_table = h5_file["attributes/table"][:]
    values_table = h5_file["values/table"][:]

In [52]:
# Build a name -> value lookup from the climate attributes table.
# NOTE: this rebinds attributes_dict, ncols, nrows and cellsize, which earlier cells
# set from the fire file.
attribute_names = [record[0].decode() for record in attributes_table]
attribute_values = [record[1][0] for record in attributes_table]
attributes_dict = {name: value for name, value in zip(attribute_names, attribute_values)}
ncols, nrows = (int(attributes_dict[key]) for key in ("ncols", "nrows"))
cellsize = attributes_dict["cellsize"]

In [53]:
# Split the climate table into its value block and its flat `index` field
climate_values = values_table["values_block_0"]
index_values = values_table["index"]

# Flat index -> (row, column): remainder by ncols is the column, integer quotient is the row
col_coords = index_values % ncols
row_coords = index_values // ncols

row_coords, col_coords

(array([   0,    0,    0, ..., 2591, 2591, 2591]),
 array([   0,    1,    2, ..., 1487, 1488, 1489]))

In [54]:
# One row per climate record, with grid position, the four climate variables and coordinates.
# The column order of values_block_0 (precipitation, humidity, temperature, wind) is taken
# from the assignments below -- TODO confirm against the dataset description.
df_climate = pd.DataFrame({
    "Row": row_coords,
    "Column": col_coords,
    "Precipitation": climate_values[:, 0],
    "Humidity": climate_values[:, 1],
    "Temperature": climate_values[:, 2],
    "Wind": climate_values[:, 3],
    "X_Coord": attributes_dict["xllcorner"] + (col_coords * cellsize),
    # The last row maps to yllcorner, so Y decreases as the row index grows
    "Y_Coord": attributes_dict["yllcorner"] + ((nrows - 1 - row_coords) * cellsize)
})

# Snap Y to whole units and store it as float64, the same normalisation applied to the
# fire-cluster Y_Coord, so the two can be matched exactly in coordinate merges.
# round(0) already yields integer-valued floats, so no further rounding is needed after the cast.
df_climate["Y_Coord"] = df_climate["Y_Coord"].round(0).astype(float)


In [55]:
# Preview the climate table (pandas truncates the display of large frames)
df_climate

Unnamed: 0,Row,Column,Precipitation,Humidity,Temperature,Wind,X_Coord,Y_Coord
0,0,0,,,,,549915.375,4448479.0
1,0,1,,,,,550015.375,4448479.0
2,0,2,,,,,550115.375,4448479.0
3,0,3,,,,,550215.375,4448479.0
4,0,4,,,,,550315.375,4448479.0
...,...,...,...,...,...,...,...,...
3862075,2591,1485,,,,,698415.375,4189379.0
3862076,2591,1486,,,,,698515.375,4189379.0
3862077,2591,1487,,,,,698615.375,4189379.0
3862078,2591,1488,,,,,698715.375,4189379.0


In [56]:
# Pair every zoned cell with its climate record by matching coordinates.
# The inner join drops cells that appear in only one of the two tables.
df_climate_zone = pd.merge(
    df_cell_zones[["X_Coord", "Y_Coord", "Zone_ID"]],
    df_climate,
    how="inner",
    on=["X_Coord", "Y_Coord"],
)

df_climate_zone

Unnamed: 0,X_Coord,Y_Coord,Zone_ID,Row,Column,Precipitation,Humidity,Temperature,Wind
0,567815.375,4196779.0,8.0,2517,179,0.0,43.453739,30.943409,19.785675
1,567915.375,4196779.0,8.0,2517,180,0.0,43.457977,30.943258,19.795025
2,568015.375,4196779.0,8.0,2517,181,0.0,43.463036,30.942766,19.804356
3,568115.375,4196779.0,8.0,2517,182,0.0,43.464634,30.943758,19.813671
4,568215.375,4196779.0,8.0,2517,183,0.0,43.467197,30.945190,19.827629
...,...,...,...,...,...,...,...,...,...
1507648,621115.375,4444379.0,5.0,41,712,0.0,34.937561,30.150095,10.675401
1507649,621215.375,4444379.0,5.0,41,713,0.0,34.943012,30.143555,10.676477
1507650,621015.375,4444479.0,5.0,40,711,0.0,34.956539,30.157795,10.669076
1507651,621115.375,4444479.0,5.0,40,712,0.0,34.956444,30.153513,10.670221


In [57]:
# Mean of each climate variable over all cells of a zone
df_zone_climate_all = (
    df_climate_zone
    .groupby("Zone_ID")
    .agg(dict.fromkeys(["Precipitation", "Humidity", "Temperature", "Wind"], "mean"))
    .reset_index()
)

df_zone_climate_all

Unnamed: 0,Zone_ID,Precipitation,Humidity,Temperature,Wind
0,1.0,0.0,41.732643,29.27515,8.326049
1,2.0,0.0,42.190063,29.053242,11.123259
2,3.0,0.0,53.883282,29.429832,12.137206
3,4.0,0.0,50.605591,29.052879,12.140365
4,5.0,0.0,38.871094,29.209106,10.119443
5,6.0,0.0,40.165001,29.268719,10.425837
6,7.0,0.0,50.282833,29.811609,11.557623
7,8.0,0.0,53.947163,28.506708,17.130785


In [58]:
# Attach the climate variables to every fire cell by matching coordinates.
# A left join keeps fire cells even when no climate record matches.
df_fire_with_climate = pd.merge(
    df_fire_clusters_zones,
    df_climate[["X_Coord", "Y_Coord", "Precipitation", "Humidity", "Temperature", "Wind"]],
    how="left",
    on=["X_Coord", "Y_Coord"],
)

df_fire_with_climate

Unnamed: 0,Cluster_ID,Row,Column,X_Coord,Y_Coord,Zone_ID,Precipitation,Humidity,Temperature,Wind
0,1,248,199,569815.375,4423679.0,1.0,0.0,43.345367,28.648363,8.005685
1,1,248,200,569915.375,4423679.0,1.0,0.0,43.636620,28.521614,7.992517
2,1,249,194,569315.375,4423579.0,1.0,0.0,42.325649,29.092976,8.053102
3,1,249,195,569415.375,4423579.0,1.0,0.0,42.565880,28.987972,8.040082
4,1,249,196,569515.375,4423579.0,1.0,0.0,42.694485,28.930569,8.027013
...,...,...,...,...,...,...,...,...,...,...
138,11,2202,154,565315.375,4228279.0,4.0,0.0,51.440411,28.032513,9.321663
139,11,2203,151,565015.375,4228179.0,4.0,0.0,50.777534,28.308563,9.314273
140,11,2203,152,565115.375,4228179.0,4.0,0.0,50.678192,28.348970,9.337703
141,11,2203,153,565215.375,4228179.0,4.0,0.0,50.806164,28.292408,9.361336


In [59]:
# Collapse each fire cluster to one row: its Zone_ID (first value in the group)
# and the mean of each climate variable over the cluster's cells
df_cluster_climate = (
    df_fire_with_climate
    .groupby("Cluster_ID")
    .agg(
        Zone_ID=("Zone_ID", "first"),
        Precipitation=("Precipitation", "mean"),
        Humidity=("Humidity", "mean"),
        Temperature=("Temperature", "mean"),
        Wind=("Wind", "mean"),
    )
    .reset_index()
)

df_cluster_climate


Unnamed: 0,Cluster_ID,Zone_ID,Precipitation,Humidity,Temperature,Wind
0,1,1.0,0.0,43.095226,28.756428,8.008166
1,2,5.0,0.0,37.303787,30.068119,10.616098
2,3,2.0,0.0,41.916607,26.785069,12.817098
3,4,2.0,0.0,43.344761,30.594194,9.224451
4,5,2.0,0.0,46.476936,29.829859,4.69345
5,6,3.0,0.0,44.323101,31.589273,4.313506
6,7,7.0,0.0,46.907806,29.704016,9.239685
7,8,3.0,0.0,50.760254,28.873072,7.256264
8,9,3.0,0.0,52.158352,29.835176,9.444869
9,10,4.0,0.0,53.314442,27.432812,8.266943


In [60]:
# Average the per-cluster climate means within each zone
# (every cluster counts equally, whatever its number of cells)
df_zone_fire_climate = (
    df_cluster_climate
    .groupby("Zone_ID")
    .agg(dict.fromkeys(["Precipitation", "Humidity", "Temperature", "Wind"], "mean"))
    .reset_index()
)

df_zone_fire_climate


Unnamed: 0,Zone_ID,Precipitation,Humidity,Temperature,Wind
0,1.0,0.0,43.095226,28.756428,8.008166
1,2.0,0.0,43.912769,29.069708,8.911666
2,3.0,0.0,49.080566,30.099174,7.004879
3,4.0,0.0,52.326626,27.761681,8.744971
4,5.0,0.0,37.303787,30.068119,10.616098
5,7.0,0.0,46.907806,29.704016,9.239685


In [62]:
# Suffix the climate columns so the two tables stay distinguishable once merged:
# "_all" for whole-zone means, "_fire" for means over fire clusters
df_zone_climate_all = df_zone_climate_all.rename(
    columns={
        name: f"{name}_all"
        for name in ("Precipitation", "Humidity", "Temperature", "Wind")
    }
)

df_zone_fire_climate = df_zone_fire_climate.rename(
    columns={
        name: f"{name}_fire"
        for name in ("Precipitation", "Humidity", "Temperature", "Wind")
    }
)



In [63]:
# Start from the per-zone fire counts and left-join both sets of climate means onto them
df_zone_day = (
    df_zone_fire_counts
    .merge(df_zone_climate_all, on="Zone_ID", how="left")
    .merge(df_zone_fire_climate, on="Zone_ID", how="left")
)

In [64]:
# Show the merged per-zone table with both the "_all" and "_fire" climate columns
df_zone_day

Unnamed: 0,Zone_ID,Num_Fires,Precipitation_all,Humidity_all,Temperature_all,Wind_all,Precipitation_fire,Humidity_fire,Temperature_fire,Wind_fire
0,1.0,1,0.0,41.732643,29.27515,8.326049,0.0,43.095226,28.756428,8.008166
1,2.0,3,0.0,42.190063,29.053242,11.123259,0.0,43.912769,29.069708,8.911666
2,3.0,3,0.0,53.883282,29.429832,12.137206,0.0,49.080566,30.099174,7.004879
3,4.0,2,0.0,50.605591,29.052879,12.140365,0.0,52.326626,27.761681,8.744971
4,5.0,1,0.0,38.871094,29.209106,10.119443,0.0,37.303787,30.068119,10.616098
5,6.0,0,0.0,40.165001,29.268719,10.425837,,,,
6,7.0,1,0.0,50.282833,29.811609,11.557623,0.0,46.907806,29.704016,9.239685
7,8.0,0,0.0,53.947163,28.506708,17.130785,,,,


In [65]:
# Select correct climate source per zone: use the fire-cluster mean when the zone has
# at least one fire and that mean is not NaN, otherwise use the whole-zone mean.
# Series.where does this column-wise, without a row-by-row apply.
has_fire = df_zone_day["Num_Fires"] > 0
for col in ["Precipitation", "Humidity", "Temperature", "Wind"]:
    fire_values = df_zone_day[f"{col}_fire"]
    use_fire = has_fire & fire_values.notna()
    # Store as float64 regardless of the precision of the source columns
    df_zone_day[col] = fire_values.where(use_fire, df_zone_day[f"{col}_all"]).astype(float)


In [66]:
# Keep only the final per-zone columns and order the rows by zone
df_zone_day = (
    df_zone_day
    .loc[:, ["Zone_ID", "Precipitation", "Humidity", "Temperature", "Wind", "Num_Fires"]]
    .sort_values("Zone_ID")
    .reset_index(drop=True)
)
df_zone_day

Unnamed: 0,Zone_ID,Precipitation,Humidity,Temperature,Wind,Num_Fires
0,1.0,0.0,43.095226,28.756428,8.008166,1
1,2.0,0.0,43.912769,29.069708,8.911666,3
2,3.0,0.0,49.080566,30.099174,7.004879,3
3,4.0,0.0,52.326626,27.761681,8.744971,2
4,5.0,0.0,37.303787,30.068119,10.616098,1
5,6.0,0.0,40.165001,29.268719,10.425837,0
6,7.0,0.0,46.907806,29.704016,9.239685,1
7,8.0,0.0,53.947163,28.506708,17.130785,0
