# 0. Setup

In [None]:
# General utilities
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
import matplotlib.colors as mcolors

# Geospatial tools
import geopandas as gpd
import folium

`geopandas` documentation: https://geopandas.org/en/stable/

# 1. Loading the shapefiles

Source of the data: https://agricultura.gencat.cat/ca/serveis/cartografia-sig/bases-cartografiques/boscos/incendis-forestals/incendis-forestals-format-shp/

More detailed explanation of the data fields: https://agricultura.gencat.cat/ca/serveis/cartografia-sig/bases-cartografiques/boscos/incendis-forestals/

In [20]:
# Get all shapefile paths
shapefile_pattern = "catalunya_fire_data/*/incendis*.shp"
shapefile_paths = glob.glob(shapefile_pattern)  # Return a list of all matching shapefiles with that pattern

# Sort to ensure consistent ordering
shapefile_paths.sort()

# Create a dictionary to store all GeoDataFrames
gdfs = {}

# Import each shapefile with dynamic naming
for i, shapefile_path in enumerate(shapefile_paths, 1):
    
    # Extract year from path for better naming
    year = shapefile_path.split('/')[-1].replace('incendis', '').replace('.shp', '')
    
    # Load the shapefile
    gdf = gpd.read_file(shapefile_path)
    
    # Store with dynamic name
    var_name = f"gdf_{year}"  # gdf_01, gdf_02, etc.
    gdfs[var_name] = gdf
    
    print(f"Loaded {var_name} (year {year}): {len(gdf)} fires")

print("\nAll GeoDataFrames loaded:")
print(gdfs.keys())

Loaded gdf_2014 (year 2014): 10 fires
Loaded gdf_2015 (year 2015): 13 fires
Loaded gdf_2016 (year 2016): 16 fires
Loaded gdf_2017 (year 2017): 35 fires
Loaded gdf_2018 (year 2018): 12 fires
Loaded gdf_2019 (year 2019): 27 fires
Loaded gdf_2020 (year 2020): 11 fires
Loaded gdf_2021 (year 2021): 22 fires
Loaded gdf_ (year ): 43 fires
Loaded gdf_2023 (year 2023): 22 fires

All GeoDataFrames loaded:
dict_keys(['gdf_2014', 'gdf_2015', 'gdf_2016', 'gdf_2017', 'gdf_2018', 'gdf_2019', 'gdf_2020', 'gdf_2021', 'gdf_', 'gdf_2023'])


In [21]:
# Note that the shapefile for 2022 did not have a 'year' in its name, so we change the naming separately
if 'gdf_2022' not in gdfs:
    gdfs['gdf_2022'] = gdfs['gdf_'].copy()
    # Remove the original without year
    del gdfs['gdf_']
    # Sort the keys again to maintain order
    gdfs = {k: gdfs[k] for k in sorted(gdfs.keys())}

print(gdfs.keys())

dict_keys(['gdf_2014', 'gdf_2015', 'gdf_2016', 'gdf_2017', 'gdf_2018', 'gdf_2019', 'gdf_2020', 'gdf_2021', 'gdf_2022', 'gdf_2023'])


# 2. Merging and cleaning

Let's merge and clean the shapefiles for easier manipulation. To do that, first we must check which features are available in each shapefile.

In [22]:
# Check the fields in each GeoDataFrame
for name, gdf in gdfs.items():
    print(f"{name} fields: {gdf.columns.tolist()}")

gdf_2014 fields: ['CODI_FINAL', 'MUNICIPI', 'DATA_INCEN', 'GRID_CODE', 'geometry']
gdf_2015 fields: ['CODI_FINAL', 'GRID_CODE', 'MUNICIPI', 'DATA_INCEN', 'geometry']
gdf_2016 fields: ['OBJECTID', 'CODI_FINAL', 'DATA_INCEN', 'GRID_CODE', 'MUNICIPI', 'geometry']
gdf_2017 fields: ['CODI_FINAL', 'DATA_INCEN', 'MUNICIPI', 'GRID_CODE', 'geometry']
gdf_2018 fields: ['CODI_FINAL', 'DATA_INCEN', 'MUNICIPI', 'GRID_CODE', 'geometry']
gdf_2019 fields: ['CODI_FINAL', 'DATA_INCEN', 'MUNICIPI', 'GRID_CODE', 'geometry']
gdf_2020 fields: ['CODI_FINAL', 'DATA_INCEN', 'MUNICIPI', 'GRID_CODE', 'geometry']
gdf_2021 fields: ['CODI_FINAL', 'DATA_INCEN', 'MUNICIPI', 'GRID_CODE', 'geometry']
gdf_2022 fields: ['CODI_FINAL', 'DATA_INCEN', 'MUNICIPI', 'GRID_CODE', 'geometry']
gdf_2023 fields: ['CODI_FINAL', 'DATA_INCEN', 'MUNICIPI', 'GRID_CODE', 'geometry']


All of the data frames have the following information:
- A code for denoting a fire (`CODI_FINAL`).
- The municipality where the fire occurred (`MUNICIPI`).
- The date when the fire was declared as active (`DATA_INCEN`).
- The geometry which, as is shown below, is stored as a polygon/multipolygon.

In [23]:
# Let's merge all GeoDataFrames into one, creating a field for the year
merged_gdf = gpd.GeoDataFrame()
for name, gdf in gdfs.items():
    year = name.split('_')[1]  # Extract year from the variable name
    gdf['year'] = year  # Add a new column for the year
    gdf = gdf.to_crs(epsg=4326)  # Ensure the same CRS for all GeoDataFrames: convert to WGS84 if not already (geometry for Copernicus)
    merged_gdf = pd.concat([merged_gdf, gdf], ignore_index=True)

# Remove the `OBJECTID` column only present in the 2016 data
if 'OBJECTID' in merged_gdf.columns:
    merged_gdf.drop(columns=['OBJECTID'], inplace=True)

# Also remove the `GRID_CODE` column which is not needed (uniform value across all years)
if 'GRID_CODE' in merged_gdf.columns:
    merged_gdf.drop(columns=['GRID_CODE'], inplace=True)

# Change naming of columns
merged_gdf = merged_gdf.rename(columns = {
    'CODI_FINAL': 'fire_code',
    'MUNICIPI': 'municip',
    'DATA_INCEN': 'date_activ'
})

merged_gdf.head()

Unnamed: 0,fire_code,municip,date_activ,geometry,year
0,2014080187,Tiana,26/01/14,"POLYGON ((2.27641 41.49769, 2.27648 41.49766, ...",2014
1,2014170069,Roses,22/10/14,"POLYGON ((3.22466 42.24001, 3.22464 42.24004, ...",2014
2,2014250067,Les,17/03/14,"MULTIPOLYGON (((0.70981 42.82654, 0.70987 42.8...",2014
3,2014250068,Sant Guim de la Plana,17/07/14,"POLYGON ((1.35591 41.7573, 1.35595 41.75733, 1...",2014
4,2014250069,Oliola,21/06/14,"POLYGON ((1.23054 41.91515, 1.23053 41.91515, ...",2014


In [26]:
# Save the clean and merged GeoDataFrame to a new shapefile
output_dir = os.path.join('catalunya_fire_data', 'clean_data')
os.makedirs(output_dir, exist_ok=True)  # Ensure the output directory exists
merged_gdf.to_file(
    filename = os.path.join(output_dir, 'catalunya_fires_cleaned.shp'), 
    driver = 'ESRI Shapefile'
)

# 3. Simple visualization

In [29]:
# Create a color palette for different years
years = merged_gdf['year'].unique()
colors = plt.cm.Set1(np.linspace(0, 1, len(years)))
year_colors = dict(zip(years, colors))

# Convert colors to hex format for folium
def rgb_to_hex(rgb):
    return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))

year_color_hex = {year: rgb_to_hex(color) for year, color in year_colors.items()}

# Create a folium map centered on Catalunya
center_lat = merged_gdf.geometry.centroid.y.mean()
center_lon = merged_gdf.geometry.centroid.x.mean()

m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=8,
    tiles='OpenStreetMap'
)

# Add fires to the map, grouped by year
for year in years:
    year_data = merged_gdf[merged_gdf['year'] == year]
    
    # Create a feature group for each year
    fg = folium.FeatureGroup(name=f'Fires {year}')
    
    for idx, row in year_data.iterrows():
        # Create popup text with fire information
        popup_text = f"""
        <b>Fire Code:</b> {row['fire_code']}<br>
        <b>Municipality:</b> {row['municip']}<br>
        <b>Fire Activation Date:</b> {row['date_activ']}<br>
        <b>Year:</b> {row['year']}
        """
        
        # Add the polygon to the map
        folium.GeoJson(
            row['geometry'],
            style_function=lambda x, color=year_color_hex[year]: {
                'fillColor': color,
                'color': color,
                'weight': 2,
                'fillOpacity': 0.6,
                'opacity': 0.8
            },
            popup=folium.Popup(popup_text, max_width=300),
            tooltip=f"{row['municip']} - {year}"  # Add information when hovering
        ).add_to(fg)
    
    fg.add_to(m)

# Add layer control to toggle years on/off
folium.LayerControl().add_to(m)

# Create a legend
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; left: 50px; width: 150px; height: auto; 
            background-color: white; border:2px solid grey; z-index:9999; 
            font-size:14px; padding: 10px">
<p><b>Fire Years</b></p>
'''

for year in sorted(years):
    color = year_color_hex[year]
    legend_html += f'<p><i class="fa fa-square" style="color:{color}"></i> {year}</p>'

legend_html += '</div>'

m.get_root().html.add_child(folium.Element(legend_html))

# Display the map
m


  center_lat = merged_gdf.geometry.centroid.y.mean()

  center_lon = merged_gdf.geometry.centroid.x.mean()
