In [None]:
import geopandas as gpd
import xarray as xr
import rioxarray as rxr
import numpy as np
import pandas as pd
import rasterio
from rasterio.transform import rowcol
from datetime import datetime

import os
import random


import matplotlib.pyplot as plt
from matplotlib import colors


In [None]:
# --- Input locations -------------------------------------------------------
# Burned-area rasters: one sub-folder per year containing monthly .tif files
path_ba = "path-to-final-BA-data"
# Folder holding the WDPA (protected areas) netCDF file
path_pa = "path-to-WDPA-netCDF-file"
# Folder holding the yearly land-cover netCDF files
path_lc = "path-to-final-LC-data"
# Folder holding the yearly FWI netCDF files
path_clim = "path-to-final-FWI-files"

# --- Output ----------------------------------------------------------------
# Destination of the final CSV table
output_table = "path-and-name-of-final-table"

## Sample BA

In [None]:
# Select burned-area (BA) pixels, looping through all years.
#
# Every pixel with a value > 0 in band 1 of a monthly raster becomes one record
# with the pixel-centre coordinates and the burned value. Burned pixels are
# located with a single vectorised mask per raster instead of visiting every
# cell in nested Python loops.
BA_COLUMNS = ["Year", "Month", "Latitude", "Longitude", "Burned_Area"]

data = []
# NOTE: no random sub-sampling is applied below; the seed is kept so that any
# sampling added later stays reproducible.
random.seed(42)

# Loop through each subfolder (e.g., 2001 to 2020); sorted for consistent ordering
for year_folder in sorted(os.listdir(path_ba)):
    year_path = os.path.join(path_ba, year_folder)
    if not os.path.isdir(year_path):
        continue  # Skip files or irrelevant content

    # Loop through each raster in the subfolder; sorted for consistent ordering
    for file_name in sorted(os.listdir(year_path)):
        if not file_name.endswith(".tif"):
            continue  # Skip non-raster files

        # Read the first band and the affine transform, then close the file
        file_path = os.path.join(year_path, file_name)
        with rasterio.open(file_path) as src:
            data_array = src.read(1)
            transform = src.transform

        # Row/column indices of all burned pixels, in row-major order
        burned_rows, burned_cols = np.nonzero(data_array > 0)
        if burned_rows.size == 0:
            continue

        # File names are assumed to start with YYYYMM
        year = int(file_name[:4])
        month = int(file_name[4:6])

        # Convert all indices to pixel-centre coordinates in one call
        xs, ys = rasterio.transform.xy(transform, burned_rows, burned_cols, offset='center')
        lons = np.asarray(xs, dtype=float).tolist()
        lats = np.asarray(ys, dtype=float).tolist()
        burned_values = data_array[burned_rows, burned_cols]

        data.extend(
            {
                "Year": year,
                "Month": month,
                "Latitude": lat,
                "Longitude": lon,
                "Burned_Area": burned_value,
            }
            for lat, lon, burned_value in zip(lats, lons, burned_values)
        )

# Convert the list of dictionaries to a pandas DataFrame. Passing the columns
# explicitly keeps the schema intact even when no burned pixel was found.
df = pd.DataFrame(data, columns=BA_COLUMNS)

## Add PA information

In [None]:
# Work on an independent copy so the burned-area table `df` stays unchanged
df_pa = df.copy(deep=True)


In [None]:
# Attach the protected-area (PA) status and PA year of the nearest WDPA grid
# cell to every burned pixel.
#
# The WDPA file is the same for every row, so it is opened exactly once and
# closed again by the context manager (previously it was re-opened for each
# row and never closed).
input_filename = "name-of-WDPA-netCDF-file"
input_file = os.path.join(path_pa, input_filename)

with xr.open_dataset(input_file) as pa_data:
    grid_lat = pa_data['Latitude'].values
    grid_lon = pa_data['Longitude'].values

    # Index of the nearest grid coordinate for every burned pixel
    lat_idx = xr.DataArray(
        np.array([np.abs(grid_lat - lat).argmin() for lat in df_pa['Latitude']], dtype=np.intp),
        dims="points",
    )
    lon_idx = xr.DataArray(
        np.array([np.abs(grid_lon - lon).argmin() for lon in df_pa['Longitude']], dtype=np.intp),
        dims="points",
    )

    # Point-wise selection: one value per burned pixel.
    # NOTE(review): assumes 'PA_status' and 'PA_year' only have the dimensions
    # (Latitude, Longitude) - confirm against the WDPA file.
    PA_values = pa_data['PA_status'].isel(Latitude=lat_idx, Longitude=lon_idx).values
    PA_year_values = pa_data['PA_year'].isel(Latitude=lat_idx, Longitude=lon_idx).values

# Add the values to the dataframe as new columns
df_pa['PA_status'] = PA_values
df_pa['PA_year'] = PA_year_values

## Add LC information

In [None]:
# Independent copy of the PA-annotated table for the land-cover step
df_lc = df_pa.copy(deep=True)

In [None]:
## LC class of the year before the fire
# For every burned pixel, look up 'lccs_class_new' at the nearest grid cell of
# the land-cover file of the previous year. Each yearly file is opened once
# and closed by the context manager (previously it was re-opened for every row
# and never closed). Pixels whose file or variable is missing keep NaN.
lccs_values = np.full(len(df_lc), np.nan)
previous_years = df_lc['Year'].astype(int).to_numpy() - 1
pixel_lats = df_lc['Latitude'].to_numpy()
pixel_lons = df_lc['Longitude'].to_numpy()

for previous_year in np.unique(previous_years):
    previous_year = int(previous_year)

    # Determine the product version based on the year
    if previous_year <= 2015:
        file_version = 'v2.0.7'
    else:
        file_version = 'v2.1.1'

    # Construct the filename for the previous year
    input_filename = f"ESACCI-LC-L4-LCCS-Map-300m-P1Y-{previous_year}-{file_version}_agg_1km.nc"
    input_file = os.path.join(path_lc, input_filename)

    # Missing file: report once per year and leave NaN for the affected pixels
    if not os.path.exists(input_file):
        print(f"File for {previous_year} not found: {input_file}")
        continue

    # Positions (not index labels) of the pixels that need this file
    positions = np.flatnonzero(previous_years == previous_year)

    with xr.open_dataset(input_file) as lc_data:
        # Missing variable: report and leave NaN for the affected pixels
        if 'lccs_class_new' not in lc_data:
            print(f"Variable 'lccs_class_new' not found in {input_file}")
            continue

        grid_lat = lc_data['lat'].values
        grid_lon = lc_data['lon'].values

        # Index of the nearest grid coordinate for every pixel of this year
        lat_idx = xr.DataArray(
            np.array([np.abs(grid_lat - lat).argmin() for lat in pixel_lats[positions]], dtype=np.intp),
            dims="points",
        )
        lon_idx = xr.DataArray(
            np.array([np.abs(grid_lon - lon).argmin() for lon in pixel_lons[positions]], dtype=np.intp),
            dims="points",
        )

        # Point-wise selection on the first time step: one class per pixel
        lccs_values[positions] = (
            lc_data['lccs_class_new'].isel(time=0, lat=lat_idx, lon=lon_idx).values
        )

# Add the lccs_class_new values to the dataframe as a new column
df_lc['lccs'] = lccs_values

## Add FWI mean and max

In [None]:
# Independent copy of the LC-annotated table for the FWI step
df_all = df_lc.copy(deep=True)

In [None]:
# Add monthly FWI statistics (mean and max) to the DataFrame
def add_FWI(df, clim_data_path):
    """Add the monthly mean and maximum FWI of the fire month to each row.

    For every year present in ``df['Year']`` the file
    ``ERA5_Land_FWI_1km_<year>.nc`` in ``clim_data_path`` is opened. For each
    row of that year the 'fwi' series at the nearest grid cell is restricted
    to the time steps whose month equals the row's 'Month', and its mean and
    maximum are written to 'fwi_mean' and 'fwi_max'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Year', 'Month', 'Latitude' and 'Longitude'.
        The frame is modified in place.
    clim_data_path : str
        Folder containing the yearly FWI netCDF files.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the columns 'fwi_mean' and 'fwi_max'.
        Rows without a matching file or without data for the month hold NaN.
    """
    # Create the result columns up front so they exist (as NaN) even when a
    # yearly file is missing; existing columns are left untouched.
    for column in ('fwi_mean', 'fwi_max'):
        if column not in df.columns:
            df[column] = np.nan

    # Iterate over unique years in the burned area data
    for year in df['Year'].unique():
        # Open the NetCDF file for the corresponding year
        clim_file = os.path.join(clim_data_path, f"ERA5_Land_FWI_1km_{year}.nc")
        if not os.path.exists(clim_file):
            print(f"Data for year {year} not found!")
            continue

        with xr.open_dataset(clim_file) as ds:
            # The dataset is expected to provide 'fwi' with the coordinates
            # 'latitude', 'longitude' and 'valid_time'
            fwi = ds['fwi']
            time = pd.to_datetime(ds['valid_time'].values)

            # Loop through each row in the DataFrame for this year
            for i, row in df[df['Year'] == year].iterrows():
                month = int(row['Month'])  # Burned area month
                target_lat, target_lon = row['Latitude'], row['Longitude']

                # FWI series at the grid cell nearest to the burned pixel
                nearest_fwi = fwi.sel(
                    latitude=target_lat, longitude=target_lon, method="nearest"
                )

                # Keep only the time steps of the fire month.
                # NOTE(review): the boolean mask indexes the first dimension of
                # the series, which is assumed to be time - confirm.
                month_days = (time.month == month)
                monthly_fwi = nearest_fwi[month_days].values

                # Calculate statistics if there is valid data
                if monthly_fwi.size > 0:
                    df.loc[i, 'fwi_mean'] = np.mean(monthly_fwi)
                    df.loc[i, 'fwi_max'] = np.max(monthly_fwi)
                else:
                    # Fill both columns with NaN if no data is available
                    # (the former 4-element list did not match the 2 columns)
                    df.loc[i, ['fwi_mean', 'fwi_max']] = np.nan

    return df
    

# Enrich the table with the FWI statistics of the fire month
df_all = add_FWI(df=df_all, clim_data_path=path_clim)

## Filter and clean

In [None]:
## filter the data
# Pixels without a PA year get 0, so they always pass the
# "protected before the fire" filter below.
df_all["PA_year"] = df_all["PA_year"].fillna(0)
df_all["PA_year"] = df_all["PA_year"].astype(int)
df_all["Year"] = df_all["Year"].astype(int)
df_all["Month"] = df_all["Month"].astype(int)

# Pixels without a land-cover class (missing LC file or variable) cannot be
# classified as forest and would break the integer cast, so they are dropped.
df_all = df_all.dropna(subset=["lccs"]).copy()
df_all["lccs"] = df_all["lccs"].astype(int)

# Remove rows where Year (year of fire) - PA_year is negative, i.e. keep only
# pixels whose protection started before or in the year of the fire
df_filtered = df_all[(df_all["Year"] - df_all["PA_year"]) >= 0]

# Keep rows where lccs is 1, 2, or 3 (only forest).
# Built from df_filtered; df_forest was previously referenced before assignment.
df_forest = df_filtered[df_filtered["lccs"].isin([1, 2, 3])]

In [None]:
## Write the final forest table to disk as CSV (row index included)
df_forest.to_csv(path_or_buf=output_table)