# Koala Habitat Fire‑Risk Analysis

This notebook provides a **step‑by‑step workflow** for analysing the overlap between koala habitat suitability and the Forest Fire Danger Index (FFDI) from 2013–2023.  

The analysis is organised into four clear sections:

1. **Categorise** each koala polygon by mean FFDI and habitat suitability  
2. **Cluster** polygons by their mean FFDI and visualise the spatial distribution  
3. **Box‑plot** monthly FFDI for the 15 largest high‑suitability polygons  
4. **Heat‑map** the percentage of each polygon that is under high risk (FFDI ≥ 12)



## Section 1 – Categorise Koala Habitat by Mean FFDI and Suitability

In [None]:
"""
Section 1 – Categorise Koala Habitat by Mean FFDI and Suitability

This cell performs the entire workflow for section 1 – categorise koala habitat by mean ffdi and suitability.

"""

import geopandas as gpd
import os
import pandas as pd
from rasterstats import zonal_stats
from tqdm import tqdm
import numpy as np

# === USER INPUT ===
shapefile_path = "C:/Users/gades/Documents/Koala_final.shp"
suitability_field = "suitabilit"  # 1 = low, 2 = high
ffdi_folder = "C:/Users/gades/Desktop/Thesis/datasets/terra_var/FFDI_finaldf90"
output_path = "C:/Users/gades/Desktop/Thesis/koala_categorized.shp"

# === 1. Load polygon data ===
gdf = gpd.read_file(shapefile_path)
gdf = gdf.to_crs("EPSG:4326")  # Match raster CRS

# === 2. List and sort FFDI raster files ===
# Only the monthly "ffdi_*.tif" rasters are used (the same filter the later
# sections apply), so any other GeoTIFF stored in the folder cannot leak into
# the long-term mean.
ffdi_files = sorted(
    os.path.join(ffdi_folder, f)
    for f in os.listdir(ffdi_folder)
    if f.startswith("ffdi_") and f.endswith(".tif")
)
if not ffdi_files:
    # Without rasters every mean would be NaN and every polygon would fall
    # through to the fallback class, so fail loudly instead.
    raise FileNotFoundError(f"No ffdi_*.tif rasters found in {ffdi_folder}")
dates = [os.path.basename(f).replace(".tif", "").replace("ffdi_", "") for f in ffdi_files]

# === 3. Zonal statistics: mean FFDI per polygon per month ===
mean_records = []  # one list per raster, each holding one mean per polygon
print("🔄 Calculating mean FFDI for all polygons over all months...")

for tif in tqdm(ffdi_files):
    stats = zonal_stats(gdf, tif, stats=['mean'], nodata=-9999)
    # zonal_stats reports None when a polygon has no mean; keep it as NaN so it
    # is skipped (not counted as 0) in the long-term mean below.
    mean_records.append([np.nan if s['mean'] is None else s['mean'] for s in stats])

# === 4. Create DataFrame of means ===
mean_ffdi_df = pd.DataFrame(mean_records, index=dates).T  # shape: (polygons, dates)
# Assign positionally: row k of mean_ffdi_df belongs to the k-th polygon, whatever
# index labels gdf happens to carry.
gdf["mean_ffdi"] = mean_ffdi_df.mean(axis=1, skipna=True).to_numpy()

# === 5. Assign default class if values are missing ===
def classify(row, suit_field=None, fallback_ffdi=None, threshold=12):
    """Return the combined suitability / fire-risk label for one polygon row.

    High risk means ``mean_ffdi >= threshold``. This matches the "FFDI ≥ 12"
    definition used elsewhere in this notebook; the previous version treated a
    mean of exactly 12 as low risk.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding the suitability
            value and a ``"mean_ffdi"`` entry.
        suit_field: Name of the suitability entry in ``row``. Defaults to the
            module-level ``suitability_field``.
        fallback_ffdi: Value used when ``row["mean_ffdi"]`` is missing.
            Defaults to the median of ``gdf["mean_ffdi"]``.
        threshold: FFDI value at or above which a polygon is high risk.

    Returns:
        One of the four "<Low|High> Suitability – <Low|High> Risk" labels.
        Rows whose suitability is neither 1 nor 2, or whose FFDI is still
        missing after the fallback, get "Low Suitability – Low Risk".
    """
    if suit_field is None:
        suit_field = suitability_field

    suit = row[suit_field]
    ffdi = row["mean_ffdi"]

    # Fill missing suitability with 1 (Low) and missing FFDI with the fallback
    if pd.isna(suit):
        suit = 1
    if pd.isna(ffdi):
        ffdi = gdf["mean_ffdi"].median() if fallback_ffdi is None else fallback_ffdi

    suit = int(suit)

    if suit == 1 and ffdi < threshold:
        return "Low Suitability – Low Risk"
    elif suit == 1 and ffdi >= threshold:
        return "Low Suitability – High Risk"
    elif suit == 2 and ffdi < threshold:
        return "High Suitability – Low Risk"
    elif suit == 2 and ffdi >= threshold:
        return "High Suitability – High Risk"
    else:
        # Reached for unknown suitability codes and for NaN FFDI (every
        # comparison with NaN is False).
        return "Low Suitability – Low Risk"  # Safe fallback

# === 6. Label every polygon with its suitability / risk class ===
gdf["RiskSuitabilityCategory"] = gdf.apply(classify, axis="columns")

# === 7. Report how many polygons fall into each class ===
category_counts = gdf["RiskSuitabilityCategory"].value_counts()
print("\n Category counts:", category_counts, sep="\n")

# === 8. Write the labelled polygons to an ESRI Shapefile ===
# NOTE(review): shapefile field names are limited to 10 characters, so
# "RiskSuitabilityCategory" is presumably truncated on write — confirm the
# column name in the saved file before relying on it downstream.
gdf.to_file(output_path, driver="ESRI Shapefile")
print(f"\n Shapefile saved to: {output_path}")


## Section 2 – K‑means Clustering of Polygons by Mean FFDI

In [None]:
"""
Section 2 – K‑means Clustering of Polygons by Mean FFDI

This cell performs the entire workflow for section 2 – k‑means clustering of polygons by mean ffdi.

"""

import geopandas as gpd
import rasterio
import rasterio.mask
import os
import pandas as pd
import matplotlib.pyplot as plt
from rasterstats import zonal_stats
from sklearn.cluster import KMeans
from tqdm import tqdm

# === USER INPUT ===
gpkg_path = r"C:\Users\gades\Documents\Koala.shp"
ffdi_folder = r"C:\Users\gades\Desktop\Thesis\datasets\terra_var\FFDI_finaldf90"
n_clusters = 3
output_shapefile = gpkg_path.replace(".shp", "_with_clusters.shp")
output_plot = r"C:\Users\gades\Desktop\koala_cluster_boxplot.png"

# === 1. Read the polygons and put them in EPSG:4326 ===
gdf = gpd.read_file(gpkg_path).to_crs("EPSG:4326")

# === 2. Collect the monthly ffdi_*.tif rasters in name order ===
ffdi_files = sorted(
    os.path.join(ffdi_folder, name)
    for name in os.listdir(ffdi_folder)
    if name.startswith("ffdi_") and name.endswith(".tif")
)
# The date is whatever remains of the file name once "ffdi_" and ".tif" are stripped.
dates = pd.to_datetime(
    [os.path.basename(path).replace(".tif", "").replace("ffdi_", "") for path in ffdi_files]
)

# === 3. Mean FFDI per polygon for every raster (input features for clustering) ===
print("Computing mean FFDI for clustering...")
mean_ffdi_per_polygon = []

for tif in tqdm(ffdi_files):
    monthly_stats = zonal_stats(gdf, tif, stats=['mean'], nodata=-9999)
    monthly_means = []
    for entry in monthly_stats:
        polygon_mean = entry['mean']
        # A missing mean is replaced by 0 so KMeans receives no gaps.
        # NOTE(review): this treats "no data" as FFDI 0 — confirm that is intended.
        monthly_means.append(0 if polygon_mean is None else polygon_mean)
    mean_ffdi_per_polygon.append(monthly_means)

# Transpose so that rows are polygons and columns are dates.
ffdi_mean_df = pd.DataFrame(mean_ffdi_per_polygon, index=dates).T

# === 4. K-means on each polygon's monthly mean series ===
print("Clustering polygons...")
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
zero_based_labels = kmeans.fit_predict(ffdi_mean_df)
gdf['cluster'] = zero_based_labels + 1  # store labels as 1..n_clusters

# === 5. Collect pixel-level FFDI values per cluster over time ===
print("Collecting all pixel values for boxplot...")
# cluster id -> one list of pixel values per raster (same order as ffdi_files)
cluster_pixel_values_by_date = {c: [[] for _ in dates] for c in range(1, n_clusters + 1)}

# Read the cluster labels once instead of building a row Series for every
# polygon on every raster.
cluster_ids = gdf['cluster'].tolist()
skipped_masks = 0

for i, tif in enumerate(tqdm(ffdi_files)):
    with rasterio.open(tif) as src:
        for geom, cluster in zip(gdf.geometry, cluster_ids):
            try:
                out_image, _ = rasterio.mask.mask(src, [geom], crop=True, nodata=-9999)
            except Exception:
                # Best effort: a polygon that cannot be masked against this raster
                # is left out, but counted so the omission is visible afterwards.
                skipped_masks += 1
                continue
            data = out_image[0]  # first band
            values = data[data != -9999]  # drop the fill written outside the polygon
            cluster_pixel_values_by_date[cluster][i].extend(values.tolist())

if skipped_masks:
    print(f"Skipped {skipped_masks} polygon/raster pairs that could not be masked.")

# === 6. Create and save boxplots ===
print("Plotting and saving boxplot...")
# Keep the original palette for up to three clusters; beyond that fall back to
# matplotlib's default colour cycle ("C0".."C9") so a larger n_clusters does not
# run off the end of the colour list.
base_colors = ['red', 'yellow', 'blue']
if n_clusters <= len(base_colors):
    cluster_colors = base_colors[:n_clusters]
else:
    cluster_colors = [f"C{k % 10}" for k in range(n_clusters)]
plt.figure(figsize=(24, 12))

# Each date occupies n_clusters adjacent slots followed by one empty slot.
group_width = n_clusters + 1
positions = []
box_data = []
colors = []

for t_idx in range(len(dates)):
    for c in range(1, n_clusters + 1):
        box_data.append(cluster_pixel_values_by_date[c][t_idx])
        positions.append(t_idx * group_width + (c - 1))
        colors.append(cluster_colors[c - 1])

legend_labels = [f"Cluster {c}" for c in range(1, n_clusters + 1)]

bp = plt.boxplot(box_data, positions=positions, patch_artist=True, widths=1.0, showfliers=False)
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)

# Add FFDI threshold line
plt.axhline(y=12, color='green', linestyle='--', linewidth=2, label='FFDI = 12 threshold')

# Legend: empty line plots act as colour proxies for the clusters
for c_idx, label in enumerate(legend_labels):
    plt.plot([], c=cluster_colors[c_idx], label=label)
plt.legend(loc="upper left", fontsize=12)

# Set x-axis ticks to yearly intervals only. The tick sits at the centre of the
# first group of each year, located from the dates themselves so incomplete or
# non-January-start years are still labelled correctly.
years = sorted(set(d.year for d in dates))
first_index_of_year = {}
for t_idx, d in enumerate(dates):
    first_index_of_year.setdefault(d.year, t_idx)
year_positions = [
    first_index_of_year[y] * group_width + (n_clusters - 1) / 2 for y in years
]
plt.xticks(ticks=year_positions, labels=years, fontsize=12)

plt.title("Monthly FFDI Distributions by Koala Cluster (2013–2023)", fontsize=16)
plt.xlabel("Year", fontsize=14)
plt.ylabel("FFDI", fontsize=14)
plt.grid(True, linestyle="--", alpha=0.5)
plt.tight_layout()
plt.savefig(output_plot, dpi=300)
plt.show()

# === 7. Save clustered spatial map ===
print(f"Saving spatial clustered map to: {output_shapefile}")
gdf.to_file(output_shapefile)

print(" Done!")


## Section 3 – Monthly FFDI Box‑plots for the 15 Largest High‑Suitability Polygons

In [None]:
"""
Section 3 – Monthly FFDI Box‑plots for the 15 Largest High‑Suitability Polygons

This cell performs the entire workflow for section 3 – monthly ffdi box‑plots for the 15 largest high‑suitability polygons.

"""

import geopandas as gpd
import rasterio
import rasterio.mask
import os
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

# === USER INPUT ===
gpkg_path = r"C:\Users\gades\Documents\Koala.shp"
ffdi_folder = r"C:\Users\gades\Desktop\Thesis\datasets\terra_var\FFDI_finaldf90"
output_folder = "."

# === 1. Load vector data ===
gdf = gpd.read_file(gpkg_path)
gdf = gdf.to_crs("EPSG:4326")

# === 2. Filter for suitability class 2 ===
# The suitability column is the first one whose name contains "suit".
suit_candidates = [col for col in gdf.columns if "suit" in col.lower()]
if not suit_candidates:
    raise KeyError(f"No column containing 'suit' found in {gpkg_path}")
suit_col = suit_candidates[0]
suit2_gdf = gdf[gdf[suit_col] == 2].copy()

# === 3. Calculate area and select largest 15 polygons ===
# Areas are measured in the equal-area CRS EPSG:6933 and converted from m² to km².
suit2_gdf["area_km2"] = suit2_gdf.geometry.to_crs(epsg=6933).area / 1e6  # km²
top15_gdf = suit2_gdf.sort_values("area_km2", ascending=False).head(15).reset_index(drop=True)
if len(top15_gdf) < 15:
    print(f"Warning: only {len(top15_gdf)} polygons with {suit_col} == 2 were found (expected 15).")

# === 4. List FFDI raster files ===
ffdi_files = sorted([
    os.path.join(ffdi_folder, f) for f in os.listdir(ffdi_folder)
    if f.endswith(".tif") and f.startswith("ffdi_")
])
dates = [os.path.basename(f).replace(".tif", "").replace("ffdi_", "") for f in ffdi_files]
dates = pd.to_datetime(dates)

# === 5. Extract pixel-level FFDI values per polygon per month ===
print("Extracting FFDI pixel values...")
# polygon position (0..14) -> one list of pixel values per raster
polygon_pixel_ffdi = {i: [[] for _ in range(len(dates))] for i in range(15)}
skipped_masks = 0

for t_idx, tif in enumerate(tqdm(ffdi_files)):
    with rasterio.open(tif) as src:
        for i, geom in enumerate(top15_gdf.geometry):
            try:
                out_image, _ = rasterio.mask.mask(src, [geom], crop=True, nodata=-9999)
            except Exception:
                # Best effort: leave this polygon/month empty, but count the
                # failure so it is reported instead of vanishing silently.
                skipped_masks += 1
                continue
            data = out_image[0]  # first band
            values = data[data != -9999]  # drop the fill written outside the polygon
            polygon_pixel_ffdi[i][t_idx].extend(values.tolist())

if skipped_masks:
    print(f"Skipped {skipped_masks} polygon/raster pairs that could not be masked.")

# === 6. Plot and Save in Two Portrait Pages ===
def plot_page(polygons, page_num, rows, cols):
    """Draw one A4-portrait page of monthly FFDI box-plots and save it as a PNG.

    Reads the module-level ``polygon_pixel_ffdi``, ``top15_gdf``, ``dates`` and
    ``output_folder``. The page is written to
    ``<output_folder>/ffdi_boxplots_page<page_num>.png``.

    Args:
        polygons: Iterable of polygon positions (row labels of ``top15_gdf``)
            to plot, one panel each. Positions with no row in ``top15_gdf``
            are skipped.
        page_num: Page number used in the figure title and the file name.
        rows: Number of subplot rows on the page.
        cols: Number of subplot columns on the page.

    Raises:
        ValueError: If there are more polygons to plot than panels on the page.
    """
    # squeeze=False always yields a 2-D array, so a 1x1 grid can be flattened too.
    fig, axes = plt.subplots(rows, cols, figsize=(8.3, 11.7), squeeze=False)  # A4 portrait in inches
    axes = axes.flatten()

    # Only plot positions that exist, so fewer polygons than requested leave
    # panels blank instead of raising a KeyError.
    panels = [p for p in polygons if p in top15_gdf.index]
    if len(panels) > len(axes):
        raise ValueError(
            f"{len(panels)} polygons do not fit on a {rows}x{cols} page"
        )

    # boxplot() draws box k (0-based) at x = k + 1, so the tick for month index
    # j belongs at j + 1. One tick every 12 months, labelled with that month's year.
    tick_indices = range(0, len(dates), 12)
    xticks = [j + 1 for j in tick_indices]
    xtick_labels = [dates[j].strftime("%Y") for j in tick_indices]

    for ax, poly_idx in zip(axes, panels):
        box_data = polygon_pixel_ffdi[poly_idx]
        bp = ax.boxplot(box_data, patch_artist=True, widths=0.5, showfliers=False)

        for box in bp['boxes']:
            box.set_facecolor("lightblue")
            box.set_edgecolor("blue")

        area_km2 = top15_gdf.loc[poly_idx, "area_km2"]
        ax.set_title(f"Polygon {poly_idx + 1} | Area: {area_km2:.2f} km²", fontsize=10)
        ax.grid(True, linestyle="--", alpha=0.5)
        ax.set_ylim(0, 50)  # Fixed Y-axis range for all subplots

        ax.set_xticks(xticks)
        ax.set_xticklabels(xtick_labels, rotation=45, fontsize=8)
        ax.tick_params(axis='y', labelsize=8)

    # Hide unused subplots (also correct when nothing was plotted)
    for ax in axes[len(panels):]:
        fig.delaxes(ax)

    fig.suptitle(f"FFDI Boxplots – Page {page_num}", fontsize=14)
    fig.text(0.5, 0.04, 'Year (Monthly Data)', ha='center', fontsize=12)
    fig.text(0.06, 0.5, 'FFDI', va='center', rotation='vertical', fontsize=12)
    fig.tight_layout(rect=[0.08, 0.06, 0.95, 0.94])

    out_path = os.path.join(output_folder, f"ffdi_boxplots_page{page_num}.png")
    # Save and close this figure explicitly rather than whichever is "current".
    fig.savefig(out_path, dpi=300)
    plt.close(fig)
    print(f"Saved: {out_path}")

# === Render both pages: polygons 0-8 on a 3x3 grid, polygons 9-14 on a 3x2 grid ===
page_layouts = [
    (range(0, 9), 3, 3),
    (range(9, 15), 3, 2),
]
for page_number, (page_polygons, n_rows, n_cols) in enumerate(page_layouts, start=1):
    plot_page(page_polygons, page_num=page_number, rows=n_rows, cols=n_cols)


## Section 4 – Heat‑map of High‑Risk Area (%) per Polygon

In [None]:
"""
Section 4 – Heat‑map of High‑Risk Area (%) per Polygon

This cell performs the entire workflow for section 4 – heat‑map of high‑risk area (%) per polygon.

"""

import geopandas as gpd
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import seaborn as sns
from rasterstats import zonal_stats
from tqdm import tqdm
import rasterio

# ------------------------
# 1. Input file paths
# ------------------------
shapefile_path = "C:/Users/gades/Documents/koala_Highsuitability.shp"
raster_folder = "C:/Users/gades/Desktop/Thesis/datasets/terra_var/FFDI_finaldf90"
output_dir = "C:/Users/gades/Desktop/Thesis/datasets/koala results/"
os.makedirs(output_dir, exist_ok=True)

# ------------------------
# 2. Load polygons and rasters
# ------------------------
gdf = gpd.read_file(shapefile_path)
raster_files = sorted(glob.glob(os.path.join(raster_folder, "ffdi_*.tif")))
# Validate with an explicit raise: an assert would be stripped under `python -O`.
if len(raster_files) != 132:
    raise ValueError(f"Expected 132 rasters, found {len(raster_files)}")

# Reproject polygons to match raster CRS
with rasterio.open(raster_files[0]) as src:
    raster_crs = src.crs
gdf = gdf.to_crs(raster_crs)

# Calculate area and sort polygons by area (descending).
# The area is measured in the equal-area CRS EPSG:6933 (as in Section 3), so the
# ranking does not depend on the units or distortion of the raster CRS.
gdf["area"] = gdf.geometry.to_crs(epsg=6933).area  # m²
gdf_sorted = gdf.sort_values(by="area", ascending=False).reset_index(drop=True)
# Polygon_1 is the largest polygon, Polygon_2 the next largest, and so on.
gdf_sorted["Polygon_ID"] = [f"Polygon_{i+1}" for i in range(len(gdf_sorted))]

# ------------------------
# 3. Calculate % of area under high risk (FFDI ≥ 12)
# ------------------------
# Rows follow gdf_sorted, columns follow raster_files. Cells stay 0 when a
# polygon has no valid pixels in a raster.
percentage_matrix = np.zeros((len(gdf_sorted), len(raster_files)))

for j, raster_path in enumerate(tqdm(raster_files, desc="Processing rasters")):
    # categorical=True returns, per polygon, a {pixel value: pixel count} dict.
    # nodata is set to -9999, the value the other sections pass for these
    # rasters, so fill pixels are excluded from the total instead of diluting
    # the percentage.
    stats = zonal_stats(
        gdf_sorted,
        raster_path,
        stats=None,
        categorical=True,
        nodata=-9999
    )

    for i, stat in enumerate(stats):
        total_pixels = sum(stat.values())
        if total_pixels == 0:
            continue  # keep the 0 already stored in the matrix
        # Accept NumPy scalars as well as plain Python numbers as pixel values.
        high_risk_pixels = sum(
            count
            for value, count in stat.items()
            if isinstance(value, (int, float, np.number)) and value >= 12
        )
        percentage_matrix[i, j] = (high_risk_pixels / total_pixels) * 100

# ------------------------
# 4. Label the matrix: one row per polygon, one column per month
# ------------------------
month_labels = pd.date_range("2013-01", periods=132, freq="MS").strftime("%Y-%m")
polygon_labels = list(gdf_sorted["Polygon_ID"])
df_percent = pd.DataFrame(data=percentage_matrix, index=polygon_labels, columns=month_labels)

# ------------------------
# 5. Draw the heat-map of % high-risk area and write it to disk
# ------------------------
heatmap_path = os.path.join(output_dir, "heatmap_ffdi_percent_per_polygon_sorted_by_area.png")

heatmap_fig, heatmap_ax = plt.subplots(figsize=(22, 10))
sns.heatmap(
    df_percent,
    ax=heatmap_ax,
    vmin=0,
    vmax=100,  # fixed 0-100 % colour scale
    cmap="YlOrRd",
    cbar_kws={'label': 'Area under FFDI ≥ 12 (%)'},
)
plt.title("Heatmap: % Area under High Fire Risk per Koala Polygon (2013–2023)")
plt.xlabel("Month")
plt.ylabel("Polygon (Sorted by Area)")
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig(heatmap_path, dpi=300)
plt.close()

# ------------------------
# 6. Report polygons with >50% high-risk area in more than 6 months
# ------------------------
# For every polygon, count the months in which over half its area is high risk.
months_over_half = (df_percent > 50).sum(axis=1)
polygons_meeting_criteria = months_over_half[months_over_half > 6]

if polygons_meeting_criteria.empty:
    print("No polygons exceeded 50% high-risk area in more than 6 months.")
else:
    print("Polygons with >50% area under high risk (FFDI ≥ 12) in >6 months:")
    # Look the 'fid' attribute up by polygon label when the layer has one.
    has_fid = 'fid' in gdf_sorted.columns
    fid_by_label = gdf_sorted.set_index("Polygon_ID")['fid'] if has_fid else None
    for idx, count in polygons_meeting_criteria.items():
        fid = fid_by_label[idx] if has_fid else 'N/A'
        print(f"  - {idx} (fid: {fid}): {count} months")
