In [9]:
import os
import glob
import pandas as pd

def filter_lake_features_with_fire(input_folder, fire_folder, output_folder, fire_end_year=2019):
    """Write per-lake CSVs reduced to selected columns and joined with fire columns.

    Every ``*.csv`` in ``fire_folder`` is read and stacked into a single fire
    table. Each ``Lake_*.csv`` in ``input_folder`` is then:

    1. reduced to the selected columns it actually contains,
    2. limited to rows with ``Year <= fire_end_year``,
    3. left-joined to that lake's fire rows on ``(Lake_ID, Year, Week)``,
    4. written to ``output_folder`` as ``Lake_<id>.csv``.

    Fire values that are missing after the join are written as 0.

    Parameters
    ----------
    input_folder : str
        Folder searched for ``Lake_*.csv`` files. The lake id is taken from
        the first row of each file's ``Lake_ID`` column.
    fire_folder : str
        Folder searched for fire ``*.csv`` files. The stacked table must
        provide ``Lake_ID``, ``year``, ``week`` and the five fire columns
        listed in ``fire_cols_to_include``.
    output_folder : str
        Destination folder; created if it does not exist.
    fire_end_year : int, default 2019
        Last year (inclusive) kept in both the lake rows and the fire rows.

    Raises
    ------
    ValueError
        If ``fire_folder`` contains no ``*.csv`` files.
    KeyError
        If the stacked fire table lacks one of the required columns.

    Notes
    -----
    A failure on an individual lake file is reported on stdout and that file
    is skipped; the remaining files are still processed.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Top 5 FireCCI land covers to include
    fire_cols_to_include = [
        "Grassland",
        "Tree_broad_deciduous",
        "Shrub_herb_flooded",
        "Shrubland",
        "Tree_needle_evergreen"
    ]
    merge_keys = ["Lake_ID", "Year", "Week"]

    # Base columns to keep
    time_cols = ["Lake_ID", "Year", "Week", "chla_mean"]
    predictors = [
        "lswt_mean", "lake_mix_layer_temperature", "temperature_2m",
        "runoff_sum", "surface_runoff_sum", "total_precipitation_sum"
    ]
    lake_params = [
        "Lake_area", "Shore_len", "Shore_dev", "Vol_total", "Depth_avg",
        "Dis_avg", "Res_time", "Elevation", "Slope_100", "Wshd_area"
    ]
    selected_cols = time_cols + predictors + lake_params

    # Load and stack all fire files. Fail with an explicit message instead of
    # pandas' generic "No objects to concatenate" when the folder is empty.
    fire_files = sorted(glob.glob(os.path.join(fire_folder, "*.csv")))
    if not fire_files:
        raise ValueError(f"No fire CSV files found in: {fire_folder}")
    fire_df_all = pd.concat([pd.read_csv(f) for f in fire_files], ignore_index=True)

    # Select, rename and de-duplicate without in-place mutation. Rows repeated
    # across export files would otherwise duplicate lake rows in the left join.
    # NOTE(review): rows sharing a key but carrying different fire values are
    # left untouched here and will still multiply lake rows -- confirm whether
    # such conflicts can occur in the exports.
    fire_df_all = (
        fire_df_all[["Lake_ID", "year", "week"] + fire_cols_to_include]
        .rename(columns={"year": "Year", "week": "Week"})
        .drop_duplicates()
    )
    fire_df_all = fire_df_all[fire_df_all["Year"] <= fire_end_year]

    # Process each lake file (sorted so runs are deterministic)
    failed_files = []
    for file in sorted(glob.glob(os.path.join(input_folder, "Lake_*.csv"))):
        try:
            df = pd.read_csv(file)
            if df.empty:
                # Without rows there is no Lake_ID to read; give a clear reason.
                raise ValueError("file contains no rows")
            lake_id = int(df["Lake_ID"].iloc[0])

            df = df[[col for col in selected_cols if col in df.columns]]
            df = df[df["Year"] <= fire_end_year]

            fire_lake = fire_df_all[fire_df_all["Lake_ID"] == lake_id]
            df = pd.merge(df, fire_lake, on=merge_keys, how="left")

            # Weeks without a matching fire row get 0 instead of NaN.
            df[fire_cols_to_include] = df[fire_cols_to_include].fillna(0)

            out_path = os.path.join(output_folder, f"Lake_{lake_id}.csv")
            df.to_csv(out_path, index=False)

        except Exception as e:
            # Deliberate best-effort: report and continue with the next lake.
            failed_files.append(file)
            print(f"❌ Error processing {file}: {e}")

    if failed_files:
        print(f"⚠️ {len(failed_files)} lake file(s) could not be processed and were skipped.")
    print(f"✅ Finished generating lake files with fire data to: {output_folder}")


In [10]:
# Run the same fire-predictor join for both lake groups (no-fire lakes first,
# then fire lakes), reading fire data from the same folder each time.
for lake_input_dir, lake_output_dir in (
    ("Datasets/No_Fire_Lake_CSVs", "Datasets/No_Fire_With_FirePredictors"),
    ("Datasets/Fire_Lake_CSVs", "Datasets/Fire_With_FirePredictors"),
):
    filter_lake_features_with_fire(
        input_folder=lake_input_dir,
        fire_folder="Datasets/GEE/firecci_raw",
        output_folder=lake_output_dir,
    )


✅ Finished generating lake files with fire data to: Datasets/No_Fire_With_FirePredictors
✅ Finished generating lake files with fire data to: Datasets/Fire_With_FirePredictors
