In [1]:
import os
import glob
import pandas as pd

def filter_lake_features(input_folder, output_folder):
    """Copy every ``Lake_*.csv`` in ``input_folder`` to ``output_folder``,
    keeping only a fixed set of columns.

    The retained columns are the identifier/time/target columns, the
    predictor columns and the static lake parameters listed below. Columns
    are written in the order of that list, not in the order they appear in
    the input file. Expected columns that are absent from a file are skipped
    and reported with a printed warning.

    Args:
        input_folder: Directory searched (non-recursively) for files matching
            ``Lake_*.csv``.
        output_folder: Directory the filtered CSVs are written to; created if
            it does not exist.

    Returns:
        None. Results are written to disk as ``Lake_<id>.csv`` without the
        DataFrame index.

    Note:
        Processing is best-effort: any exception raised while handling one
        file is printed and the loop continues with the next file.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Columns to keep
    time_cols = ["Lake_ID", "Year", "Week", "chla_mean"]
    predictors = [
        "lswt_mean", "lake_mix_layer_temperature", "temperature_2m",
        "runoff_sum", "surface_runoff_sum", "total_precipitation_sum"
    ]
    lake_params = [
        "Lake_area", "Shore_len", "Shore_dev", "Vol_total", "Depth_avg",
        "Dis_avg", "Res_time", "Elevation", "Slope_100", "Wshd_area"
    ]
    selected_cols = time_cols + predictors + lake_params
    prefix_len = len("Lake_")

    # glob's ordering depends on the filesystem; sort so that the processing
    # order (and therefore the log output) is reproducible between runs.
    for file in sorted(glob.glob(os.path.join(input_folder, "Lake_*.csv"))):
        try:
            df = pd.read_csv(file)

            # Keep only selected columns, in the canonical order
            present = [col for col in selected_cols if col in df.columns]
            missing = [col for col in selected_cols if col not in df.columns]
            if missing:
                # Surface schema drift instead of silently writing a
                # narrower file than the other lakes.
                print(f"Warning: {file} is missing columns: {missing}")
            df = df[present]

            # Derive the ID by stripping only the "Lake_" prefix and the
            # extension. Splitting on "_" / "." would truncate IDs that
            # contain those characters and let distinct inputs overwrite
            # each other's output.
            stem = os.path.splitext(os.path.basename(file))[0]
            lake_id = stem[prefix_len:]

            # Save to new folder
            out_path = os.path.join(output_folder, f"Lake_{lake_id}.csv")
            df.to_csv(out_path, index=False)
        except Exception as e:
            print(f"Error processing {file}: {e}")




In [2]:

# Write column-filtered copies of the per-lake CSVs into the output folder.
filter_lake_features(
    input_folder="Datasets/No_Fire_Lake_CSVs",
    output_folder="Datasets/Filtered_LSTM_Input",
)