In [6]:
import os
import glob
import pandas as pd

def interpolate_lake_csvs_with_control(input_folder, output_folder, method="linear", limit=4):
    """Gap-fill every ``Lake_*.csv`` in ``input_folder`` and save it to ``output_folder``.

    For each file:

    1. All ``float64``/``int64`` columns are interpolated with ``method``,
       using ``limit`` as the pandas ``interpolate`` limit in both directions,
       so long gaps may keep NaN values.
    2. The predictor columns listed in ``fallback_fill`` (when present) are
       additionally forward- then backward-filled; a predictor column with no
       values at all is set to 0.
    3. The result is written to ``output_folder`` under the *same file name*
       as the input, so two inputs can never overwrite each other's output.

    A file that raises during processing is reported with a printed message
    and skipped; the remaining files are still processed.

    Args:
        input_folder: Directory searched (non-recursively) for ``Lake_*.csv``.
        output_folder: Directory the processed CSVs are written to; created
            if it does not exist.
        method: Interpolation method passed to ``DataFrame.interpolate``.
        limit: Maximum number of consecutive NaN values to fill, passed to
            ``DataFrame.interpolate``.

    Returns:
        None. Progress and errors are printed.
    """
    # Predictors that get the unconditional fallback fill; every other column
    # keeps whatever NaN the limited interpolation leaves behind.
    fallback_fill = (
        "lswt_mean", "lake_mix_layer_temperature", "runoff_sum",
        "surface_runoff_sum", "temperature_2m", "total_precipitation_sum",
    )

    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order (and therefore the log) is reproducible.
    for file in sorted(glob.glob(os.path.join(input_folder, "Lake_*.csv"))):
        try:
            df = pd.read_csv(file)

            # Step 1: Limited interpolation of the numeric columns
            numeric_cols = df.select_dtypes(include=["float64", "int64"]).columns
            df[numeric_cols] = df[numeric_cols].interpolate(method=method, limit=limit, limit_direction='both')

            # Step 2: Fallback fill only for predictors
            for col in fallback_fill:
                if col in df.columns:
                    df[col] = df[col].ffill().bfill()

                    # ffill + bfill can only leave NaN when the column has no
                    # value at all; fall back to a constant 0 in that case.
                    if df[col].isna().all():
                        df[col] = df[col].fillna(0)

            # Step 3: Save under the input's own file name. Rebuilding the name
            # from a parsed id would map e.g. "Lake_12.csv" and "Lake_12_b.csv"
            # to the same output file and silently drop one of them.
            out_path = os.path.join(output_folder, os.path.basename(file))
            df.to_csv(out_path, index=False)
            print(f"Interpolated and saved: {out_path}")

        except Exception as e:
            # Best effort: report the failing file and continue with the rest.
            print(f"Error processing {file}: {e}")


In [7]:
# Gap-fill every merged lake CSV using the function's defaults (method="linear", limit=4).
interpolate_lake_csvs_with_control("Datasets/Merged_With_Metadata", "Datasets/Interpolated_Lake_CSVs")

Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_100000014.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_100000015.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_100000033.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_1003.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_1009.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_101.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_10161.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_1026.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_10421.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_1053.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_1062.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_107.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_1071.csv
Interpolated and saved: Datasets/Interpolated_Lake_CSVs\Lake_1075.csv
Inter

In [3]:
import os
import glob
import pandas as pd

def check_no_missing_values(folder):
    """Report which ``Lake_*.csv`` files in ``folder`` still contain NaN values.

    Every matching CSV is loaded; if any cell is NaN, the lake id (the part of
    the file name between the first underscore and the first dot that follows)
    is collected. A summary is printed: either the list of affected ids with
    their count, or a confirmation that all files are complete.

    Args:
        folder: Directory searched (non-recursively) for ``Lake_*.csv``.

    Returns:
        None. The report is printed.
    """
    pattern = os.path.join(folder, "Lake_*.csv")
    incomplete_ids = []

    for csv_path in glob.glob(pattern):
        frame = pd.read_csv(csv_path)

        # Per-column flags first, then collapse to a single yes/no answer.
        columns_with_nan = frame.isna().any()
        if not columns_with_nan.any():
            continue

        file_name = os.path.basename(csv_path)
        id_and_suffix = file_name.split("_")[1]
        incomplete_ids.append(id_and_suffix.partition(".")[0])

    if not incomplete_ids:
        print("✅ All files are complete. No missing values found.")
        return

    print(f"⚠️ Files with missing values after interpolation: {incomplete_ids}")
    print(f"Total with missing values: {len(incomplete_ids)}")

# Run the completeness check on the interpolated output folder; it prints the
# ids of all lakes whose CSV still contains at least one NaN.
check_no_missing_values("Datasets/Interpolated_Lake_CSVs")


⚠️ Files with missing values after interpolation: ['100000014', '100000015', '100000033', '1003', '1009', '101', '10161', '1026', '10421', '1053', '1062', '107', '1071', '1075', '110', '1116', '1128', '1129', '114', '1148', '1154', '1169', '118', '1188', '1193', '1199', '12', '122', '12200', '1221', '1222', '1237', '12471', '1249', '127', '128', '13', '130', '13377', '1344', '13484', '1359', '1361', '1368', '1375', '1424', '1425', '1434', '1437', '146', '1467', '1479', '1493', '1498', '15', '1509', '151', '1510', '153', '1547', '155', '1567', '158', '1582', '159', '1594', '1623', '1627', '165', '1676', '1703', '1706', '1714', '172', '1735', '174', '1748', '1756', '176', '1761', '1815', '1818', '1908', '191', '1910', '194', '1945', '198', '1989', '200', '201', '2012', '2021', '2032', '2064', '2097', '21', '2102', '211002', '2132', '214687', '215215', '215311', '215339', '2157', '2162', '2168', '2179', '2190', '2192', '222', '223', '224', '2245', '225', '2253', '2263', '2330', '234', '23