In [85]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def week_start_end(year, week):
    """Return the first and last day of a fixed 7-day week bin within a year.

    Week 1 starts on 1 January of ``year`` and each later week starts seven
    days after the previous one. The last day is clipped to 31 December of
    the same year, so the final bin of a year can be shorter than seven days.

    Args:
        year: Calendar year; coerced with ``int``.
        week: 1-based week number. The day offset ``(week - 1) * 7`` is
            truncated with ``int``.

    Returns:
        A ``(start, end)`` tuple of ``datetime`` objects.
    """
    yr = int(year)
    offset_days = int((week - 1) * 7)

    first_day = datetime(yr, 1, 1) + timedelta(days=offset_days)
    last_day = first_day + timedelta(days=6)

    # Never let a bin run past the end of its own year.
    year_end = datetime(yr, 12, 31)
    if year_end < last_day:
        last_day = year_end

    return first_day, last_day

def mask_outliers(df, variable):
    """Blank out the statistics of rows whose mean is an outlier.

    Only the offending rows are masked (no ± buffer around them). A row is an
    outlier when its ``mean`` is greater than or equal to the threshold for
    ``variable``; its ``mean``, ``median`` and ``stddev`` are then set to NaN.
    All other columns are left untouched.

    Args:
        df: DataFrame with at least ``mean``, ``median`` and ``stddev`` columns.
        variable: Variable name, matched case-insensitively against
            ``chla``, ``lswt`` and ``turb``. Any other name disables masking.

    Returns:
        A masked copy of ``df``; the input frame is not modified.
    """
    # Keys must be lowercase because the lookup below lowercases `variable`.
    # A mixed-case key (e.g. "LSWT") can never match and would silently
    # disable masking for that variable.
    thresholds = {
        "chla": 50,
        "lswt": 305,  # NOTE(review): presumably Kelvin — confirm against the data source
        "turb": 20,
    }
    threshold = thresholds.get(variable.lower(), np.inf)

    masked = df.copy()
    is_outlier = masked["mean"] >= threshold
    masked.loc[is_outlier, ["mean", "median", "stddev"]] = np.nan
    return masked




def process_file_with_variable(filepath, variable):
    """Aggregate one lake's CSV time series into per-week statistics.

    The CSV must provide the columns ``date``, ``n``, ``ntot``, ``nNA``,
    ``mean``, ``median`` and ``stddev``. Rows dated before 2002-01-01 are
    dropped, outliers are masked via ``mask_outliers``, and the remaining rows
    are grouped into fixed 7-day bins counted from 1 January of each year.

    Args:
        filepath: Path to the CSV. The file name must start with
            ``ID<digits>_``; the digits become the ``Lake_ID`` column.
        variable: Variable name forwarded to ``mask_outliers``.

    Returns:
        A DataFrame with columns ``Lake_ID, Year, Week, week_start, week_end,
        n, ntot, nNA, mean, median, stddev``, or ``None`` when no rows remain
        after the date filter.

    Raises:
        ValueError: If the leading file-name token is not an integer once
            "ID" is removed.
    """
    records = pd.read_csv(filepath, parse_dates=["date"]).rename(columns={"date": "Date"})

    # The lake identifier is the first "_"-separated token of the file name.
    id_token = os.path.basename(filepath).split("_")[0]
    lake_id = int(id_token.replace("ID", ""))

    cutoff = pd.Timestamp("2002-01-01")
    records = records[records["Date"] >= cutoff].copy()
    if records.empty:
        return None

    # Week index = whole days elapsed since 1 January, in blocks of seven (1-based).
    records["Year"] = records["Date"].dt.year
    jan_first = pd.to_datetime(records["Year"].astype(str) + "-01-01")
    days_into_year = (records["Date"] - jan_first).dt.days
    records["Week"] = (days_into_year // 7 + 1).astype(int)

    # Remove outliers based on variable type
    records = mask_outliers(records, variable)

    # Weekly aggregation: counts are summed, statistics are averaged
    # (median of the medians for the "median" column).
    aggregations = {
        "n": "sum",
        "ntot": "sum",
        "nNA": "sum",
        "mean": "mean",
        "median": "median",
        "stddev": "mean",
    }
    weekly = records.groupby(["Year", "Week"]).agg(aggregations).reset_index()

    def _week_bounds(row):
        # Expand the (start, end) tuple into two columns.
        return pd.Series(week_start_end(row["Year"], row["Week"]))

    weekly[["week_start", "week_end"]] = weekly.apply(_week_bounds, axis=1)
    weekly["Lake_ID"] = lake_id

    column_order = [
        "Lake_ID", "Year", "Week", "week_start", "week_end",
        "n", "ntot", "nNA", "mean", "median", "stddev",
    ]
    weekly = weekly[column_order]

    return weekly.replace({None: np.nan})

def batch_process_by_variable(input_folder, output_folder, variable):
    """Build weekly CSVs for every matching file in ``input_folder``.

    A file is processed when its name ends with ".csv" and contains the
    upper-cased ``variable``. Each result is written to ``output_folder`` under
    the input name with ``_<VARIABLE>_weekly`` inserted before the extension.
    Processing is best-effort: a failure on one file is reported and the loop
    moves on to the next file.

    Args:
        input_folder: Directory holding the per-lake CSV files.
        output_folder: Directory for the weekly CSVs; created if missing.
        variable: Variable name (e.g. "chla", "turb", "LSWT"); used for file
            matching and forwarded to ``process_file_with_variable``.

    Returns:
        None. Progress is reported with ``print``.
    """
    os.makedirs(output_folder, exist_ok=True)

    tag = variable.upper()
    suffix = ".csv"

    # os.listdir returns entries in arbitrary order; sort so runs (and the
    # printed log) are reproducible.
    for fname in sorted(os.listdir(input_folder)):
        if not fname.endswith(suffix) or tag not in fname:
            continue

        input_path = os.path.join(input_folder, fname)
        try:
            weekly_df = process_file_with_variable(input_path, variable)
            if weekly_df is None or weekly_df.empty:
                print(f"Skipped: {fname}")
                continue

            # Strip only the trailing extension: str.replace would rewrite
            # every ".csv" occurring anywhere in the file name.
            stem = fname[:-len(suffix)]
            output_name = f"{stem}_{tag}_weekly{suffix}"
            output_path = os.path.join(output_folder, output_name)
            weekly_df.to_csv(output_path, index=False, na_rep="NaN")
            print(f"Saved: {output_name}")
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole batch.
            print(f"Failed to process {fname}: {e}")


In [86]:
import warnings

# Suppress RuntimeWarning and DeprecationWarning messages before the batch run.
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Build the weekly chlorophyll-a files. Forward slashes are used throughout
# (matching the turbidity and LSWT runs): a backslash is a path separator only
# on Windows, so "CNR\weekly" would name a different directory elsewhere.
batch_process_by_variable(r"Datasets/CNR/CHLA", r"Datasets/CNR/weekly/CHLA", "chla")



Saved: ID100000001_curonian_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000002_katithandalakeeyre_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000003_theewaterskloofreservoir_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000004_loughmourne_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000011_rukwa_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000012_ijsselmeer_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000013_kyoga_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000014_playgreenlake_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000015_littleplaygreenlake_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000016_markermeer_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID100000033_thompson_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID1003_talbot_CHLA_19920926_20221231_v2.1.0_f60_CHLA_weekly.csv
Saved: ID1007_guarico_CHLA_1

In [87]:
import warnings

# Suppress RuntimeWarning and DeprecationWarning messages before the batch run.
warnings.filterwarnings(action="ignore", category=RuntimeWarning)
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Build the weekly turbidity files.
batch_process_by_variable(
    input_folder=r"Datasets/CNR/turbidity",
    output_folder=r"Datasets/CNR/weekly/turbidity",
    variable="turb",
)



Saved: ID100000001_curonian_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000002_katithandalakeeyre_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000003_theewaterskloofreservoir_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000004_loughmourne_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000011_rukwa_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000012_ijsselmeer_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000013_kyoga_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000014_playgreenlake_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000015_littleplaygreenlake_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000016_markermeer_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID100000033_thompson_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID1003_talbot_TURB_19920926_20221231_v2.1.0_f80_TURB_weekly.csv
Saved: ID1007_guarico_TURB_1

In [88]:
import warnings

# Suppress RuntimeWarning and DeprecationWarning messages before the batch run.
warnings.filterwarnings(action="ignore", category=RuntimeWarning)
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Build the weekly lake-surface-water-temperature files.
batch_process_by_variable(
    input_folder=r"Datasets/CNR/LSWT",
    output_folder=r"Datasets/CNR/weekly/LSWT",
    variable="LSWT",
)



Saved: ID100000001_curonian_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000002_katithandalakeeyre_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000003_theewaterskloofreservoir_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000004_loughmourne_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000011_rukwa_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000012_ijsselmeer_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000013_kyoga_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000014_playgreenlake_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000015_littleplaygreenlake_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000016_markermeer_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID100000033_thompson_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID1003_talbot_LSWT_19920926_20221231_v2.1.0_f4_LSWT_weekly.csv
Saved: ID1007_guarico_LSWT_19920926_2022