# In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# ==== CONFIGURATION ====
# Root directory of the model run; every path below is built from it.
BASE_DIR = "/scratch/hjh7hp/Watershed_22_2025_fall/Watershed22_with_new_summer/Sharadha_khola_watershed/1976_SA_1/salyan/model"

# Directory holding the processed per-simulation CSV outputs.
SIM_DIR = os.path.join(BASE_DIR, "SA_output_basin_csv")
# Observed runoff table.
OBS_CSV = os.path.join(BASE_DIR, "W22_runoff.csv")
# Destination CSV for the computed objective-function values.
RESULT_CSV = os.path.join(BASE_DIR, "objective_function_result.csv")

# NOTE: NUM_DEFS_FOLDERS is not set here; it must already be defined (for
# example by an earlier notebook cell) before this script is run.

# === ANALYSIS DATE RANGE ===
# Both bounds are used inclusively when the observed and simulated tables
# are filtered.
# NOTE(review): the window ends on Dec 30, not Dec 31 -- confirm intended.
START_DATE, END_DATE = "1976-01-01", "1977-12-30"

# ---- Objective Functions ----
def calc_nse(sim, obs):
    """Return the Nash-Sutcliffe efficiency (NSE) of *sim* against *obs*.

    Computed as ``1 - sum((sim - obs)**2) / sum((obs - mean(obs))**2)``.

    Args:
        sim: Simulated values; any array-like of numbers (list, ndarray, ...).
        obs: Observed values; array-like with the same shape as *sim*.

    Returns:
        The NSE as a float, or ``np.nan`` when it is undefined: the input is
        empty, or *obs* has zero variance so the denominator is zero.
    """
    # Coerce up front so plain Python lists work with the arithmetic below.
    sim = np.asarray(sim, dtype=float)
    obs = np.asarray(obs, dtype=float)
    if obs.size == 0:
        return np.nan
    denom = np.sum((obs - obs.mean()) ** 2)
    # A constant observed series would otherwise trigger a division by zero
    # (yielding -inf or nan together with a RuntimeWarning).
    if denom == 0:
        return np.nan
    return 1 - np.sum((sim - obs) ** 2) / denom

def calc_rmse(sim, obs):
    """Return the root-mean-square error between *sim* and *obs*.

    The mean squared error comes from scikit-learn's ``mean_squared_error``
    (called with *obs* as the first argument and *sim* as the second); its
    square root is returned.
    """
    mse = mean_squared_error(obs, sim)
    return np.sqrt(mse)

def calc_r2(sim, obs):
    """Return scikit-learn's ``r2_score`` with *obs* as truth and *sim* as prediction.

    NOTE(review): ``r2_score`` is the coefficient of determination, which is
    expected to coincide with ``calc_nse`` for the same inputs rather than
    with the squared Pearson correlation -- confirm this is the intended
    metric.
    """
    score = r2_score(obs, sim)
    return score

def calc_lognse(sim, obs):
    """Return the Nash-Sutcliffe efficiency of log-transformed values.

    Only pairs where both the simulated and the observed value are strictly
    positive are used, because the natural logarithm is taken of both.

    Args:
        sim: Simulated values; any array-like of numbers.
        obs: Observed values; array-like with the same shape as *sim*.

    Returns:
        The log-NSE as a float, or ``np.nan`` when it is undefined: no pair
        is strictly positive, or the retained log-observations have zero
        variance (for example only one valid pair remains).
    """
    # Coerce up front so plain Python lists support the comparisons and
    # boolean-mask indexing below.
    sim = np.asarray(sim, dtype=float)
    obs = np.asarray(obs, dtype=float)
    mask = (sim > 0) & (obs > 0)
    if not np.any(mask):
        return np.nan
    logsim = np.log(sim[mask])
    logobs = np.log(obs[mask])
    denom = np.sum((logobs - logobs.mean()) ** 2)
    # Guard the division: a zero denominator would give -inf or nan along
    # with a RuntimeWarning.
    if denom == 0:
        return np.nan
    return 1 - np.sum((logsim - logobs) ** 2) / denom

# ---- LOAD OBSERVED DATA ----
# Stop immediately, naming the path, when the observed-runoff file is absent.
if not os.path.exists(OBS_CSV):
    raise FileNotFoundError(f"Observed data not found: {OBS_CSV}")

# Read the table, give the runoff column a short name, keep only the two
# columns used later, restrict to the analysis window (both bounds
# inclusive) and index the result by date.
obs_df = (
    pd.read_csv(OBS_CSV, parse_dates=["Date"])
    .rename(columns={"runoff(mm/day)_815km2": "runoff"})
    .loc[:, ["Date", "runoff"]]
    .loc[lambda frame: (frame["Date"] >= START_DATE) & (frame["Date"] <= END_DATE)]
    .set_index("Date")
)

# Calendar years that contain at least one non-missing observation.
years_with_obs = set(obs_df["runoff"].dropna().index.year)
print(f"Years with observed data: {sorted(years_with_obs)}")

results = []

# ---- LOOP OVER SIMULATIONS ----
# One iteration per simulation index i: basin<i>.csv is compared with the
# observed runoff and the scores are stored together with the label defs<i>.
# Simulations that cannot be scored are reported and skipped rather than
# aborting the batch.
for i in range(1, NUM_DEFS_FOLDERS + 1):
    job_name = f"basin{i}"
    defs_name = f"defs{i}"
    sim_csv = os.path.join(SIM_DIR, f"{job_name}.csv")
    if not os.path.exists(sim_csv):
        print(f"WARNING: {sim_csv} not found; skipping.")
        continue
    try:
        sim_df = pd.read_csv(sim_csv, parse_dates=["date"])
    except Exception as e:
        # Deliberate best effort: one unreadable file must not stop the run.
        print(f"ERROR reading {sim_csv}: {e}")
        continue
    # Without this check a file lacking the column would raise an uncaught
    # KeyError further down and terminate the whole loop.
    if "streamflow" not in sim_df.columns:
        print(f"ERROR: {sim_csv} has no 'streamflow' column; skipping.")
        continue

    # Keep only the analysis window (inclusive) and only the column that is
    # scored, so no other simulation column can clash with "runoff" in the
    # merge and get renamed with a suffix.
    in_window = (sim_df["date"] >= START_DATE) & (sim_df["date"] <= END_DATE)
    sim_flow = sim_df.loc[in_window].set_index("date")[["streamflow"]]
    merged = pd.merge(
        sim_flow,
        obs_df[["runoff"]],
        left_index=True,
        right_index=True,
        how="inner",
    )
    if merged.empty:
        print(f"No overlap for {job_name}; skipping.")
        continue

    # Count before BOTH filters so the reported number really covers rows
    # removed for missing values and rows removed for a year without
    # observations, as the message states.
    n_before = len(merged)
    merged = merged[merged.index.year.isin(years_with_obs)]
    merged = merged.dropna(subset=["streamflow", "runoff"])
    dropped_n = n_before - len(merged)
    if dropped_n > 0:
        print(f"{job_name}: Dropped {dropped_n} rows with NaN or no obs year.")
    if merged.empty:
        print(f"{job_name}: No valid rows remaining; skipping.")
        continue

    sim = merged["streamflow"].to_numpy()
    obs = merged["runoff"].to_numpy()
    nse = calc_nse(sim, obs)
    rmse = calc_rmse(sim, obs)
    r2 = calc_r2(sim, obs)
    lognse = calc_lognse(sim, obs)
    results.append({
        "job": job_name,
        "defs": defs_name,
        "NSE": nse,
        "RMSE": rmse,
        "R2": r2,
        "logNSE": lognse,
    })
    print(f"{job_name}: NSE={nse:.3f}, RMSE={rmse:.3f}, R2={r2:.3f}, logNSE={lognse:.3f}")

# ---- SAVE RESULTS ----
# Name the columns explicitly so the CSV always gets its header row, even
# when every simulation was skipped and `results` is empty (an empty frame
# without columns would be written as a header-less file).
df = pd.DataFrame(results, columns=["job", "defs", "NSE", "RMSE", "R2", "logNSE"])
if df.empty:
    print("WARNING: no simulations were scored; writing a header-only CSV.")
df.to_csv(RESULT_CSV, index=False)
print(f"\nSaved results to {RESULT_CSV}")
