In [15]:
# %% [markdown]
# # PV Match & Full Half-Hourly Comparison With/Without September
# - Linear interpolation to half-hourly  
# - Select shading by avg monthly % error ≤ 5% (fallback to min avg error)  
# - Export **all** half-hour rows to CSV & print summary metrics for each scenario

# %% [code]
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
import PySAM.Pvwattsv8 as pv

# ─── Paths & Settings ─────────────────────────────────────────────────────────────
project_root = Path().cwd().parent
data_dir     = project_root / 'data'
weather_file = data_dir / 'Bonfire_2025.epw'
meas_csv     = data_dir / 'PV_Generation_excel.csv'
out_dir      = project_root / 'outputs_PV_match'
out_dir.mkdir(parents=True, exist_ok=True)

# ─── PV simulation + half-hour interpolation ──────────────────────────────────────
def simulate_pv(weather_file, shading, start_year=2024):
    # Fixed loss categories except shading
    soiling, snow, mismatch, wiring = 2.0, 0.0, 2.0, 2.0
    connections, lid, nameplate = 0.5, 1.5, 1.0
    age, availability = 0.0, 3.0
    tot_loss = soiling + shading + snow + mismatch + wiring + connections + lid + nameplate + age + availability

    m = pv.default('PVWattsNone')
    m.SolarResource.solar_resource_file = str(weather_file)
    sd = m.SystemDesign
    sd.system_capacity = 10.0
    sd.dc_ac_ratio     = 1.15
    sd.inv_eff         = 96.0
    sd.tilt            = 10.0
    sd.azimuth         = 18.0
    sd.array_type      = 1
    sd.module_type     = 0
    sd.gcr             = 0.3
    sd.losses          = tot_loss
    m.execute()

    ac_kwh = np.array(m.Outputs.ac) / 1000.0
    idx_h = pd.date_range(datetime(start_year,1,1), periods=len(ac_kwh), freq='h') \
            + pd.Timedelta(hours=1)
    df_h = pd.DataFrame({'kWh': ac_kwh}, index=idx_h)
    df_h = df_h[df_h.index.year == start_year]

    half_start = df_h.index[0] - pd.Timedelta(minutes=30)
    half_idx   = pd.date_range(half_start, periods=len(df_h)*2, freq='30min')
    P = df_h['kWh'].rename('P').reindex(half_idx).interpolate(method='linear')
    df_half = pd.DataFrame(index=half_idx)
    df_half['simulated_kwh'] = P * 0.5
    return df_half[df_half.index.year == start_year]

# ─── Load measured data ────────────────────────────────────────────────────────────
raw = pd.read_csv(meas_csv, usecols=[0,1], header=0)
raw.columns = ['datetime','measured_kwh']
raw['datetime'] = pd.to_datetime(raw['datetime'])
meas_full = raw.set_index('datetime')

# ─── Scenarios ───────────────────────────────────────────────────────────────────
month_scenarios = {
    'Including Sep': [1,2,9,10,11,12],
    'Excluding Sep': [1,2,10,11,12],
}

for label, months in month_scenarios.items():
    # Filter measured & print coverage
    meas = meas_full[meas_full.index.month.isin(months)]
    print(f"\nMeasured covers: {meas.index.min()} → {meas.index.max()}")
    print(f"--- Scenario: {label} (months {months}) ---")

    # Aggregate measured to hourly for matching
    meas_hr = meas.resample('h').sum()

    # Sweep shading
    shades, records = np.arange(10, 60.5, 0.5), []
    for s in shades:
        sim_half = simulate_pv(weather_file, shading=s)
        sim_hr   = sim_half.resample('h').sum()
        df = meas_hr.join(sim_hr.rename(columns={'simulated_kwh':'sim_kwh'}), how='inner').dropna()

        mm_meas = df['measured_kwh'].groupby(df.index.month).sum()
        mm_sim  = df['sim_kwh'].groupby(df.index.month).sum()
        pct_err = ((mm_sim - mm_meas) / mm_meas) * 100
        abs_errs = [abs(pct_err[m]) for m in months]
        records.append({'shading': s, 'monthly_avg_pcterr': np.mean(abs_errs)})

    sdf = pd.DataFrame(records).set_index('shading')
    cands = sdf[sdf['monthly_avg_pcterr'] <= 5]

    if not cands.empty:
        best = cands['monthly_avg_pcterr'].idxmin()
        print("✔ Found shading avg-pcterr ≤5%.")
    else:
        best = sdf['monthly_avg_pcterr'].idxmin()
        print("⚠ No shading avg-pcterr ≤5%. Using fallback.")
    print(f"✔ Selected shading = {best}% (avg error = {sdf.loc[best,'monthly_avg_pcterr']:.2f}%)")

    # Final comparison & full export
    final_half = simulate_pv(weather_file, shading=best)
    full = final_half.copy()
    # join measured_kwh for all half-hours
    full = full.join(meas_full['measured_kwh'], how='left')
    full['difference'] = full['simulated_kwh'] - full['measured_kwh']

    suffix = 'incl' if 'Including' in label else 'excl'
    out_csv = out_dir / f'full_comparison_{suffix}_sep.csv'
    full.to_csv(out_csv, index_label='time',
                columns=['measured_kwh','simulated_kwh','difference'])
    print(f"✔ Exported full half-hourly comparison → {out_csv}")

    # Summary metrics (on times with measurements)
    comp = full.dropna(subset=['measured_kwh'])
    avg_diff = comp['difference'].abs().mean()
    pct_series = (comp['difference'] / comp['measured_kwh']).replace([np.inf,-np.inf], np.nan).abs()
    avg_pct_half = pct_series.mean() * 100
    print(f"Average absolute difference per half-hour: {avg_diff:.3f} kWh ({avg_pct_half:.2f}%)")

    # Monthly totals & % diffs
    print("\nMonth | Measured (kWh) | Simulated (kWh) | % diff")
    m_meas = comp['measured_kwh'].groupby(comp.index.month).sum()
    m_sim  = comp['simulated_kwh'].groupby(comp.index.month).sum()
    m_pct  = ((m_sim - m_meas) / m_meas) * 100
    for m in months:
        print(f"{m:02d}    {m_meas[m]:10.1f}       {m_sim[m]:10.1f}    {m_pct[m]:6.2f}%")

    # Average monthly percent diff
    avg_month_pct = m_pct.abs().mean()
    print(f"\nAverage monthly percent difference: {avg_month_pct:.2f}%")
    print(f"\nFinal shading used: {best}%")



Measured covers: 2024-01-01 00:30:00 → 2025-01-01 00:00:00
--- Scenario: Including Sep (months [1, 2, 9, 10, 11, 12]) ---
⚠ No shading avg-pcterr ≤5%. Using fallback.
✔ Selected shading = 44.5% (avg error = 7.35%)
✔ Exported full half-hourly comparison → /Users/petertunali/Documents/GitHub/Battery_Optimisation/outputs_PV_match/full_comparison_incl_sep.csv
Average absolute difference per half-hour: 0.274 kWh (16202.20%)

Month | Measured (kWh) | Simulated (kWh) | % diff
01         806.4            812.2      0.72%
02         712.8            680.3     -4.56%
09         413.3            512.7     24.06%
10         672.1            653.5     -2.76%
11         670.0            723.1      7.92%
12         794.8            762.5     -4.07%

Average monthly percent difference: 8.72%

Final shading used: 44.5%

Measured covers: 2024-01-01 00:30:00 → 2025-01-01 00:00:00
--- Scenario: Excluding Sep (months [1, 2, 10, 11, 12]) ---
✔ Found shading avg-pcterr ≤5%.
✔ Selected shading = 43.5% (avg e