In [2]:
# PV Optimization Notebook
# ========================
# Imports simulate_pv() from scripts/PV_Calculation.py

import os, sys
import pandas as pd
import numpy as np

# 1) Point to project root & add scripts to PYTHONPATH
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
scripts_dir  = os.path.join(project_root, 'scripts')
sys.path.insert(0, scripts_dir)

from PV_Calculation import simulate_pv

# 2) Data paths
data_dir     = os.path.join(project_root, 'data')
weather_file = os.path.join(data_dir, 'Bonfire_2025.epw')
measured_csv = os.path.join(data_dir, 'PV_Generation_excel.csv')
output_pref  = os.path.join(project_root, 'pv_optimized_losses')

# 3) Load half-hourly measured data (drop missing block)
def load_measured(path: str) -> pd.Series:
    df = pd.read_csv(
        path,
        usecols=[0,1],
        names=['datetime','measured_kwh'],
        header=0,
        parse_dates=['datetime']
    )
    df.set_index('datetime', inplace=True)
    # exclude 2024-03-03 18:00 → 2024-08-27 17:00
    start_miss = pd.Timestamp('2024-03-03 18:00')
    end_miss   = pd.Timestamp('2024-08-27 17:00')
    return df[(df.index < start_miss) | (df.index > end_miss)]['measured_kwh']

measured = load_measured(measured_csv)

# 4) Helpers: simulate & upsample
def simulate_hourly_pv(loss_pct: float) -> pd.Series:
    df_h = simulate_pv(
        weather_file=weather_file,
        system_capacity_kw=10.0,
        losses_pct=loss_pct,
        start_year=2024
    )
    return df_h['simulated_kwh']

def to_halfhourly(hourly: pd.Series) -> pd.Series:
    return hourly.resample('30min').interpolate('linear')

# 5) Daily % difference
def daily_pct_diff(sim: pd.Series, meas: pd.Series) -> pd.DataFrame:
    sim_d  = sim.resample('D').sum()
    meas_d = meas.resample('D').sum()
    df     = pd.concat([sim_d, meas_d], axis=1, join='inner')
    df.columns = ['sim','meas']
    df = df[df['meas'] > 0]
    df['pct_diff'] = ((df['sim'] - df['meas']).abs() / df['meas']) * 100
    return df[['pct_diff']]

# 6) Search 10% → 50% in 0.5% increments
loss_range = np.arange(10.0, 50.1, 0.5)
threshold  = 10.0
best = {'loss': None, 'avg_diff': np.inf}

for loss in loss_range:
    sim_hh   = to_halfhourly(simulate_hourly_pv(loss))
    daily_df = daily_pct_diff(sim_hh, measured)
    avg_pct  = daily_df['pct_diff'].mean()
    if avg_pct < best['avg_diff']:
        best.update({
            'loss': loss,
            'avg_diff': avg_pct,
            'daily_df': daily_df,
            'sim_hh': sim_hh
        })

# 7) Report & save
print(f"Optimal losses: {best['loss']}% → Avg daily diff = {best['avg_diff']:.2f}%")
print("✅ Within threshold" if best['avg_diff'] <= threshold else "⚠️ Above threshold")

# Write out CSVs
daily_csv = f"{output_pref}_{best['loss']}pct_daily_diff.csv"
best['daily_df'].to_csv(daily_csv, index_label='date')

half_csv = f"{output_pref}_{best['loss']}pct_halfhourly.csv"
combined = pd.concat([measured, best['sim_hh']], axis=1)
combined.columns = ['measured_kwh','simulated_kwh']
combined.to_csv(half_csv, index_label='datetime')

print("Saved:", daily_csv, half_csv)


Optimal losses: 50.0% → Avg daily diff = 178.92%
⚠️ Above threshold
Saved: /Users/petertunali/Documents/GitHub/Battery_Optimisation/pv_optimized_losses_50.0pct_daily_diff.csv /Users/petertunali/Documents/GitHub/Battery_Optimisation/pv_optimized_losses_50.0pct_halfhourly.csv
