In [None]:
# Cell 1: Import all required libraries
import sys
import time
import os
from pathlib import Path
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from scipy import optimize
from scipy.interpolate import griddata

# Pymoo imports for NSGA-II
from pymoo.core.problem import Problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.termination import get_termination
from pymoo.optimize import minimize
from pymoo.core.callback import Callback
from pymoo.util.display.column import Column
from pymoo.util.misc import termination_from_tuple

# Standard libraries for file handling
import json
import csv
import warnings
warnings.filterwarnings('ignore')  # Suppress warnings

# Additional math and stats
import math
from statistics import mean, median

# 1) Locate project root (one level up from this notebooks directory).
#    `__file__` is not defined when this runs as a notebook cell, so fall back
#    to the parent of the current working directory in that case.
try:
    project_root = Path(__file__).resolve().parent.parent
except NameError:
    project_root = Path.cwd().parent
# Report the root whichever branch produced it (previously this was printed
# only when the fallback was taken).
print(f"Project root identified as: {project_root}")

# 2) Point at your scripts folder - UPDATED PATH
scripts_dir = project_root / "5_nsga" / "5_nsga_scripts_master"
if not scripts_dir.exists():
    # Try alternatives if the first path doesn't exist
    alternatives = [
        project_root / "5_nsga_scripts_master",
        Path.cwd() / "5_nsga_scripts_master",
        Path.cwd().parent / "5_nsga" / "5_nsga_scripts_master"
    ]

    for alt in alternatives:
        if alt.exists():
            scripts_dir = alt
            break
    else:
        raise FileNotFoundError(f"Cannot find scripts directory. Tried: {scripts_dir} and alternatives: {alternatives}")

print(f"Using scripts directory: {scripts_dir}")

# Keep the scripts folder first on sys.path, but do not add a duplicate entry
# every time this cell is re-run.
scripts_path = str(scripts_dir)
if scripts_path in sys.path:
    sys.path.remove(scripts_path)
sys.path.insert(0, scripts_path)

# Pull in the project's own simulation modules from the scripts directory.
# On failure, list what is actually in that directory to help diagnose the
# problem, then re-raise the original ImportError.
try:
    from battery import simulate_battery_dispatch
    from pv import simulate_multi_year_pv
    from fin import compute_financials
    from classic_obj import evaluate_solution
except ImportError as import_error:
    print(f"Error importing modules: {import_error}")
    _script_files = [entry.name for entry in scripts_dir.iterdir() if entry.is_file()]
    print(f"Files in scripts directory: {_script_files}")
    raise
else:
    print("All modules imported successfully")

print("All imports completed successfully")

In [None]:
# Cell 2: Set up output directories
# Results live in a "5_nsga_results" folder inside the scripts directory.
results_dir = scripts_dir / "5_nsga_results"
results_dir.mkdir(exist_ok=True)

# Each run gets its own zero-padded, numbered subfolder: one higher than the
# largest all-digit folder name already present, or 1 if there is none.
existing_runs = [
    entry for entry in results_dir.iterdir()
    if entry.is_dir() and entry.name.isdigit()
]
next_run_num = max((int(entry.name) for entry in existing_runs), default=0) + 1
run_dir = results_dir / f"{next_run_num:03d}"
run_dir.mkdir(exist_ok=True)

print(f"Results will be saved to: {run_dir}")

In [None]:
# Cell 3: Define data directory paths and PV system configurations
# Data directory path - with fallback options.
# The first candidate is a machine-specific absolute path; the alternatives
# are tried in order, and the user is prompted only if none of them exists.
data_dir = Path("/Users/petertunali/Documents/GitHub/Battery_Optimisation/data")
if not data_dir.exists():
    alternatives = [
        project_root / "data",
        project_root.parent / "Battery_Optimisation" / "data",
        Path.cwd() / "data"
    ]

    for alt in alternatives:
        if alt.exists():
            data_dir = alt
            break
    else:
        print("\nWARNING: Data directory not found at expected locations.")
        print("Please enter the absolute path to your data directory:")
        user_path = input().strip()
        # An empty answer would turn into Path("."), which always exists and
        # would silently select the current working directory.
        if not user_path:
            raise FileNotFoundError("No data directory path was entered.")
        # Expand a leading "~" and insist on a directory (a plain file would
        # pass a bare .exists() check).
        data_dir = Path(user_path).expanduser()
        if not data_dir.is_dir():
            raise FileNotFoundError(f"Data directory not found: {data_dir}\nPlease check your path and try again.")

print(f"Using data directory: {data_dir}")

# Check for demand files: the first of these names that exists is used.
demand_paths = [
    data_dir / "PV_Generation_excel.csv",
    data_dir / "Energy_Demand_and_Supply_2024.csv"
]

for p in demand_paths:
    if p.exists():
        demand_file = p
        print(f"\n✔ Found demand file: {p}")
        break
else:
    # None of the expected names exist: show the available CSV files and let
    # the user pick one.
    csv_files = sorted(data_dir.glob("*.csv"))
    if not csv_files:
        raise FileNotFoundError(f"No CSV files found in {data_dir}. Please check your data directory.")

    print("\nNo demand file found with expected name. Available CSV files:")
    # The loop variable must not be called `csv`: that would rebind the
    # imported `csv` module to a Path for the rest of the session.
    for csv_path in csv_files:
        print(f"  - {csv_path.name}")

    # Ask user to select a file
    print("\nPlease enter the name of the demand file from the list above:")
    selected_file = input().strip()
    demand_file = data_dir / selected_file

    # is_file() rather than exists(): an empty answer resolves to the data
    # directory itself, which exists but is not a usable demand file.
    if not demand_file.is_file():
        raise FileNotFoundError(f"Selected file not found: {demand_file}")

# Use specific weather files in the correct order
print("\nLooking for weather files...")
desired_weather_files = [
    "Bonfire_2025.epw",
    "Bonfire_2040_4_5.epw",
    "Bonfire_2050_4_5.epw"
]
required_count = len(desired_weather_files)

weather_files = []
for filename in desired_weather_files:
    file_path = data_dir / filename
    if file_path.exists():
        weather_files.append(str(file_path))
        print(f"  ✔ Found: {filename}")
    else:
        print(f"  ❌ Missing: {filename}")

if len(weather_files) < required_count:
    # If we didn't find all the specific files, look for any .epw files
    print("\nSearching for alternative .epw files...")
    available_epw = sorted(data_dir.glob("*.epw"))
    unused_epw = [epw for epw in available_epw if str(epw) not in weather_files]

    for epw in unused_epw:
        print(f"  Found alternative: {epw.name}")

    # Top up with alternatives in a single bounded pass. The previous
    # `while` loop never terminated when there were not enough unused .epw
    # files to reach the required count.
    still_needed = required_count - len(weather_files)
    for epw in unused_epw[:still_needed]:
        weather_files.append(str(epw))
        print(f"  Using: {epw.name}")

    # If we still don't have enough weather files, we need to check the path
    if len(weather_files) < required_count:
        raise FileNotFoundError(
            f"Could not find the required weather files: {', '.join(desired_weather_files)}\n"
            f"Please ensure these files exist in: {data_dir}"
        )

print("\nUsing these weather files:")
for i, wf in enumerate(weather_files):
    print(f"  {i+1}. {Path(wf).name}")

# Existing PV array (array_type 1 = roof-mounted)
existing_pv = dict(
    name='existing_system',
    system_capacity_kw=10.0,
    tilt=10.0,
    azimuth=18.0,
    shading=43.0,
    array_type=1,
)

# Candidate sites for new PV, listed in priority order. Each row supplies the
# values for the field names below; array_type 1 = roof-mounted,
# 0 = ground-mounted. The ground-mounted site has no capacity limit and
# carries a 20% cost increase.
_PV_OPTION_FIELDS = (
    'name', 'max_capacity_kw', 'tilt', 'azimuth',
    'shading', 'array_type', 'cost_multiplier',
)
_PV_OPTION_ROWS = (
    ('accommodation_block', 33.0, 20.0, 40.0, 0.0, 1, 1.0),
    ('small_shed', 10.0, 20.0, 20.0, 20.0, 1, 1.0),
    ('ground_mounted', float('inf'), 30.0, 5.0, 0.0, 0, 1.2),
)
pv_options = [dict(zip(_PV_OPTION_FIELDS, row)) for row in _PV_OPTION_ROWS]

# Echo the configuration so it is visible in the notebook output.
print("\nExisting PV system:")
print(
    f"  - {existing_pv['name']}: {existing_pv['system_capacity_kw']} kW, "
    f"Tilt: {existing_pv['tilt']}°, Azimuth: {existing_pv['azimuth']}°, "
    f"Shading: {existing_pv['shading']}%"
)

print("\nNew PV options:")
for option in pv_options:
    print(
        f"  - {option['name']}: Max {option['max_capacity_kw']} kW, "
        f"Tilt: {option['tilt']}°, Azimuth: {option['azimuth']}°, "
        f"Shading: {option['shading']}%, Cost multiplier: {option['cost_multiplier']}"
    )

In [None]:
# Cell 4: Define utility functions for the optimization
def load_demand_profile(csv_path):
    """Load a one-year, half-hourly demand profile from a CSV file.

    The CSV must contain a 'Date and Time' column (parsed day-first). The
    consumption column is 'Consumtpion (kWh)' if present, otherwise the first
    column whose name contains 'consum' or 'demand', otherwise the second
    column of the file.

    Rows whose timestamp is missing or cannot be parsed are dropped, duplicate
    timestamps keep their first occurrence, and the result is reindexed onto a
    complete 30-minute grid for the year of the earliest timestamp (Feb 29
    excluded). Grid points with no data are filled with 0.

    Args:
        csv_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        pandas.Series of 17520 values indexed by half-hourly timestamps.

    Raises:
        ValueError: If no row has a usable timestamp, or the final series
            does not have 17520 points.
    """
    timestamp_col = 'Date and Time'
    expected_points = 17520  # 365 days x 48 half-hours

    raw = pd.read_csv(csv_path, parse_dates=[timestamp_col], dayfirst=True)

    # read_csv leaves the column as plain objects when any value fails to
    # parse; coerce in that case so bad rows become NaT and are dropped below
    # instead of breaking the `.year` lookup later on.
    if not pd.api.types.is_datetime64_any_dtype(raw[timestamp_col]):
        raw[timestamp_col] = pd.to_datetime(
            raw[timestamp_col], dayfirst=True, errors='coerce'
        )

    # Drop rows with NaN timestamps right away
    raw = raw.dropna(subset=[timestamp_col])
    print(f"CSV loaded with {len(raw)} valid rows")
    if raw.empty:
        raise ValueError(
            f"No rows with a valid '{timestamp_col}' value were found in {csv_path}"
        )

    # Get the consumption column (the misspelling matches the data file header)
    consumption_col = 'Consumtpion (kWh)'
    if consumption_col not in raw.columns:
        consumption_col = next(
            (
                col for col in raw.columns
                if 'consum' in col.lower() or 'demand' in col.lower()
            ),
            # If no consumption-like column is found, use the second column
            raw.columns[1],
        )

    print(f"Using consumption column: '{consumption_col}'")

    # Create a Series with timestamp index and consumption values
    s = pd.Series(raw[consumption_col].values, index=raw[timestamp_col])

    # Check for duplicate timestamps
    dup_mask = s.index.duplicated(keep='first')
    dup_count = dup_mask.sum()
    if dup_count:
        print(f"⚠️ Dropping {dup_count} duplicate timestamps")
        s = s[~dup_mask]

    # Build the expected half-hour index for entire year (no Feb 29)
    year = s.index.min().year
    start = pd.Timestamp(year, 1, 1, 0, 0)
    end = pd.Timestamp(year, 12, 31, 23, 30)
    expected = pd.date_range(start, end, freq="30min")
    expected = expected[~((expected.month == 2) & (expected.day == 29))]

    # Reindex to ensure complete coverage
    s = s.reindex(expected)
    missing = s.isna().sum()
    if missing:
        print(f"⚠️ Filling {missing} missing points with 0")
        s = s.fillna(0.0)

    # Final sanity check (explicit raise so it survives `python -O`)
    if len(s) != expected_points:
        raise ValueError(f"Got {len(s)} points, expected {expected_points}")
    return s

def create_30_year_profile(one_year_series, years=30):
    """Repeat a one-year, half-hourly series over consecutive calendar years.

    The values of ``one_year_series`` are copied unchanged for each year,
    starting with the year of its first timestamp. Each copy is re-indexed
    onto that year's 30-minute grid with Feb 29 removed, so every year has
    the same number of points.

    Args:
        one_year_series: Series indexed by timestamps covering one year at
            30-minute resolution without Feb 29.
        years: Number of consecutive years to generate (default 30).

    Returns:
        The concatenation of the per-year copies, in chronological order.

    Raises:
        ValueError: If ``years`` is less than 1, the input is empty, or the
            input length does not match a year's half-hourly grid.
    """
    if years < 1:
        raise ValueError(f"years must be at least 1, got {years}")
    points_per_year = len(one_year_series)
    if points_per_year == 0:
        raise ValueError("one_year_series is empty")

    start_year = one_year_series.index[0].year
    yearly_copies = []

    for year in range(start_year, start_year + years):
        # Half-hourly grid for this calendar year
        year_range = pd.date_range(
            start=pd.Timestamp(year, 1, 1, 0, 0),
            end=pd.Timestamp(year, 12, 31, 23, 30),
            freq="30min",
        )

        # Remove Feb 29 if it's a leap year
        year_range = year_range[~((year_range.month == 2) & (year_range.day == 29))]

        # Explicit check (not `assert`) so it is still enforced under -O
        if len(year_range) != points_per_year:
            raise ValueError(
                f"Year {year} has {len(year_range)} points, expected {points_per_year}"
            )

        # Same values, new timestamps
        year_data = one_year_series.copy()
        year_data.index = year_range
        yearly_copies.append(year_data)

    # Concatenate all years
    return pd.concat(yearly_copies)

def calculate_pv_cost(capacity_kw, cost_multiplier=1.0):
    """
    Return the PV cost per kW for a system of the given size.

    The base price follows 1047.3 * e^(-0.002 * capacity_kw), is never allowed
    below $750 per kW, and is then scaled by ``cost_multiplier``.

    Args:
        capacity_kw: PV capacity in kW
        cost_multiplier: Multiplier for special installations (e.g., 1.2 for ground-mounted)

    Returns:
        cost_per_kw: Cost per kW in dollars (0.0 when capacity_kw is not positive)
    """
    price_floor = 750.0

    # Nothing to install, nothing to pay
    if capacity_kw <= 0:
        return 0.0

    # Economies of scale: the unit price decays exponentially with size
    scaled_price = 1047.3 * np.exp(-0.002 * capacity_kw)

    # Clamp to the minimum unit price
    unit_price = scaled_price if scaled_price > price_floor else price_floor

    return unit_price * cost_multiplier

def allocate_pv_capacity(total_capacity_kw, options):
    """
    Distribute additional PV capacity over the options in list order.

    Each option receives as much of the remaining capacity as its
    'max_capacity_kw' allows before the next option is considered. Options
    that receive nothing are left out of the result. The input dicts are not
    modified; each returned entry is a copy with 'system_capacity_kw' set.

    Args:
        total_capacity_kw: Total additional PV capacity to allocate
        options: List of PV options with max_capacity_kw and other parameters

    Returns:
        allocated_pv: List of PV configurations with allocated capacity
    """
    unassigned_kw = total_capacity_kw
    allocated_pv = []

    for site in options:
        # Take what is left, capped at what this site can hold
        share_kw = min(unassigned_kw, site['max_capacity_kw'])

        if share_kw > 0:
            allocated_pv.append({**site, 'system_capacity_kw': share_kw})
            unassigned_kw -= share_kw

        # Stop as soon as everything has been placed
        if unassigned_kw <= 0:
            break

    return allocated_pv

# Smoke checks for the helpers defined above: they only print results for
# visual inspection, nothing is asserted.
def _print_cost_table(capacities, cost_multiplier=1.0):
    """Print a capacity vs. cost-per-kW table using calculate_pv_cost."""
    print("Capacity (kW) | Cost per kW ($)")
    print("--------------------------")
    for capacity in capacities:
        per_kw = calculate_pv_cost(capacity, cost_multiplier=cost_multiplier)
        print(f"{capacity:12.1f} | ${per_kw:10.2f}")


# Standard price curve
print("\nTesting PV cost function:")
test_capacities = [5, 10, 20, 30, 40, 50, 100]
_print_cost_table(test_capacities)

# Same curve with the 20% ground-mounted premium applied
print("\nGround-mounted PV with 20% premium:")
_print_cost_table([10, 30, 50], cost_multiplier=1.2)

# Allocation of 50 kW across copies of the configured PV options
print("\nTesting PV allocation:")
print("Testing with 50 kW total capacity...")
test_options = [dict(option) for option in pv_options]
allocated = allocate_pv_capacity(50, test_options)
print("Allocation results:")
for pv in allocated:
    print(f"  - {pv['name']}: {pv['system_capacity_kw']:.2f} kW")

print("\nUtility functions defined and tested successfully!")

In [None]:
# Cell 5: Load one-year demand and build 30-year profile
# This cell requires Cell 3 (data directory paths) and Cell 4 (utility functions)

# Only the lookup of `demand_file` is guarded, so the "run Cell 3" hint is
# shown for that specific problem and not for an unrelated NameError raised
# further down (which now surfaces with its own traceback).
try:
    demand_source = demand_file
except NameError:
    print("Error: demand_file not defined. Please run Cell 3 first.")
    raise

print(f"Loading demand data from: {demand_source}")
one_year_demand = load_demand_profile(demand_source)
print(f"One year data points: {len(one_year_demand)}")

# Create 30-year demand profile
demand_profile = create_30_year_profile(one_year_demand)
print("\n30-year demand profile built:")
print(f"  • Time steps : {len(demand_profile)}")
print(f"  • Date range : {demand_profile.index[0]} → {demand_profile.index[-1]}")
print(f"  • Total demand: {demand_profile.sum():.2f} kWh")

In [None]:
# Cell 6: Simulate existing PV system
import time
from datetime import timedelta

print("\nSimulating 30-year PV generation for existing system...")
start_time = time.time()

# Define start years (one for each weather file)
start_years = [2025, 2040, 2050]

# Only the simulation call itself is guarded. Post-processing runs in the
# `else` branch, so a failure there can no longer replace a successful
# simulation with mock data.
try:
    # Simulate existing PV system
    pv_profile = simulate_multi_year_pv(
        weather_files=weather_files,
        roof_params=[existing_pv],
        repeats_per_file=10,
        start_years=start_years
    )
except NameError as e:
    print(f"Error: {e}. Make sure you've run the previous cells to define weather_files and existing_pv.")
    raise
except Exception as e:
    print(f"Error simulating PV: {e}")

    # The mock profile is built on the demand profile's index. Without it no
    # fallback is possible, so surface the original error instead of
    # continuing with `pv_profile` left undefined.
    if 'demand_profile' not in locals():
        print("Cannot create a mock PV profile: demand_profile is not defined.")
        raise

    # Create a mock PV profile for testing if simulation fails
    print("Creating mock PV profile for testing...")
    print("WARNING: downstream results will use synthetic PV data, not a real simulation.")

    mock_index = demand_profile.index
    hours = mock_index.hour.to_numpy()
    months = mock_index.month.to_numpy()

    # Generation only between 06:00 and 18:00, peaking at noon. The shape
    # depends on the whole hour only, so both half-hours of an hour share it.
    daylight = (hours >= 6) & (hours < 18)
    scale = 1.0 - np.abs(hours - 12) / 6.0

    # Southern hemisphere: summer is December-February, winter is June-August
    seasonal_factor = np.select(
        [np.isin(months, [12, 1, 2]), np.isin(months, [6, 7, 8])],
        [1.0, 0.4],      # Summer, Winter
        default=0.7,     # Spring/Fall
    )

    # Random variation of +/-20% around the nominal value
    noise = 0.8 + 0.4 * np.random.random(len(mock_index))

    # NOTE(review): the peak value of 10.0 is stored under 'simulated_kwh';
    # whether it is meant as kW or as kWh per half-hour step is unclear - confirm.
    pv_values = np.where(daylight, 10.0 * scale * seasonal_factor * noise, 0.0)

    # Create the PV profile
    pv_profile = pd.DataFrame({
        'simulated_kwh': pv_values.astype(np.float32)
    }, index=mock_index)

    print(f"Mock PV profile created with {len(pv_profile)} time steps")
    print(f"Total generation: {pv_profile['simulated_kwh'].sum():.2f} kWh")
else:
    elapsed = time.time() - start_time
    print(f"PV simulation completed in {elapsed:.1f} seconds ({timedelta(seconds=int(elapsed))})")
    print(f"   • PV steps: {len(pv_profile)}")
    print(f"   • Date range: {pv_profile.index[0]} → {pv_profile.index[-1]}")
    print(f"   • Total generation: {pv_profile['simulated_kwh'].sum():.2f} kWh")

    # Optimize memory usage for large datasets
    print("\nOptimizing memory usage for 30-year simulation...")
    # Convert float64 to float32 to save memory (reduces memory footprint by ~50%)
    pv_profile['simulated_kwh'] = pv_profile['simulated_kwh'].astype(np.float32)
    demand_profile = demand_profile.astype(np.float32)
    print("Memory optimization complete.")