In [None]:
# Cell 1: Import all required libraries
import sys
import time
import os
from pathlib import Path
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from scipy import optimize
from scipy.interpolate import griddata
# Pymoo imports for NSGA-II
from pymoo.core.problem import Problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.termination import get_termination
from pymoo.optimize import minimize
from pymoo.core.callback import Callback
# Standard libraries for file handling
import json
import csv
import warnings
warnings.filterwarnings('ignore')  # Suppress warnings
# Additional math and stats
import math
from statistics import mean, median

# Set up paths to the scripts, data and results directories.
# The layout is resolved relative to the notebook's working directory, which
# is expected to sit two levels below the project root.
try:
    notebook_dir = Path.cwd()
    project_root = notebook_dir.parent.parent  # Two levels up from the notebook

    # Folder holding the custom modules imported below
    scripts_dir = project_root / "5_nsga_scripts" / "NSGA_base"

    # Data lives beside the project root; results live inside it
    data_dir = project_root.parent / "data"
    results_dir = project_root / "5_nsga_results"

    print(f"Project root: {project_root}")
    print(f"Scripts directory: {scripts_dir}")
    print(f"Data directory: {data_dir}")
    print(f"Results directory: {results_dir}")

    # Validate the layout BEFORE creating anything: if the notebook is run
    # from an unexpected directory we must not leave a stray results folder
    # behind in the wrong place.
    if not scripts_dir.exists():
        raise FileNotFoundError(f"Scripts directory not found at: {scripts_dir}")

    # Create results directory if it doesn't exist
    results_dir.mkdir(exist_ok=True)

except Exception as e:
    print(f"Error setting up directories: {e}")
    raise

# Put the scripts directory at the front of the import path. Any earlier copy
# of the same entry is dropped first so re-running this cell does not pile up
# duplicates, while the directory still takes precedence as before.
_scripts_path = str(scripts_dir)
sys.path[:] = [entry for entry in sys.path if entry != _scripts_path]
sys.path.insert(0, _scripts_path)

# Import custom modules
try:
    # Bind the module object itself as `config`. The following cells read
    # every setting as `config.<NAME>`, and a star-import on its own does not
    # create that name. The module import comes first so that, if the module
    # deliberately exports its own `config` object, the star-import below
    # still wins exactly as it did before.
    import config
    from config import *

    # Import core modules
    from battery import simulate_battery_dispatch
    from pv import simulate_multi_year_pv, load_demand_profile, create_30_year_profile
    from fin import compute_financials
    from obj import evaluate_solution
    from results import setup_results_directory, create_pareto_plot

    print("All modules imported successfully")
except ImportError as e:
    # List what is actually present to make a wrong scripts path obvious.
    print(f"Error importing modules: {e}")
    print(f"Files in scripts directory: {[f.name for f in scripts_dir.iterdir() if f.is_file()]}")
    raise

print("All imports completed successfully")

# Create a new numbered subfolder for this run's results
run_dir = setup_results_directory(results_dir)
print(f"Results will be saved to: {run_dir}")

In [None]:
# Cell 2: Load Data Files and Configuration
print("Loading data files and configuration...")

# Start from the configured data directory and fall back step by step:
# known alternative locations first, then a path typed in by the user.
data_dir = Path(config.DATA_DIR)
if not data_dir.exists():
    alternatives = [
        project_root / "data",
        project_root.parent / "Battery_Optimisation" / "data",
        Path.cwd() / "data"
    ]

    # First alternative that exists on disk, or None when there is none
    existing_alternative = next(
        (candidate for candidate in alternatives if candidate.exists()),
        None,
    )

    if existing_alternative is not None:
        data_dir = existing_alternative
    else:
        print("\nWARNING: Data directory not found at expected locations.")
        print("Please enter the absolute path to your data directory:")
        user_path = input().strip()
        data_dir = Path(user_path)
        if not data_dir.exists():
            raise FileNotFoundError(f"Data directory not found: {data_dir}\nPlease check your path and try again.")

print(f"Using data directory: {data_dir}")

# Find weather files
print("\nLooking for weather files...")
weather_files = []

# Preferred files, in the order listed in the configuration
for filename in config.DESIRED_WEATHER_FILES:
    file_path = data_dir / filename
    if file_path.exists():
        weather_files.append(str(file_path))
        print(f"  ✔ Found: {filename}")
    else:
        print(f"  ❌ Missing: {filename}")

if len(weather_files) < 3:
    # If we didn't find all the specific files, top up with any other .epw
    # files in the data directory (alphabetical order).
    print("\nSearching for alternative .epw files...")

    # A single pass over the candidates is enough. The previous while-loop
    # never terminated when every available .epw file had already been
    # selected but fewer than three files were found in total.
    for epw in sorted(data_dir.glob("*.epw")):
        if len(weather_files) >= 3:
            break
        if str(epw) not in weather_files:
            weather_files.append(str(epw))
            print(f"  Using: {epw.name}")

    # If we still don't have 3 weather files, we need to check the path
    if len(weather_files) < 3:
        raise FileNotFoundError(
            f"Could not find the required weather files: {', '.join(config.DESIRED_WEATHER_FILES)}\n"
            f"Please ensure these files exist in: {data_dir}"
        )

# Find demand file
print("\nLooking for demand file...")
demand_file = None

# First try the primary file specified in DEMAND_FILE_CONFIG
primary_file = data_dir / config.DEMAND_FILE_CONFIG["primary_file"]
if primary_file.exists():
    demand_file = primary_file
    print(f"  ✔ Found primary demand file: {primary_file.name}")
else:
    # Try alternative files, first match wins
    for filename in config.ALTERNATIVE_DEMAND_FILES:
        file_path = data_dir / filename
        if file_path.exists():
            demand_file = file_path
            print(f"  ✔ Found alternative demand file: {filename}")
            break

if demand_file is None:
    # Nothing matched by name: let the user pick one of the CSV files present
    csv_files = sorted(data_dir.glob("*.csv"))
    if not csv_files:
        raise FileNotFoundError(f"No CSV files found in {data_dir}")

    # NOTE: the loop variable is deliberately not called `csv`; that name
    # belongs to the standard-library module imported in Cell 1 and must not
    # be overwritten with a Path.
    print("\nNo demand file found with expected name. Available CSV files:")
    for csv_path in csv_files:
        print(f"  - {csv_path.name}")

    # Ask user to select a file
    print("\nPlease enter the number of the CSV file to use for demand data:")
    for number, csv_path in enumerate(csv_files, start=1):
        print(f"  {number}. {csv_path.name}")

    raw_selection = input().strip()
    try:
        selection = int(raw_selection) - 1
    except ValueError as err:
        # Same exception type as an out-of-range choice, with a clear message
        raise ValueError(f"Invalid selection: {raw_selection!r} is not a number") from err

    if not 0 <= selection < len(csv_files):
        raise ValueError("Invalid selection")

    demand_file = csv_files[selection]
    print(f"Using {demand_file.name} as demand file")

# Display configuration summary (read-only: everything below only prints)
print("\nConfiguration summary:")
print(f"  • Data directory: {data_dir}")
print(f"  • Demand file: {demand_file.name}")
weather_file_names = ', '.join(Path(wf).name for wf in weather_files)
print(f"  • Weather files: {weather_file_names}")

# Existing PV system
print("\nExisting PV system:")
existing_pv = config.EXISTING_PV
existing_pv_summary = (
    f"  - {existing_pv['name']}: {existing_pv['system_capacity_kw']} kW, "
    f"Tilt: {existing_pv['tilt']}°, Azimuth: {existing_pv['azimuth']}°, "
    f"Shading: {existing_pv['shading']}%"
)
print(existing_pv_summary)

# Candidate sites for additional PV
print("\nNew PV options:")
for option in config.PV_OPTIONS:
    max_capacity = option['max_capacity_kw']
    # An infinite cap is shown as "unlimited" rather than "inf"
    if max_capacity == float('inf'):
        max_capacity_str = "unlimited"
    else:
        max_capacity_str = f"{max_capacity:.1f}"
    print(f"  - {option['name']}: Max {max_capacity_str} kW, "
          f"Tilt: {option['tilt']}°, Azimuth: {option['azimuth']}°, "
          f"Shading: {option['shading']}%, Cost multiplier: {option['cost_multiplier']}")

# Financial and battery settings
print(f"\n  • Discount rate: {config.DISCOUNT_RATE * 100}%")
rates = config.ELECTRICITY_RATES
print(f"  • Electricity rates: Peak: ${rates['peak']}/kWh, Off-peak: ${rates['offpeak']}/kWh")
print(f"  • Export rate: ${rates['export']}/kWh")
print(f"  • Battery efficiency: {config.BATTERY_EFF * 100}%")

# Battery control mode: reported as "Advanced" when the peak reserve SOC is
# above the minimum SOC, otherwise "Basic"
if hasattr(config, 'BATTERY_CONTROL'):
    battery_control = config.BATTERY_CONTROL
    advanced_mode = battery_control['peak_reserve_soc'] > config.MIN_SOC
    mode_label = 'Advanced' if advanced_mode else 'Basic'
    print(f"  • Battery control mode: {mode_label}")
    if advanced_mode:
        print(f"    - Peak reserve: {battery_control['peak_reserve_soc']*100:.0f}% SOC, {battery_control['peak_reserve_hours']} hours before peak")
        print(f"    - Off-peak minimum SOC: {battery_control['off_peak_min_soc']*100:.0f}%")

print(f"  • Project lifetime: {config.PROJECT_LIFETIME} years")
aedt = config.PEAK_PERIODS['AEDT_period']
aest = config.PEAK_PERIODS['AEST_period']
print(f"  • Peak periods: AEDT (Oct-Mar): {aedt['start_hour']}:00-{aedt['end_hour']}:00, AEST (Apr-Sep): {aest['start_hour']}:00-{aest['end_hour']}:00")
print(f"  • Optimization: {config.POPULATION_SIZE} population, {config.N_GENERATIONS} generations")

# Sensitivity analysis parameters (current value plus the suggested range)
print("\nSensitivity Analysis Parameters:")
print(f"  • Inflation: {config.MAINTENANCE_INFLATION*100}% (sensitivity range: 2-5%)")
print(f"  • Discount rate: {config.DISCOUNT_RATE*100}% (sensitivity range: 5-11%)")
print(f"  • Battery cost: ${config.BATTERY_COST_FORMULA['base_cost']}/kWh (sensitivity: ±20%)")
print(f"  • PV cost: ${config.PV_COST_FORMULA['base_cost']}/kW (sensitivity: ±15%)")
print(f"  • Export rate: ${rates['export']}/kWh (sensitivity: try 0.05-0.10)")
print(f"  • Project lifetime: {config.PROJECT_LIFETIME} years (sensitivity: try 20, 25 years)")

# Weather file scenarios; the baseline scenario is flagged as the current one
print("\nWeather scenarios available:")
for scenario, files in config.WEATHER_SCENARIOS.items():
    scenario_files = ', '.join(Path(f).name for f in files)
    current_marker = " (current)" if scenario == 'baseline' else ""
    print(f"  • {scenario.capitalize()}: {scenario_files}{current_marker}")

# Battery charging window
charging_control = config.BATTERY_CHARGING_CONTROL
if charging_control['enabled']:
    allowed_hours = charging_control['allowed_hours']
    print(f"\nBattery charging restricted to: {allowed_hours['start_hour']}:00-{allowed_hours['end_hour']}:00")
else:
    print("\nBattery charging: Unrestricted (sensitivity: try time-of-day restrictions)")

# Battery rebate: a positive fixed amount takes precedence over the percentage
rebates = config.BATTERY_REBATES
if not rebates['enabled']:
    print("Battery rebates: None (sensitivity: try fixed and percentage rebates)")
elif rebates['fixed_amount'] > 0:
    print(f"Battery rebate: Fixed ${rebates['fixed_amount']}")
else:
    print(f"Battery rebate: {rebates['percentage']*100}% up to ${rebates['cap']}")

In [None]:
# Cell 3: Load Demand Data and Create 30-Year Profile
# NOTE: the two functions defined in this cell reuse the names
# `load_demand_profile` and `create_30_year_profile` that Cell 1 imported from
# `pv`, so from this point on the notebook-local definitions are the ones
# that get called.
print("\nLoading demand data and creating profiles...")

def load_demand_profile(csv_path: Path) -> dict:
    """Read one year of half-hourly consumption and PV generation from a CSV.

    Column names come from ``config.DEMAND_FILE_CONFIG["columns"]``. Rows
    without a timestamp and repeated timestamps (first occurrence kept) are
    discarded. Both series are then aligned to a complete 30-minute index for
    the calendar year of the earliest timestamp, with 29 February removed;
    any slot without data is set to 0.

    Args:
        csv_path: Location of the demand CSV file.

    Returns:
        Dict with keys ``'consumption'`` and ``'generation'``, each a
        ``pd.Series`` of 17520 values on the same half-hourly index.
    """
    file_config = config.DEMAND_FILE_CONFIG
    ts_col = file_config["columns"]["timestamp"]
    gen_col = file_config["columns"]["pv_generation"]
    load_col = file_config["columns"]["consumption"]
    # Looked up as before, but not applied: parsing relies on dayfirst=True
    date_format = file_config["date_format"]

    frame = pd.read_csv(csv_path, parse_dates=[ts_col], dayfirst=True)

    # Rows without a timestamp cannot be placed on the time axis
    frame = frame.dropna(subset=[ts_col])
    print(f"CSV loaded with {len(frame)} valid rows")

    # Both series are indexed by the same timestamp column
    profiles = {
        'consumption': pd.Series(frame[load_col].values, index=frame[ts_col]),
        'generation': pd.Series(frame[gen_col].values, index=frame[ts_col]),
    }

    # Keep only the first occurrence of every timestamp; the index is shared,
    # so one mask serves both series
    repeated = profiles['consumption'].index.duplicated(keep='first')
    dup_count = repeated.sum()
    if dup_count:
        print(f"⚠️ Dropping {dup_count} duplicate timestamps")
        profiles = {label: values[~repeated] for label, values in profiles.items()}

    # Full half-hourly grid for the year of the earliest timestamp (no Feb 29)
    year = profiles['consumption'].index.min().year
    expected = pd.date_range(
        pd.Timestamp(year, 1, 1, 0, 0),
        pd.Timestamp(year, 12, 31, 23, 30),
        freq="30min",
    )
    on_feb_29 = (expected.month == 2) & (expected.day == 29)
    expected = expected[~on_feb_29]

    # Align to the grid first, then patch the gaps, consumption before generation
    profiles = {label: values.reindex(expected) for label, values in profiles.items()}
    for label in ('consumption', 'generation'):
        missing = profiles[label].isna().sum()
        if missing:
            print(f"⚠️ Filling {missing} missing {label} points with 0")
            profiles[label] = profiles[label].fillna(0.0)

    # Final sanity check: 365 days x 48 half-hours
    for label in ('consumption', 'generation'):
        assert len(profiles[label]) == 17520, f"Got {len(profiles[label])} {label} points, expected 17520"

    return profiles

def create_30_year_profile(one_year_series: pd.Series, years=30, start_year=2025, apply_growth=False) -> pd.Series:
    """
    Tile a one-year half-hourly series into a multi-year profile.

    Each output year gets its own 30-minute DatetimeIndex running from
    1 January 00:00 to 31 December 23:30 with 29 February removed, so every
    year holds the same number of points. Only the values of
    ``one_year_series`` are reused; its own index is ignored, so any index
    type is accepted.

    Args:
        one_year_series: Base-year values, one per half-hour (17520 points).
        years: Number of years to create.
        start_year: Calendar year of the first output year.
        apply_growth: If True and ``config.DEMAND_GROWTH['enabled']`` is set,
            scale each year according to ``config.DEMAND_GROWTH``:
            pattern ``'decade'`` steps the multiplier once per 10 years,
            pattern ``'annual'`` compounds a yearly rate that reaches the same
            percentage after 10 years. Any other pattern leaves the values
            unscaled. ``config`` is not consulted when this is False.

    Returns:
        Pandas Series with the multi-year profile, indexed by timestamp.

    Raises:
        ValueError: If ``one_year_series`` does not have exactly one value per
            half-hour of a 365-day year.
    """
    # Short-circuit keeps `config` untouched unless growth was requested.
    growth_active = bool(apply_growth and config.DEMAND_GROWTH['enabled'])
    if growth_active:
        growth_pattern = config.DEMAND_GROWTH['pattern']
        percent_per_decade = config.DEMAND_GROWTH['percent_per_decade']

    n_points = len(one_year_series)
    all_data = []

    for year_offset in range(years):
        # Build the index for this specific year, without Feb 29
        target_year = start_year + year_offset
        year_range = pd.date_range(
            start=pd.Timestamp(target_year, 1, 1, 0, 0),
            end=pd.Timestamp(target_year, 12, 31, 23, 30),
            freq="30min",
        )
        year_range = year_range[~((year_range.month == 2) & (year_range.day == 29))]

        # Explicit check instead of `assert`, which is stripped under -O
        if len(year_range) != n_points:
            raise ValueError(
                f"one_year_series has {n_points} points, but year {target_year} "
                f"needs {len(year_range)} half-hourly points"
            )

        year_data = one_year_series.copy()

        if growth_active:
            if growth_pattern == 'decade':
                # Years 0-9 = 1x, years 10-19 = 1 + p, years 20-29 = 1 + 2p, ...
                decade = year_offset // 10
                growth_multiplier = 1 + (decade * percent_per_decade / 100)
                year_data = year_data * growth_multiplier
            elif growth_pattern == 'annual':
                # Yearly rate chosen so that ten years compound to the
                # configured per-decade percentage
                annual_rate = (1 + percent_per_decade / 100) ** (1/10) - 1
                growth_multiplier = (1 + annual_rate) ** year_offset
                year_data = year_data * growth_multiplier

        year_data.index = year_range
        all_data.append(year_data)

    # Concatenate all years
    return pd.concat(all_data)

# Load the demand data
print(f"\nLoading demand data from: {demand_file}")
demand_data = load_demand_profile(demand_file)

# Display summary information
one_year_demand = demand_data['consumption']
one_year_generation = demand_data['generation']

print(f"\nDemand data summary:")
print(f"  • Time period: {one_year_demand.index[0]} to {one_year_demand.index[-1]}")
print(f"  • Total annual consumption: {one_year_demand.sum():.2f} kWh")
print(f"  • Total annual generation: {one_year_generation.sum():.2f} kWh")
print(f"  • Time step: {(one_year_demand.index[1] - one_year_demand.index[0]).total_seconds()/60:.0f} minutes")

# Create 30-year profiles
print("\nCreating 30-year profiles for simulation...")

# Check if demand growth is enabled and say how it will be applied.
# The configured percentage is always a per-decade figure: the 'decade'
# pattern applies it as a step every ten years, the 'annual' pattern spreads
# it over the decade by compounding yearly. It is never a per-year rate, so
# the message must not call it one.
demand_growth_enabled = config.DEMAND_GROWTH['enabled']
if demand_growth_enabled:
    growth_pattern = config.DEMAND_GROWTH['pattern']
    growth_percent = config.DEMAND_GROWTH['percent_per_decade']
    if growth_pattern == 'decade':
        print(f"Applying demand growth of {growth_percent}% per decade (stepped at the start of each decade)")
    elif growth_pattern == 'annual':
        print(f"Applying demand growth of {growth_percent}% per decade (compounded annually)")
    else:
        # create_30_year_profile only scales for 'decade' and 'annual'
        print(f"WARNING: Unrecognised demand growth pattern '{growth_pattern}' - no growth will be applied")

# Create demand profile with potential growth
demand_profile = create_30_year_profile(
    one_year_demand,
    years=config.PROJECT_LIFETIME,
    start_year=2025,
    apply_growth=demand_growth_enabled
)

# Create PV profile (no growth - PV output doesn't grow with time)
existing_pv_profile = create_30_year_profile(
    one_year_generation,
    years=config.PROJECT_LIFETIME,
    start_year=2025,
    apply_growth=False
)

# Calculate baseline metrics for comparison
print("\nCalculating baseline metrics with existing PV:")
annual_consumption = one_year_demand.sum()
annual_generation = one_year_generation.sum()

# Import and export are netted per half-hour interval, not on annual totals:
# netting the yearly sums would let daytime surplus cancel night-time demand
# and so understate both grid import and grid export.
# Like the annual formula it replaces, this treats the consumption series as
# the site load before PV is subtracted.
net_load = one_year_demand - one_year_generation
annual_import_with_pv = net_load.clip(lower=0).sum()
annual_export_with_pv = (-net_load).clip(lower=0).sum()

# Share of PV output used on site; defined as 0 when there is no generation
# so the ratio never becomes NaN
self_consumed_pv = annual_generation - annual_export_with_pv
if annual_generation > 0:
    self_consumption_ratio = self_consumed_pv / annual_generation
else:
    self_consumption_ratio = 0.0

print(f"  • Annual grid import with existing PV: {annual_import_with_pv:.2f} kWh")
print(f"  • Annual grid export with existing PV: {annual_export_with_pv:.2f} kWh")
print(f"  • Self-consumption ratio: {self_consumption_ratio*100:.1f}%")

# Store baseline metrics for later comparison
baseline_metrics = {
    'annual_consumption': annual_consumption,
    'annual_historical_pv': annual_generation,
    'annual_import_with_pv': annual_import_with_pv,
    'annual_export_with_pv': annual_export_with_pv,
    'self_consumption_ratio': self_consumption_ratio
}

# Summarise the multi-year profiles (including the growth effect, if any)
print("\n30-year profiles created:")
print(f"  • Time steps: {len(demand_profile)}")
print(f"  • Date range: {demand_profile.index[0]} → {demand_profile.index[-1]}")
print(f"  • Total 30-year demand: {demand_profile.sum():.2f} kWh")
print(f"  • Total 30-year existing PV generation: {existing_pv_profile.sum():.2f} kWh")

if demand_growth_enabled:
    # Compare the first simulated year (2025) with the final one
    profile_years = demand_profile.index.year
    final_year = 2025 + config.PROJECT_LIFETIME - 1
    first_year_demand = demand_profile[profile_years == 2025].sum()
    last_year_demand = demand_profile[profile_years == final_year].sum()
    growth_factor = last_year_demand / first_year_demand
    print(f"  • Demand growth effect: Year 1 = {first_year_demand:.2f} kWh, Year {config.PROJECT_LIFETIME} = {last_year_demand:.2f} kWh")
    print(f"  • Total growth factor over {config.PROJECT_LIFETIME} years: {growth_factor:.2f}x")

# Downcast both profiles to float32 to reduce memory use
print("\nOptimizing memory usage for 30-year simulation...")
demand_profile, existing_pv_profile = (
    demand_profile.astype(np.float32),
    existing_pv_profile.astype(np.float32),
)
print("Memory optimization complete.")

In [None]:
# Cell 4: Test Utility Functions for the Optimization
print("Testing utility functions for optimization...")

# Helpers under test
from fin import calculate_pv_cost
from pv import allocate_pv_capacity

# Shared header lines for the two cost tables below
cost_table_header = "Capacity (kW) | Cost per kW ($)"
cost_table_rule = "--------------------------"

# PV cost at the default cost multiplier
print("\nTesting PV cost function:")
test_capacities = [5, 10, 20, 30, 40, 50, 100]
print(cost_table_header)
print(cost_table_rule)
for cap in test_capacities:
    cost = calculate_pv_cost(cap)
    print(f"{cap:12.1f} | ${cost:10.2f}")

# PV cost with the multiplier of the third configured PV option
# (labelled ground-mounted in the output)
ground_multiplier = config.PV_OPTIONS[2]['cost_multiplier']
print(f"\nGround-mounted PV with {ground_multiplier*100:.0f}% premium:")
print(cost_table_header)
print(cost_table_rule)
for cap in [10, 30, 50]:
    cost = calculate_pv_cost(cap, cost_multiplier=ground_multiplier)
    print(f"{cap:12.1f} | ${cost:10.2f}")

# Split 50 kW across the configured PV options
print("\nTesting PV allocation:")
print("Testing with 50 kW total capacity...")
allocated = allocate_pv_capacity(50, config.PV_OPTIONS)
print("Allocation results:")
for pv in allocated:
    print(f"  - {pv['name']}: {pv['system_capacity_kw']:.2f} kW")

print("\nUtility functions tested successfully!")

In [None]:
# Cell 4 (continued): Simulate existing PV system
# Runs the multi-year PV simulation for the existing array only and stores the
# result in `pv_profile`, which the following cells use as the PV input.
import time
from datetime import timedelta

print("\nSimulating 30-year PV generation for existing system...")
start_time = time.time()

# Get configuration parameters
# START_YEARS is optional in config; the fallback list is used when absent.
start_years = getattr(config, 'START_YEARS', [2025, 2040, 2050])
existing_pv_system = config.EXISTING_PV

# Simulate existing PV system
# NOTE(review): repeats_per_file is fixed at 10; with three weather files this
# presumably yields 30 simulated years - confirm against
# simulate_multi_year_pv. demand_profile is built with config.PROJECT_LIFETIME
# years, so the two profiles only line up if that setting is also 30.
pv_profile = simulate_multi_year_pv(
    weather_files=weather_files,
    roof_params=[existing_pv_system],
    repeats_per_file=10,
    start_years=start_years
)

elapsed = time.time() - start_time
print(f"PV simulation completed in {elapsed:.1f} seconds ({timedelta(seconds=int(elapsed))})")
print(f"   • PV steps: {len(pv_profile)}")
print(f"   • Date range: {pv_profile.index[0]} → {pv_profile.index[-1]}")
print(f"   • Total generation: {pv_profile['simulated_kwh'].sum():.2f} kWh")

# Optimize memory usage for large datasets
print("\nOptimizing memory usage for 30-year simulation...")
# Downcast to float32 to save memory (halves the footprint of a float64 column)
pv_profile['simulated_kwh'] = pv_profile['simulated_kwh'].astype(np.float32)
# demand_profile was already downcast at the end of Cell 3; repeating it is harmless
demand_profile = demand_profile.astype(np.float32)
print("Memory optimization complete.")

In [None]:
# Cell 5: Verify baseline costs and setup
import time
from datetime import timedelta
import gc

print("\nVerifying baseline costs and simulation setup...")
start_time = time.time()

# Annual baseline costs exactly as configured
annual_no_pv_cost = config.ANNUAL_NO_PV_COST
print(f"Annual no-PV cost: ${annual_no_pv_cost:,.2f}")
annual_pv_only_cost = config.ANNUAL_PV_ONLY_COST
print(f"Annual PV-only cost: ${annual_pv_only_cost:,.2f}")
print(f"Annual savings from PV alone: ${annual_no_pv_cost - annual_pv_only_cost:,.2f}")

# Lifetime totals: the annual cost grows by the escalation rate every year,
# so the multiplier is the sum of (1 + rate)^year over the project lifetime
escalation_rate = config.ELECTRICITY_PRICE_ESCALATION
escalation_factor = sum((1 + escalation_rate)**year for year in range(config.PROJECT_LIFETIME))
no_pv_total_cost = annual_no_pv_cost * escalation_factor
pv_only_total_cost = annual_pv_only_cost * escalation_factor

print(f"\n30-year costs with {escalation_rate*100:.0f}% annual escalation:")
print(f"  • No-PV baseline: ${no_pv_total_cost:,.2f}")
print(f"  • PV-only baseline: ${pv_only_total_cost:,.2f}")
print(f"  • 30-year savings from PV alone: ${no_pv_total_cost - pv_only_total_cost:,.2f}")

# Free memory before the verification run
print("\nClearing memory cache before verification simulation...")
gc.collect()

print("Running verification simulation with 0 kWh battery...")
# Existing PV only, battery size zero, to check the dispatch setup
verification_inputs = dict(
    pv_gen=pv_profile['simulated_kwh'],
    demand=demand_profile,
    battery_kwh=0.0,
    roundtrip_eff=config.BATTERY_EFF,
    min_soc_pct=config.MIN_SOC,
    annual_deg_rate=config.BATTERY_DEGRADATION,
    grid_emission_rate=config.GRID_EMISSIONS,
    config=config,
)
disp0, totals0 = simulate_battery_dispatch(**verification_inputs)
print("✅ Verification simulation completed successfully")

# Key metrics from the verification run
print("\nVerification simulation results:")
print(f"  • Total demand: {totals0['total_demand']:,.2f} kWh")
total_grid_import = totals0['total_grid_import_peak'] + totals0['total_grid_import_offpeak']
print(f"  • Grid import: {total_grid_import:,.2f} kWh")
print(f"  • PV export: {totals0['total_pv_export']:,.2f} kWh")
print(f"  • Self-consumption rate: {totals0['self_consumption_rate']*100:.1f}%")
print(f"  • Renewable fraction: {totals0['renewable_fraction']*100:.1f}%")

# Free memory again before the optimization starts
print("\nFinal memory cleanup before optimization...")
gc.collect()

elapsed = time.time() - start_time
print(f"Verification completed in {elapsed:.1f} seconds ({timedelta(seconds=int(elapsed))})")
print("Ready to proceed with optimization.")

In [None]:
# Cell 6: Set up the NSGA-II optimization problem
from obj import BatteryPVOptimizationProblem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.termination import get_termination

print("Setting up optimization problem with battery + additional PV...")

try:
    # Problem definition built from the simulated PV and demand profiles
    problem = BatteryPVOptimizationProblem(
        pv_profile=pv_profile,
        demand_profile=demand_profile,
        config=config
    )

    print("Optimization problem defined with:")
    lower_bounds = ', '.join(str(bound) for bound in problem.xl)
    upper_bounds = ', '.join(str(bound) for bound in problem.xu)
    print(f"  • Variables: {problem.n_var} [{lower_bounds}] to [{upper_bounds}]")
    print(f"  • Objectives: {problem.n_obj} (IRR and NPV)")

    # NSGA-II with duplicate elimination, sized from the configuration
    algorithm = NSGA2(
        pop_size=config.POPULATION_SIZE,
        eliminate_duplicates=True
    )

    # Stop after the configured number of generations
    termination = get_termination("n_gen", config.N_GENERATIONS)

    print("NSGA-II algorithm configured with:")
    print(f"  • Population size: {config.POPULATION_SIZE}")
    print(f"  • Generations: {config.N_GENERATIONS}")
    print(f"  • Total evaluations: {config.POPULATION_SIZE * config.N_GENERATIONS}")
    print("Ready to start optimization")

except Exception as e:
    # Report and re-raise so the notebook stops at the failing cell
    print(f"Error setting up optimization problem: {e}")
    raise

In [None]:
# Cell 7: Define callback for tracking optimization progress
from pymoo.core.callback import Callback
import time
import numpy as np
from datetime import timedelta

class BestSolutionCallback(Callback):
    """Record and print per-generation progress of the optimization.

    After every generation the callback stores, in ``self.data``, the
    generation number, the best value of each objective in the current
    population, the first two decision variables of the individuals that
    achieve them, and the elapsed wall-clock time.

    Objective columns are read as negated values (column 0 negated IRR,
    column 1 negated NPV) and flipped back before being stored. Decision
    columns 0 and 1 are reported as battery size (kWh) and PV size (kW).

    The clock starts when the callback object is created, not when the
    optimization begins.
    """

    # Names of the per-generation series kept in ``self.data``
    _SERIES = (
        "gen",
        "best_irr",
        "best_npv",
        "batt_irr",
        "batt_npv",
        "pv_irr",
        "pv_npv",
        "time_elapsed",
    )

    def __init__(self):
        super().__init__()
        self.data = {name: [] for name in self._SERIES}
        self.start_time = time.time()
        self.last_print = self.start_time

    def notify(self, algorithm):
        """Store this generation's statistics and print progress when due."""
        history = self.data
        gen = algorithm.n_gen
        history["gen"].append(gen)

        # Best objective values of the current population (sign flipped back)
        F = algorithm.pop.get("F")
        irr_column = F[:, 0]
        npv_column = F[:, 1]
        best_irr = -np.min(irr_column)
        best_npv = -np.min(npv_column)
        history["best_irr"].append(best_irr)
        history["best_npv"].append(best_npv)

        # Timing: elapsed so far and a linear estimate of what is left
        elapsed = time.time() - self.start_time
        history["time_elapsed"].append(elapsed)
        n_generations = algorithm.termination.n_max_gen
        if gen > 0:
            avg_time_per_gen = elapsed / gen
        else:
            avg_time_per_gen = 0
        est_remaining_time = avg_time_per_gen * (n_generations - gen)

        # Decision variables of the individuals with the best IRR and best NPV
        X = algorithm.pop.get("X")
        irr_design = X[np.argmin(irr_column)]
        npv_design = X[np.argmin(npv_column)]
        best_irr_batt, best_irr_pv = irr_design[0], irr_design[1]
        best_npv_batt, best_npv_pv = npv_design[0], npv_design[1]

        history["batt_irr"].append(best_irr_batt)
        history["batt_npv"].append(best_npv_batt)
        history["pv_irr"].append(best_irr_pv)
        history["pv_npv"].append(best_npv_pv)

        # Print on the first generation, every fifth one, or after 30 s of silence
        current_time = time.time()
        overdue = (current_time - self.last_print) > 30
        if gen % 5 == 0 or gen == 1 or overdue:
            print(f"Generation {gen:3d}/{n_generations}: "
                  f"Best IRR = {best_irr*100:7.2f}% (Batt: {best_irr_batt:.1f} kWh, PV: {best_irr_pv:.1f} kW), "
                  f"Best NPV = ${best_npv:10,.2f} (Batt: {best_npv_batt:.1f} kWh, PV: {best_npv_pv:.1f} kW), "
                  f"Time: {timedelta(seconds=int(elapsed))}, "
                  f"Est. remaining: {timedelta(seconds=int(est_remaining_time))}")
            self.last_print = current_time

        # Closing summary once the last generation has been reached
        if gen == n_generations:
            print(f"\nOptimization complete! Total time: {timedelta(seconds=int(elapsed))}")
            print(f"Best IRR solution: {best_irr*100:.2f}% (Battery: {best_irr_batt:.1f} kWh, PV: {best_irr_pv:.1f} kW)")
            print(f"Best NPV solution: ${best_npv:,.2f} (Battery: {best_npv_batt:.1f} kWh, PV: {best_npv_pv:.1f} kW)")
            print("Ready for next cell to process and visualize results!")

print("Progress tracking callback defined.")
# The callback's timer starts here, at construction, so any pause between
# running this cell and starting the optimization is counted in the elapsed
# and estimated-remaining times it reports.
callback = BestSolutionCallback()
print("Ready to run optimization!")

In [None]:
# Cell 8: Run NSGA-II optimization with parallel processing
from pymoo.optimize import minimize
import time
import pickle
from datetime import timedelta
import pandas as pd
import numpy as np
import multiprocessing
import os

# Announce the run and start the wall-clock timer used for the runtime summary.
print("Running NSGA-II optimization with parallel processing...")
total_start_time = time.time()

# Size the worker pool: keep two cores free for the rest of the machine,
# but never use fewer than 2 workers or more than 8.
available_cores = multiprocessing.cpu_count()
n_processes = max(2, available_cores - 2)
if n_processes > 8:
    n_processes = 8
print(f"Setting up parallel processing with {n_processes} cores out of {available_cores} available")

# Search budget (population size and generation count) comes from config.
pop_size, n_generations = config.POPULATION_SIZE, config.N_GENERATIONS
print(f"Population size: {pop_size}, Generations: {n_generations}")
print(f"Total evaluations: {pop_size * n_generations}")

# Run the NSGA-II search, post-process the Pareto front (PV allocation,
# profitability index, balanced solution), persist the results, and fall back
# to a previously saved checkpoint if anything in that pipeline fails.
try:
    # Configure parallel processing - latest pymoo approach
    from pymoo.core.problem import StarmapParallelization
    from multiprocessing.pool import Pool

    # The context manager guarantees the worker processes are released even
    # when minimize() raises; a trailing pool.close() would be skipped on any
    # error and would leave the workers un-joined.
    with Pool(n_processes) as pool:
        # Pass the pool using the starmap parallelization
        runner = StarmapParallelization(pool.starmap)

        # NOTE(review): pymoo documents supplying `elementwise_runner` to the
        # problem's constructor. Confirm that passing it to minimize() really
        # parallelizes the evaluations for this `problem`.
        res = minimize(
            problem,
            algorithm,
            termination,
            seed=42,
            verbose=False,
            callback=callback,
            elementwise_runner=runner
        )

    # Fail with a clear message instead of an opaque TypeError on indexing.
    if res.X is None or res.F is None:
        raise ValueError("Optimization returned no Pareto-optimal solutions")

    # Extract & save Pareto front
    solutions = res.X
    battery_sizes = solutions[:, 0]
    additional_pv = solutions[:, 1]
    pareto_F = res.F
    irr_vals = -pareto_F[:, 0]  # Convert from -IRR to IRR (higher is better)
    npv_vals = -pareto_F[:, 1]  # Convert from -NPV to NPV (higher is better)

    # Create Pareto front DataFrame
    df = pd.DataFrame({
        'battery_kwh': battery_sizes,
        'additional_pv_kw': additional_pv,
        'irr': irr_vals,
        'npv': npv_vals
    })

    # Add PV allocation details: one row per solution, one column per PV option
    allocation_details = []
    from pv import allocate_pv_capacity
    for pv_kw in additional_pv:
        allocated_pv = allocate_pv_capacity(pv_kw, config.PV_OPTIONS)
        details = {
            'total_additional_pv': pv_kw,
            'total_system_pv': config.EXISTING_PV['system_capacity_kw'] + pv_kw
        }

        # Add allocation for each option (0.0 when the option received nothing)
        for option in config.PV_OPTIONS:
            option_name = option['name']
            allocated = next((p['system_capacity_kw'] for p in allocated_pv if p['name'] == option_name), 0.0)
            details[f'{option_name}_kw'] = allocated

        allocation_details.append(details)

    # Convert allocation details to DataFrame and join with main results
    allocation_df = pd.DataFrame(allocation_details)
    df = pd.concat([df, allocation_df], axis=1)

    # Calculate PI (NPV divided by up-front capital cost) for each solution
    pi_values = []
    for _, row in df.iterrows():
        battery_kwh = row['battery_kwh']
        additional_pv_kw = row['additional_pv_kw']
        capital_cost = 0

        # Calculate battery cost using formula from config
        if battery_kwh > 0:
            battery_cost_per_kwh = max(config.BATTERY_COST_FORMULA['minimum'],
                                       config.BATTERY_COST_FORMULA['base_cost'] *
                                       np.exp(config.BATTERY_COST_FORMULA['exponent'] * battery_kwh))
            battery_cost = battery_kwh * (battery_cost_per_kwh + config.BATTERY_COST_FORMULA['installation'])
            capital_cost += battery_cost

        # Calculate PV cost as the sum over every option that received capacity
        if additional_pv_kw > 0:
            pv_cost = 0
            for option in config.PV_OPTIONS:
                option_name = option['name']
                if f'{option_name}_kw' in row:
                    option_kw = row[f'{option_name}_kw']
                    if option_kw > 0:
                        cost_multiplier = option.get('cost_multiplier', 1.0)
                        cost_per_kw = max(config.PV_COST_FORMULA['minimum'],
                                          config.PV_COST_FORMULA['base_cost'] *
                                          np.exp(config.PV_COST_FORMULA['exponent'] * option_kw))
                        pv_cost += option_kw * cost_per_kw * cost_multiplier
            capital_cost += pv_cost

        # Calculate PI; a zero-investment solution is reported as infinite
        pi = row['npv'] / capital_cost if capital_cost > 0 else float('inf')
        pi_values.append(pi)

    # Add PI to DataFrame
    df['pi'] = pi_values

    # Total runtime
    total_elapsed = time.time() - total_start_time
    print(f"\nOptimization complete! Total runtime: {timedelta(seconds=int(total_elapsed))}")
    print(f"Average time per evaluation: {total_elapsed/(pop_size * n_generations):.3f} seconds")

    # Save results
    df.to_csv(run_dir / 'pareto_solutions.csv', index=False)
    print(f"✅ Pareto front saved to {run_dir/'pareto_solutions.csv'}")

    # Calculate indices for best IRR, NPV, and PI solutions (reused below)
    best_irr_idx = df['irr'].idxmax()
    best_npv_idx = df['npv'].idxmax()
    best_pi_idx = df['pi'].idxmax()

    # Display best solutions for each objective
    print("\nBest solutions found:")
    print(f"Best IRR: {df['irr'].max()*100:.2f}% with {df.loc[best_irr_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[best_irr_idx, 'additional_pv_kw']:.1f} kW additional PV")
    print(f"Best NPV: ${df['npv'].max():,.2f} with {df.loc[best_npv_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[best_npv_idx, 'additional_pv_kw']:.1f} kW additional PV")
    print(f"Best PI: {df['pi'].max():.2f} with {df.loc[best_pi_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[best_pi_idx, 'additional_pv_kw']:.1f} kW additional PV")

    # Find balanced solution (closest to utopia point)
    print("\nFinding balanced solution...")
    # Normalize the objectives to 0-1 scale
    df_norm = df.copy()
    for objective in ('irr', 'npv'):
        lowest = df_norm[objective].min()
        span = df_norm[objective].max() - lowest
        if span > 0:
            df_norm[f'{objective}_norm'] = (df_norm[objective] - lowest) / span
        else:
            # Degenerate front (single solution or identical values): every
            # solution is already at the best value for this objective, and
            # dividing by the zero span would only produce NaN.
            df_norm[f'{objective}_norm'] = 1.0

    # Calculate the Euclidean distance to the utopia point (1,1)
    df_norm['distance'] = np.sqrt((1 - df_norm['irr_norm'])**2 + (1 - df_norm['npv_norm'])**2)

    # Find the balanced solution (minimum distance to utopia)
    balanced_idx = df_norm['distance'].idxmin()
    print(f"Balanced solution: IRR = {df.loc[balanced_idx, 'irr']*100:.2f}%, NPV = ${df.loc[balanced_idx, 'npv']:,.2f}")
    print(f"  with {df.loc[balanced_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[balanced_idx, 'additional_pv_kw']:.1f} kW additional PV")

    # Save checkpoint of key variables to avoid rerunning optimization
    checkpoint = {
        'df': df,
        'best_irr_idx': best_irr_idx,
        'best_npv_idx': best_npv_idx,
        'best_pi_idx': best_pi_idx,
        'balanced_idx': balanced_idx,
        'pareto_front': True
    }

    with open(run_dir / 'optimization_checkpoint.pkl', 'wb') as f:
        pickle.dump(checkpoint, f)
    print(f"✅ Optimization checkpoint saved to {run_dir/'optimization_checkpoint.pkl'}")

    print("\nOptimization successful! Ready for detailed analysis in the next cell.")

except Exception as e:
    print(f"Error during optimization: {e}")
    import traceback
    traceback.print_exc()

    # Try to load checkpoint if it exists
    try:
        checkpoint_path = run_dir / 'optimization_checkpoint.pkl'
        if checkpoint_path.exists():
            print("Attempting to load previous optimization checkpoint...")
            # NOTE: pickle executes arbitrary code on load; only open
            # checkpoint files that this notebook wrote itself.
            with open(checkpoint_path, 'rb') as f:
                checkpoint = pickle.load(f)

            df = checkpoint['df']
            best_irr_idx = checkpoint['best_irr_idx']
            best_npv_idx = checkpoint['best_npv_idx']
            best_pi_idx = checkpoint.get('best_pi_idx')  # Handle older checkpoints without PI
            balanced_idx = checkpoint.get('balanced_idx')  # Handle older checkpoints

            print("✅ Checkpoint loaded successfully")
            print(f"Best IRR: {df['irr'].max()*100:.2f}% with {df.loc[best_irr_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[best_irr_idx, 'additional_pv_kw']:.1f} kW additional PV")
            print(f"Best NPV: ${df['npv'].max():,.2f} with {df.loc[best_npv_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[best_npv_idx, 'additional_pv_kw']:.1f} kW additional PV")
            if 'pi' in df.columns and best_pi_idx is not None:
                print(f"Best PI: {df['pi'].max():.2f} with {df.loc[best_pi_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[best_pi_idx, 'additional_pv_kw']:.1f} kW additional PV")
            if balanced_idx is not None:
                print(f"Balanced solution: IRR = {df.loc[balanced_idx, 'irr']*100:.2f}%, NPV = ${df.loc[balanced_idx, 'npv']:,.2f}")
                print(f"  with {df.loc[balanced_idx, 'battery_kwh']:.1f} kWh battery and {df.loc[balanced_idx, 'additional_pv_kw']:.1f} kW additional PV")
        else:
            # Make the dead end visible instead of finishing silently.
            print(f"No previous checkpoint found at {checkpoint_path}")
    except Exception as load_err:
        print(f"Error loading checkpoint: {load_err}")

In [None]:
# Cell 9: Process and Visualize Optimization Results
print("Processing and visualizing optimization results...")

try:
    # Without in-memory results, recover the Pareto DataFrame from the
    # pickled checkpoint, or stop with a clear message if there is none.
    if 'df' not in locals() or df is None:
        checkpoint_path = run_dir / 'optimization_checkpoint.pkl'
        if not checkpoint_path.exists():
            raise ValueError("No optimization results available. Please run the optimization first.")
        print("Loading results from checkpoint...")
        with open(checkpoint_path, 'rb') as ckpt_file:
            checkpoint = pickle.load(ckpt_file)
        df = checkpoint['df']
        print("Checkpoint loaded successfully!")

    # Normalise column names to lowercase so later lookups are consistent.
    df.columns = [name.lower() for name in df.columns]

    # Report the solutions that maximise IRR and NPV.
    if 'irr' in df.columns:
        best_irr_idx = df['irr'].idxmax()
        best_npv_idx = df['npv'].idxmax()
        print(f"Best IRR: {df.loc[best_irr_idx, 'irr']*100:.2f}% (Battery: {df.loc[best_irr_idx, 'battery_kwh']:.1f} kWh, PV: {df.loc[best_irr_idx, 'additional_pv_kw']:.1f} kW)")
        print(f"Best NPV: ${df.loc[best_npv_idx, 'npv']:,.2f} (Battery: {df.loc[best_npv_idx, 'battery_kwh']:.1f} kWh, PV: {df.loc[best_npv_idx, 'additional_pv_kw']:.1f} kW)")

    # Hand everything to the project's analysis routine.
    from results import run_results_analysis

    analysis_inputs = dict(
        df=df,
        callback=callback,
        pv_profile=pv_profile,
        demand_profile=demand_profile,
        config=config,
        run_dir=run_dir,
    )
    results = run_results_analysis(**analysis_inputs)

    print("\nOptimization analysis complete!")
    print(f"All results have been saved to: {run_dir}")

    # Point the user at the key result files.
    print("\nKey result files:")
    key_files = (
        ("Summary report", run_dir / 'summary_report.txt'),
        ("Pareto front visualization", run_dir / 'plots' / 'pareto_front_irr_npv.png'),
        ("Solution comparison", run_dir / 'data' / 'solution_comparison.csv'),
        ("Best IRR solution details", run_dir / 'data' / 'best_irr_summary.csv'),
        ("Best NPV solution details", run_dir / 'data' / 'best_npv_summary.csv'),
    )
    for label, path in key_files:
        print(f"- {label}: {path}")

except Exception as e:
    print(f"Error processing results: {e}")
    import traceback
    traceback.print_exc()