# In [2]:
import pandas as pd
import numpy as np
import re
import math

# Default workbook read by compute_average_panel_metrics(). This is a relative
# path, so it is resolved against the current working directory at call time.
EXCEL_PATH = "PV Panels and Companies.xlsx"

# First plain decimal number in a string: "17.5", "400" or ".5".
# Requiring at least one digit keeps stray punctuation ("approx. 17.5") from
# being captured as "." and blowing up in float().
_FIRST_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?|\.\d+")


def _parse_leading_number(val) -> float:
    """Return the first number found in a spreadsheet cell, or NaN if none."""
    if isinstance(val, bool):
        # bool is an int subclass; a True/False cell is not a measurement.
        return np.nan
    if isinstance(val, (int, float, np.integer, np.floating)):
        # Excel may store the cell as a real number rather than text.
        return float(val)
    if isinstance(val, str):
        match = _FIRST_NUMBER_RE.search(val)
        if match:
            return float(match.group(0))
    return np.nan


def compute_average_panel_metrics(excel_path: str = EXCEL_PATH) -> dict:
    """
    Reads the panel table and computes:
      - average panel area (ft²)
      - average panel wattage (W)
      - average cost per watt ($/W)
      - average cost per panel ($)

    The first sheet of the workbook is expected to have a "Brand" column whose
    values include the row labels "Size", "Power" and
    "Average cost per watt*"; every other column is treated as one panel.

    For the "Size" and "Power" rows the first number in each cell is used
    (numeric cells are taken as-is). The unit text is not inspected, so the
    ft² / W interpretation assumes the sheet is written in those units.
    Cells that cannot be parsed are ignored by the averages.

    Returns a dict with keys "avg_panel_area_ft2", "avg_panel_watt",
    "avg_cost_per_watt" and "avg_cost_per_panel".

    Raises FileNotFoundError if the workbook does not exist, KeyError if
    there is no "Brand" column, and IndexError if one of the three row
    labels is missing.
    """
    df = pd.read_excel(excel_path, sheet_name=0)

    def labelled_row(label: str) -> pd.Series:
        # First row carrying this label, without the "Brand" label column.
        return df[df["Brand"] == label].iloc[0, 1:]

    size_row = labelled_row("Size")
    power_row = labelled_row("Power")
    cost_row = labelled_row("Average cost per watt*")

    size_ft2 = size_row.apply(_parse_leading_number)
    power_w = power_row.apply(_parse_leading_number)
    # Cost cells are expected to be plain numbers; anything else becomes NaN.
    cost_per_w = pd.to_numeric(cost_row, errors="coerce")

    # Series.mean() skips NaN, so unparseable cells do not poison the average.
    avg_panel_area_ft2 = size_ft2.mean()
    avg_panel_watt = power_w.mean()
    avg_cost_per_watt = cost_per_w.mean()
    # Cost of one "average" panel: product of the two averages, not the mean
    # of the per-panel products.
    avg_cost_per_panel = avg_panel_watt * avg_cost_per_watt

    return {
        "avg_panel_area_ft2": avg_panel_area_ft2,
        "avg_panel_watt": avg_panel_watt,
        "avg_cost_per_watt": avg_cost_per_watt,
        "avg_cost_per_panel": avg_cost_per_panel,
    }


# Compute stats once when the notebook runs.
# NOTE: this reads the workbook at EXCEL_PATH as soon as the cell/module is
# executed, so it raises FileNotFoundError when that file is not present in
# the working directory. The resulting dict is also captured as the default
# `panel_stats` argument of the estimate_* functions defined after it.
PANEL_STATS = compute_average_panel_metrics()

def estimate_pv_from_area(roof_area_ft2: float, panel_stats: dict = PANEL_STATS) -> dict:
    """
    Estimate a PV installation for one roof from its usable area (ft²).

    The area is divided by the average panel footprint and rounded down to a
    whole number of panels. The returned dict holds:
      - roof_area_ft2: the input area, echoed back
      - avg_panel_area_ft2, avg_panel_watt, avg_cost_per_watt: the panel
        averages taken from `panel_stats`
      - num_panels: whole panels that fit (0 if none fit)
      - system_size_kw: total DC system size (kW)
      - estimated_panel_cost_usd: total panel hardware cost ($)

    `panel_stats` must provide "avg_panel_area_ft2", "avg_panel_watt",
    "avg_cost_per_watt" and "avg_cost_per_panel".
    """
    area_per_panel = panel_stats["avg_panel_area_ft2"]
    watts_per_panel = panel_stats["avg_panel_watt"]
    dollars_per_panel = panel_stats["avg_cost_per_panel"]

    # Only whole panels count, so round the ratio down.
    fit_count = math.floor(roof_area_ft2 / area_per_panel)

    if fit_count > 0:
        panel_count = fit_count
        # Watts -> kilowatts.
        size_kw = (fit_count * watts_per_panel) / 1000.0
        hardware_cost = fit_count * dollars_per_panel
    else:
        # Roof too small (or area non-positive): report an empty system.
        panel_count = 0
        size_kw = 0.0
        hardware_cost = 0.0

    return {
        "roof_area_ft2": roof_area_ft2,
        "avg_panel_area_ft2": area_per_panel,
        "avg_panel_watt": watts_per_panel,
        "avg_cost_per_watt": panel_stats["avg_cost_per_watt"],
        "num_panels": panel_count,
        "system_size_kw": size_kw,
        "estimated_panel_cost_usd": hardware_cost,
    }


def estimate_pv_for_roofs(roof_areas_ft2, panel_stats: dict = PANEL_STATS):
    """
    Take an iterable (list/Series) of roof areas in ft² and compute:

      1.  A per-roof DataFrame (index named "roof_id", numbered 0..n-1 in
          input order) with:
          - roof_area_ft2
          - avg_panel_area_ft2
          - avg_panel_watt
          - avg_cost_per_watt
          - num_panels
          - system_size_kw
          - estimated_panel_cost_usd

      2.  A summary dict with:
          - num_roofs
          - total_panels
          - total_system_kw
          - total_panel_cost_usd

    Returns the tuple (per_roof_df, summary). An empty input yields an empty
    DataFrame that still has all of the columns above and a summary of zeros.
    """
    # Declared up front so the frame has the same columns even when there are
    # no roofs; without this an empty input produced a column-less frame and
    # the summary lookups raised KeyError.
    columns = [
        "roof_area_ft2",
        "avg_panel_area_ft2",
        "avg_panel_watt",
        "avg_cost_per_watt",
        "num_panels",
        "system_size_kw",
        "estimated_panel_cost_usd",
    ]

    # Convert to a clean Series of floats
    areas = pd.Series(roof_areas_ft2).astype(float)
    per_roof_records = [estimate_pv_from_area(a, panel_stats) for a in areas]

    per_roof_df = pd.DataFrame(per_roof_records, columns=columns)
    per_roof_df.index.name = "roof_id"  # or other identifier

    summary = {
        "num_roofs": int(len(per_roof_df)),
        "total_panels": int(per_roof_df["num_panels"].sum()),
        # float() keeps the totals plain floats, including the empty case
        # where the column sum is the integer 0.
        "total_system_kw": float(per_roof_df["system_size_kw"].sum()),
        "total_panel_cost_usd": float(per_roof_df["estimated_panel_cost_usd"].sum()),
    }

    return per_roof_df, summary


# Cell output: FileNotFoundError: [Errno 2] No such file or directory: 'PV Panels and Companies.xlsx'