In [1]:
# ===== Week 1 – Section 5: Baseline Forecast & Capacity Readiness (Business, full) =====
from pathlib import Path
import os, re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Identifiers that name this run's output folders and files.
BASE_PROJECT_NAME = "SDS-CP036-powercast"
WEEK = "Wk01"
SECTION = "Section5"
RUN_TAG = "_".join((WEEK, SECTION))

# A script has __file__; an interactive session does not, so fall back to
# the current working directory there.
if "__file__" in globals():
    BASE_DIR = Path(__file__).resolve().parent
else:
    BASE_DIR = Path.cwd().resolve()

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory that looks like the project root.

    Examines ``start`` and then its ancestors, at most ten directories in
    total, and returns the first one containing a ``.git`` or ``data``
    entry. If none of them qualifies, ``start`` itself is returned.
    """
    # ``start`` followed by its ancestors, capped at ten candidates.
    lineage = [start, *start.parents][:10]
    for folder in lineage:
        has_git = (folder / ".git").exists()
        if has_git or (folder / "data").exists():
            return folder
    return start

def _is_writable(dirpath: Path) -> bool:
    """Report whether files can be created inside ``dirpath``.

    The directory (and any missing parents) is created as a side effect,
    then a short-lived probe file is written into it.

    Args:
        dirpath: Directory to probe; created if it does not exist.

    Returns:
        True when the directory exists (or could be created) and a file
        could be written into it; False on any filesystem error.
    """
    # Local import: tempfile is not among the file's top-level imports.
    import tempfile

    try:
        dirpath.mkdir(parents=True, exist_ok=True)
        # A uniquely named probe cannot clobber a real file that happens to
        # be called "__write_test__", and the context manager removes the
        # probe even if the write itself fails.
        with tempfile.NamedTemporaryFile(dir=dirpath, prefix="__write_test__") as probe:
            probe.write(b"ok")
        return True
    except OSError:
        # Permission problems, read-only filesystems, a file in the way, ...
        return False

# Resolve the project root and derive the data / output locations from it.
REPO_ROOT = find_repo_root(BASE_DIR)
DATA_DIR = REPO_ROOT / "data"

# Write under the repo root when possible, otherwise next to the script.
if _is_writable(REPO_ROOT / "results"):
    OUTPUT_ROOT = REPO_ROOT
else:
    OUTPUT_ROOT = BASE_DIR

RESULTS_DIR = OUTPUT_ROOT / "results" / RUN_TAG
PLOTS_DIR = RESULTS_DIR / "plots"
REPORTS_DIR = RESULTS_DIR / "reports"
for d in (RESULTS_DIR, PLOTS_DIR, REPORTS_DIR):
    d.mkdir(parents=True, exist_ok=True)

BUSINESS_SUMMARY_MD = REPORTS_DIR / f"{BASE_PROJECT_NAME}_{RUN_TAG}_Report_Business.md"
FORECAST_CSV = RESULTS_DIR / f"{BASE_PROJECT_NAME}_{RUN_TAG}_Forecast7d.csv"

# Candidate CSV locations in priority order; an ENERGY_CSV_PATH override,
# when set, is tried first.
env_path = os.environ.get("ENERGY_CSV_PATH")
CANDIDATES = [
    DATA_DIR / "Tetuan City power consumption.csv",
    Path("/mnt/data/Tetuan City power consumption.csv"),
]
if env_path:
    CANDIDATES = [Path(env_path), *CANDIDATES]

# First candidate that exists on disk, or None when there is none.
ENERGY_CSV = next(filter(Path.exists, CANDIDATES), None)
if ENERGY_CSV is None:
    searched = "\n  - ".join(str(p) for p in CANDIDATES)
    raise FileNotFoundError("Tetuan CSV not found:\n  - " + searched)

def _parse_datetime_series(series: pd.Series) -> pd.Series:
    """Parse ``series`` to datetimes, retrying day-first if most values fail.

    Unparseable entries become ``NaT``. When more than half of the values
    are ``NaT`` after the default parse, the whole series is parsed again
    with ``dayfirst=True`` and that result is returned instead.
    """
    parsed = pd.to_datetime(series, errors="coerce")
    failure_rate = parsed.isna().mean()
    if not failure_rate > 0.5:
        return parsed
    return pd.to_datetime(series, errors="coerce", dayfirst=True)

def normalize_and_alias(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``df`` with canonical column names.

    Steps, in order:
      1. Collapse runs of whitespace in column names and strip the ends.
      2. Build a parsed ``DateTime`` column, either from an existing
         ``DateTime`` column or from ``Date`` + ``Time`` joined by a space
         (parsing is delegated to ``_parse_datetime_series``).
      3. Rename ``Zone N Power Consumption`` to ``Sub_metering_N`` for
         N in 1..3, unless the target name is already present.
      4. Drop rows whose ``DateTime`` could not be parsed, sort by
         ``DateTime`` and reset the index.

    The input frame is not modified.

    Args:
        df: Raw frame as read from the CSV.

    Returns:
        A new frame sorted by ``DateTime`` with a fresh 0..n-1 index.

    Raises:
        ValueError: If neither ``DateTime`` nor ``Date`` + ``Time`` is
            present, or if a ``Sub_metering_*`` column is still missing
            after aliasing.
    """
    # Work on a copy so the caller's frame is never left half-renamed,
    # in particular when one of the validations below raises.
    df = df.copy()
    df.columns = [re.sub(r"\s+", " ", c).strip() for c in df.columns]

    if "DateTime" in df.columns:
        df["DateTime"] = _parse_datetime_series(df["DateTime"].astype(str))
    elif {"Date", "Time"}.issubset(df.columns):
        combined = df["Date"].astype(str) + " " + df["Time"].astype(str)
        df["DateTime"] = _parse_datetime_series(combined)
    else:
        raise ValueError("Need 'DateTime' or 'Date'+'Time'")

    zone_map = {
        "Zone 1 Power Consumption": "Sub_metering_1",
        "Zone 2 Power Consumption": "Sub_metering_2",
        "Zone 3 Power Consumption": "Sub_metering_3",
    }
    # Only alias when the source exists and the target name is still free.
    renames = {
        src: dst
        for src, dst in zone_map.items()
        if src in df.columns and dst not in df.columns
    }
    df = df.rename(columns=renames)

    req = ["DateTime", "Sub_metering_1", "Sub_metering_2", "Sub_metering_3"]
    miss = [c for c in req if c not in df.columns]
    if miss:
        raise ValueError(f"Missing after alias: {miss}")

    return df.dropna(subset=["DateTime"]).sort_values("DateTime").reset_index(drop=True)

# Read as comma-separated first; a single resulting column means the
# delimiter was wrong, so re-read as semicolon-separated.
df_raw = pd.read_csv(ENERGY_CSV, sep=",", low_memory=False)
if df_raw.shape[1] == 1:
    df_raw = pd.read_csv(ENERGY_CSV, sep=";", low_memory=False)

# Normalise on a copy so df_raw keeps the file's original columns.
df = normalize_and_alias(df_raw.copy())

# Row-wise total across the three zone columns.
df["Total_kW"] = (
    df[["Sub_metering_1", "Sub_metering_2", "Sub_metering_3"]]
    .astype(float)
    .sum(axis=1)
)

# Daily totals. min_count=1 makes a calendar day with no readings NaN
# instead of 0, so gaps in the data are skipped by the day-of-week mean
# below rather than pulling it towards zero.
daily = (
    df.set_index("DateTime")["Total_kW"]
    .resample("D")
    .sum(min_count=1)
    .to_frame("Total_kW_daily")
)
daily["DoW"] = daily.index.dayofweek  # 0 = Monday ... 6 = Sunday
dow_means = daily.groupby("DoW")["Total_kW_daily"].mean()

# Forecast the 7 calendar days that follow the last observed day.
last_date = daily.index.max()
future_idx = pd.date_range(last_date + pd.Timedelta(days=1), periods=7, freq="D")
future_dow = future_idx.dayofweek

# Each future day takes the historical mean of its weekday; a weekday that
# never occurs in the history falls back to the mean of the weekday means.
overall_mean = dow_means.mean()
forecast_vals = [dow_means.get(dow, overall_mean) for dow in future_dow]

forecast = pd.DataFrame(
    {"Date": future_idx, "Forecast_Total_kW_daily": forecast_vals}
).set_index("Date")
forecast.to_csv(FORECAST_CSV, index=True)

# Plot 1: the last 60 daily totals.
recent = daily.tail(60)
fig, ax = plt.subplots()
ax.plot(recent.index, recent["Total_kW_daily"], label="Actual (last 60d)")
ax.set_title("Recent Daily Totals (last 60 days)")
ax.set_xlabel("Date")
ax.set_ylabel("kW")
ax.legend()  # without this the label passed to plot() is never rendered
recent_path = PLOTS_DIR / "section5_recent_daily.png"
fig.tight_layout()
fig.savefig(recent_path)
plt.close(fig)  # release the figure once it is on disk

# Plot 2: the 7-day baseline forecast.
fig, ax = plt.subplots()
ax.plot(forecast.index, forecast["Forecast_Total_kW_daily"], label="Forecast (7d)")
ax.set_title("7-Day Baseline Forecast (DoW averages)")
ax.set_xlabel("Date")
ax.set_ylabel("kW")
ax.legend()
fcst_path = PLOTS_DIR / "section5_forecast7d.png"
fig.tight_layout()
fig.savefig(fcst_path)
plt.close(fig)

# Headline figures quoted in the report.
first_ts = str(df["DateTime"].min())
last_ts = str(df["DateTime"].max())
_forecast_col = forecast["Forecast_Total_kW_daily"]
peak_day = _forecast_col.idxmax().date()
peak_val = float(_forecast_col.max())

# Markdown body; the trailing double spaces are Markdown hard line breaks.
md = f"""# 💼 Week 1 – {SECTION}: Baseline Forecast & Capacity Readiness (Business-Friendly Report)

## Dataset
Using file: **{ENERGY_CSV.name}**  
Period: **{first_ts} → {last_ts}**  
Rows: **{len(df):,}**

## Key Questions Answered
**Q1: What does the short-term baseline forecast look like?**  
- A simple **day-of-week average** model projects the next 7 days. Peak is **{peak_day}** (~**{peak_val:.0f} kW** daily).

**Q2: How should operations plan around expected peaks?**  
- Staff or schedule energy-intensive tasks away from forecasted peak days; consider shifting flexible loads to lower-demand days.

**Q3: Which visualizations helped?**  
- Recent actuals (last 60 days): `plots/{recent_path.name}`  
- 7-day baseline forecast: `plots/{fcst_path.name}`

## What we computed
- Canonical DateTime & zone aliasing; **Total_kW** across zones.  
- **Daily totals** and **DoW mean** profile.  
- A lightweight **7-day baseline** forecast using day-of-week averages (extendable to richer models later).
"""

# Persist the report, then tell the user where the artefacts went.
BUSINESS_SUMMARY_MD.write_text(md, encoding="utf-8")
print("✅ Section 5 complete.")
print("- Report:", BUSINESS_SUMMARY_MD)
print("- Plots:", recent_path.name, fcst_path.name)


✅ Section 5 complete.
- Report: /home/6376f5a9-d12b-4255-9426-c0091ad440a7/Powercast/results/Wk01_Section5/reports/SDS-CP036-powercast_Wk01_Section5_Report_Business.md
- Plots: section5_recent_daily.png section5_forecast7d.png
