# In [1]:
# ===== Week 1 – Section 2: Temporal Trends & Seasonality (Business) =====
from pathlib import Path
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's "whitegrid" theme to every figure created below.
# set_theme() is the preferred interface (seaborn >= 0.11); sns.set() is only
# an alias for it that the seaborn docs say may be removed in the future.
sns.set_theme(style="whitegrid")

# ---------- Shared Header / Naming ----------
# Identifiers that are combined into every output folder and file name.
BASE_PROJECT_NAME = "SDS-CP036-powercast"
WEEK = "Wk01"
SECTION = "Section2"
RUN_TAG = "_".join((WEEK, SECTION))

# Anchor directory for locating the repository: the folder holding this file
# when run as a script, otherwise (no __file__, e.g. in a notebook) the
# current working directory.
try:
    BASE_DIR = Path(__file__).resolve().parent
except NameError:
    BASE_DIR = Path.cwd().resolve()

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that marks the repo root.

    ``start`` itself and then its parents are inspected, nearest first, for at
    most ten directories in total. A directory qualifies when it contains an
    entry named ``.git`` or ``data``.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first qualifying directory, or ``start`` unchanged when none of the
        inspected directories qualifies.
    """
    # start + its ancestors, capped at ten candidates in total.
    candidates = (start, *start.parents)[:10]
    for folder in candidates:
        if any((folder / marker).exists() for marker in (".git", "data")):
            return folder
    return start

# ---------- Input location ----------
REPO_ROOT = find_repo_root(BASE_DIR)
DATA_DIR = REPO_ROOT.joinpath("data")
ENERGY_CSV = DATA_DIR.joinpath("power_consumption.csv")

# ---------- Output folders (created on the spot if absent) ----------
RESULTS_DIR = REPO_ROOT.joinpath("results", RUN_TAG)
PLOTS_DIR = RESULTS_DIR.joinpath("plots")
REPORTS_DIR = RESULTS_DIR.joinpath("reports")
for out_dir in (RESULTS_DIR, PLOTS_DIR, REPORTS_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# ---------- Output artifacts ----------
# Every artifact file name starts with "<project>_<run tag>".
_ARTIFACT_STEM = f"{BASE_PROJECT_NAME}_{RUN_TAG}"

PLOT_DAILY = PLOTS_DIR / f"{_ARTIFACT_STEM}_Plot_Daily_Line.png"
PLOT_BOXP = PLOTS_DIR / f"{_ARTIFACT_STEM}_Plot_Weekly_Box.png"
PLOT_HEAT = PLOTS_DIR / f"{_ARTIFACT_STEM}_Plot_Heatmap_Zone1.png"

# The summary lives under reports/, the wrapper report directly under the
# run's results folder.
BUSINESS_SUMMARY_MD = REPORTS_DIR / f"{_ARTIFACT_STEM}_Report_Business.md"
BUSINESS_REPORT_MD = RESULTS_DIR / f"{_ARTIFACT_STEM}_Business_Report.md"

# ---------- Load & Prepare (hardened) ----------
# Decide the delimiter from the header row only (nrows=0 parses no data rows):
# a header that yields a single column under "," means the file is really
# ";"-separated. This avoids parsing the whole file twice for ";" files.
header_cols = pd.read_csv(ENERGY_CSV, sep=",", nrows=0).columns
csv_sep = ";" if len(header_cols) == 1 else ","
df = pd.read_csv(ENERGY_CSV, sep=csv_sep, low_memory=False)
df.columns = df.columns.str.strip()

# Some exports fuse date and time into one "Date,Time" column; split it at the
# first comma into separate "Date" and "Time" columns.
if "Date,Time" in df.columns and (("Date" not in df.columns) or ("Time" not in df.columns)):
    dt = df["Date,Time"].astype(str).str.split(",", n=1, expand=True)
    dt.columns = ["Date", "Time"]
    df = pd.concat([df.drop(columns=["Date,Time"]), dt], axis=1)

# Fail early, with the list of absent names, if the expected schema is not there.
required_cols = ["Date", "Time", "Sub_metering_1", "Sub_metering_2", "Sub_metering_3"]
missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# Build one timestamp per row (day-first dates). Values that cannot be parsed
# become NaT and those rows are dropped; the rest are sorted chronologically.
df["DateTime"] = pd.to_datetime(df["Date"].astype(str) + " " + df["Time"].astype(str),
                                dayfirst=True, errors="coerce")
df = df.dropna(subset=["DateTime"]).sort_values("DateTime").reset_index(drop=True)

# Coerce the three zone readings to numbers (non-numeric entries become NaN)
# and discard only the rows where all three zones are missing.
zones = ["Sub_metering_1", "Sub_metering_2", "Sub_metering_3"]
for z in zones:
    df[z] = pd.to_numeric(df[z], errors="coerce")
df = df.dropna(subset=zones, how="all")

# A DatetimeIndex is what the resampling / day-name / hour lookups below rely on.
df = df.set_index("DateTime")

# ---------- Plots ----------
# Display label for each raw zone column. Labels are attached by renaming the
# data rather than by passing a positional list to plt.legend(labels=...),
# which pairs labels with artists purely by draw order and can silently
# mislabel a series.
zone_map = {
    "Sub_metering_1": "Zone 1 (Kitchen)",
    "Sub_metering_2": "Zone 2 (Laundry)",
    "Sub_metering_3": "Zone 3 (Water Heater & AC)",
}
# Calendar order for the weekday axes (used by the box plot and the heatmap).
order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Daily line: mean reading per calendar day, one line per zone.
daily_df = df[zones].resample("D").mean()
plt.figure(figsize=(14, 5))
# seaborn draws the legend itself from the (renamed) column names.
sns.lineplot(data=daily_df.rename(columns=zone_map))
plt.title("Daily Average Energy Consumption by Zone")
plt.xlabel("Date")
plt.ylabel("Watt-hour")
plt.tight_layout()
plt.savefig(PLOT_DAILY)
plt.close()

# Box plot by DoW: distribution of the raw readings per weekday and zone.
df_plot = df[zones].copy()
df_plot["DayOfWeek"] = df_plot.index.day_name()
# melt() discards the index, so there is no need to reset it first.
melted = df_plot.melt(id_vars=["DayOfWeek"], value_vars=zones, var_name="Zone", value_name="Wh")
melted["Zone"] = melted["Zone"].map(zone_map)
plt.figure(figsize=(11, 6))
sns.boxplot(x="DayOfWeek", y="Wh", hue="Zone", data=melted, order=order)
plt.title("Energy Usage Distribution by Day of Week")
plt.xlabel("Day of Week")
plt.ylabel("Watt-hour")
plt.xticks(rotation=25)
plt.tight_layout()
plt.savefig(PLOT_BOXP)
plt.close()

# Heatmap Zone 1: mean reading for every (weekday, hour) cell.
# Only the Zone 1 column is copied; the other columns are not needed here.
df_h = df[["Sub_metering_1"]].copy()
df_h["Hour"] = df_h.index.hour
df_h["DayOfWeek"] = df_h.index.day_name()
pivot = df_h.pivot_table(
    index="DayOfWeek", columns="Hour", values="Sub_metering_1", aggfunc="mean"
).reindex(order)  # rows in calendar order instead of alphabetical
plt.figure(figsize=(13, 5))
sns.heatmap(pivot, cmap="YlOrRd")
plt.title("Zone 1 (Kitchen): Avg Usage Heatmap (Hour vs Day)")
plt.xlabel("Hour of Day")
plt.ylabel("Day of Week")
plt.tight_layout()
plt.savefig(PLOT_HEAT)
plt.close()

# ---------- Business Summary ----------
# Narrative summary (fixed text; only the section name is interpolated).
# The trailing double spaces after each question are Markdown hard line breaks.
biz_md = f"""# 💼 Week 1 – {SECTION}: Temporal Trends & Seasonality (Business-Friendly Report)

## Key Questions Answered
**Q1: What daily or weekly patterns are observable in power consumption across the three zones?**  
I reviewed usage by day and week. Kitchen (Zone 1) spikes around meal times. Laundry (Zone 2) is busier on weekends. Water heating/AC (Zone 3) is steadier.

**Q2: Are there seasonal or time-of-day peaks and dips in energy usage?**  
Yes. The line chart shows daily trends; the heatmap highlights time-of-day peaks in the kitchen around lunch and dinner.

**Q3: Which visualizations helped you uncover these patterns?**  
- Line plot (daily averages)  
- Box plot (by day of week)  
- Heatmap (hour vs day for Zone 1)
"""
BUSINESS_SUMMARY_MD.write_text(biz_md, encoding="utf-8")

# ---------- Wrapper report ----------
# The wrapper is written next to the plots/ and reports/ folders, so every
# target is referenced by its path relative to the wrapper's own directory.
# Previously the summary was cited by bare file name only, which neither
# formed a link nor pointed at the folder the summary is actually written to.
report_home = BUSINESS_REPORT_MD.parent
summary_href = BUSINESS_SUMMARY_MD.relative_to(report_home).as_posix()
daily_href = PLOT_DAILY.relative_to(report_home).as_posix()
boxp_href = PLOT_BOXP.relative_to(report_home).as_posix()
heat_href = PLOT_HEAT.relative_to(report_home).as_posix()

wrapper = f"""# {BASE_PROJECT_NAME} — {RUN_TAG} — Business Report

🔗 **Open Business Summary:** [`{BUSINESS_SUMMARY_MD.name}`]({summary_href})

### Visuals
- ![{PLOT_DAILY.stem}]({daily_href})
- ![{PLOT_BOXP.stem}]({boxp_href})
- ![{PLOT_HEAT.stem}]({heat_href})
"""
BUSINESS_REPORT_MD.write_text(wrapper, encoding="utf-8")

print("✅ Section 2 (Business) complete.")


# Output: ✅ Section 2 (Business) complete.
