# HW03 — Python Fundamentals (Fixed Imports)
This version loads its helper functions from the `hw03_utils` package; if that package is not installed, it falls back to the back-compat `utils` shim under `src/`.

In [None]:

from pathlib import Path
import sys, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Resolve the project root: when the working directory is a folder named
# "notebooks", the root is its parent; otherwise the working directory itself.
PROJECT_ROOT = Path.cwd()
if PROJECT_ROOT.name == "notebooks":
    PROJECT_ROOT = PROJECT_ROOT.parent

# Standard project locations, all derived from the root.
DATA_PATH = PROJECT_ROOT.joinpath("data", "starter_data.csv")
PROCESSED_DIR = PROJECT_ROOT.joinpath("data", "processed")
FIGS_DIR = PROJECT_ROOT.joinpath("figs")
SRC = PROJECT_ROOT.joinpath("src")

# Preferred: import the helpers from the installed package
# (available after `pip install -e .`).
try:
    from hw03_utils import (
        time_loop_vs_vectorized,
        get_summary_stats,
        groupby_aggregate,
        basic_histogram,
        save_plot,
    )
except ImportError:
    # Fallback: the package is not installed, so import the back-compat
    # `utils` shim directly from the project's src/ directory.
    _src_entry = str(SRC)
    if SRC.exists() and _src_entry not in sys.path:
        # Insert at the front so the project's own `utils` takes precedence
        # over any unrelated top-level `utils` module installed in the
        # environment. The membership check keeps re-running this cell from
        # accumulating duplicate sys.path entries.
        sys.path.insert(0, _src_entry)
    from utils import (
        time_loop_vs_vectorized,
        get_summary_stats,
        groupby_aggregate,
        basic_histogram,
        save_plot,
    )

# Make sure the output folders exist before any later cell writes to them;
# existing folders are left untouched and missing parents are created.
for _out_dir in (PROCESSED_DIR, FIGS_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so a wrong working directory is easy to spot.
print(f"Project root: {PROJECT_ROOT}\nData path: {DATA_PATH}")


## 1) NumPy Operations — elementwise ops and loop vs vectorized timing

In [None]:
# Run the loop-vs-vectorized timing helper with n=300_000.
# NOTE(review): the helper lives in hw03_utils / utils and is not visible here;
# the shape of its return value should be confirmed there.
bench = time_loop_vs_vectorized(n=300_000)
# Trailing bare expression so the notebook renders the result.
bench

## 2) Dataset Loading — `pandas` `.info()` and `.head()`

In [None]:

# Load the homework dataset. When the full starter file is missing, warn and
# switch DATA_PATH to the sample CSV that sits alongside it in data/.
if not DATA_PATH.exists():
    print("WARNING: data/starter_data.csv not found. Using the bundled sample.")
    DATA_PATH = PROJECT_ROOT / "data" / "starter_data_SAMPLE.csv"

df = pd.read_csv(DATA_PATH)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly: wrapping it in display() rendered a stray "None" and depended on
# the IPython-only `display` builtin.
df.info()
# Trailing bare expression so the notebook renders the first rows.
df.head()


## 3) Summary Statistics — numeric `.describe()`

In [None]:
# Build the summary-statistics table for the loaded DataFrame.
# NOTE(review): get_summary_stats is defined in hw03_utils / utils (not visible
# here); it is presumably a DataFrame, given the .head()/.to_csv() calls on it.
summary = get_summary_stats(df)
# Show at most the first 10 rows in the notebook output.
summary.head(10)

## 4) Groupby aggregation — pick a categorical column or auto-create one

In [None]:
# Aggregate the DataFrame by group using the project helper.
# NOTE(review): groupby_aggregate is defined in hw03_utils / utils (not visible
# here); which column it groups on and which aggregates it computes should be
# confirmed there.
grouped = groupby_aggregate(df)
# Show at most the first 10 rows in the notebook output.
grouped.head(10)

## 5) Save outputs — CSV and JSON

In [None]:

# Persist the summary table under data/processed/ in two formats.
# NOTE(review): both writers below omit the index (index=False for CSV,
# orient="records" for JSON). If `summary` is a raw `.describe()` result, the
# statistic names live in that index and would be lost — confirm that
# get_summary_stats() already exposes them as a regular column.
out_csv = PROCESSED_DIR.joinpath("summary.csv")
out_json = PROCESSED_DIR.joinpath("summary.json")

# CSV export.
summary.to_csv(out_csv, index=False)
print(f"Saved: {out_csv}")

# JSON export: one object per row, pretty-printed with a 2-space indent.
summary.to_json(out_json, orient="records", indent=2)
print(f"Saved: {out_json}")


## Bonus) Basic plot — histogram of first numeric column

In [None]:

# Plot a histogram of the first numeric column, if the dataset has one.
num_cols = df.select_dtypes(include="number").columns.tolist()
if not num_cols:
    # Nothing to plot: the dataset has no numeric columns.
    print("No numeric columns found for plotting.")
else:
    hist_path = FIGS_DIR / "basic_hist.png"
    # NOTE(review): basic_histogram is a project helper not visible here; its
    # return value is presumably the path the figure was written to — confirm.
    saved = basic_histogram(
        df[num_cols[0]],
        title=f"Histogram of {num_cols[0]}",
        save_path=hist_path,
    )
    print(f"Saved plot: {saved}")
