In [2]:
import pandas as pd

# Day 5 feature build: load the Day 4 export, derive calendar features and a
# frequency band from it, then write the enriched table back out as CSV.

# load Day 4 file (path is resolved relative to the current working directory)
src = "Data/Processed/uk_electricity_pbi.csv"
df = pd.read_csv(src, parse_dates=["dtm"])

# add Day 5 features, all derived from the parsed timestamp column
df["date"] = df["dtm"].dt.date
df["hour"] = df["dtm"].dt.hour
df["dow"] = df["dtm"].dt.day_name()
df["is_weekend"] = df["dtm"].dt.dayofweek >= 5  # dayofweek: Monday=0 ... Sunday=6

# example frequency band (edit thresholds if you want)
# The outer edges are open-ended so every numeric reading gets a band; finite
# outer edges such as 0 and 999 would leave any value <= 0 or > 999 as NaN.
# pd.cut builds right-closed intervals by default, so a reading exactly equal
# to an edge lands in the lower band (e.g. 50.0 -> "49.9–50.0").
bins = [float("-inf"), 49.9, 50.0, 50.1, float("inf")]
labels = ["<49.9", "49.9–50.0", "50.0–50.1", ">50.1"]
df["freq_band"] = pd.cut(df["f"], bins=bins, labels=labels)

# save Day 5 output (index is dropped; the path is echoed as the cell result)
out = "Data/Processed/uk_electricity_day5.csv"
df.to_csv(out, index=False)
out


'Data/Processed/uk_electricity_day5.csv'

In [4]:
from pathlib import Path
import pandas as pd

# Locate the project root: the nearest directory, starting at the current
# working directory and moving upwards, that contains a README.md.
# If none is found, fall back to the working directory itself.
cwd = Path.cwd()
PROJECT_ROOT = cwd
for folder in (cwd, *cwd.parents):
    if folder.joinpath("README.md").exists():
        PROJECT_ROOT = folder
        break

print("CWD:", cwd)
print("PROJECT_ROOT:", PROJECT_ROOT)

# Processed-data folder under the project root (created if missing).
# Adjust ONLY if you truly don't use Data/Processed.
processed_dir = PROJECT_ROOT.joinpath("Data", "Processed")
processed_dir.mkdir(parents=True, exist_ok=True)

# Input file expected inside the processed-data folder.
src = processed_dir.joinpath("uk_electricity_pbi.csv")
print("Looking for:", src)

CWD: c:\DataAnalystTraining\Energy-Portfolio\Notebooks
PROJECT_ROOT: c:\DataAnalystTraining\Energy-Portfolio
Looking for: c:\DataAnalystTraining\Energy-Portfolio\Data\Processed\uk_electricity_pbi.csv


In [5]:
# Load the source CSV and strip stray whitespace from the header names.
df = pd.read_csv(src)
df.columns = [name.strip() for name in df.columns]

# The timestamp column is mandatory; report the available columns if absent.
if "dtm" not in df.columns:
    raise ValueError(f"Couldn't find 'dtm' column. Columns are: {df.columns.tolist()}")

# Parse timestamps; unparseable values are coerced to NaT and those rows dropped.
df = df.assign(dtm=pd.to_datetime(df["dtm"], errors="coerce")).dropna(subset=["dtm"])

# Frequency column: take the first known name that is present, in priority order.
freq_col = next(
    (
        name
        for name in ("f", "Frequency (Hz)", "frequency", "frequency_hz")
        if name in df.columns
    ),
    None,
)
if freq_col is None:
    raise ValueError(f"Couldn't find frequency column. Columns are: {df.columns.tolist()}")

# Coerce frequency to numeric, drop rows that fail, and order by timestamp.
df = (
    df.assign(**{freq_col: pd.to_numeric(df[freq_col], errors="coerce")})
    .dropna(subset=[freq_col])
    .sort_values("dtm")
)
print("Loaded:", df.shape, "| freq_col =", freq_col)
df.head()


Loaded: (2591999, 6) | freq_col = f


Unnamed: 0,dtm,f,minute,rolling_5min,deviation_from_50,out_of_band
0,2025-11-01 00:00:01,49.964,2025-11-01 00:00:00,49.964,-0.036,False
1,2025-11-01 00:00:02,49.967,2025-11-01 00:00:00,49.9655,-0.033,False
2,2025-11-01 00:00:03,49.964,2025-11-01 00:00:00,49.965,-0.036,False
3,2025-11-01 00:00:04,49.965,2025-11-01 00:00:00,49.965,-0.035,False
4,2025-11-01 00:00:05,49.969,2025-11-01 00:00:00,49.9658,-0.031,False


In [6]:
# Band limits for the frequency column (adjust if you want)
LOW, HIGH = 49.8, 50.2

# Flag every raw row whose reading is below LOW or above HIGH, so the
# flags can be counted per minute after resampling.
df["out_of_band"] = df[freq_col].lt(LOW) | df[freq_col].gt(HIGH)

# Resample to 1-minute buckets on the timestamp index.
g = df.set_index("dtm")
minute_stats = {
    "f_mean": (freq_col, "mean"),
    "f_min": (freq_col, "min"),
    "f_max": (freq_col, "max"),
    "f_std": (freq_col, "std"),
    "out_of_band_count": ("out_of_band", "sum"),
}
agg_1min = g.resample("1min").agg(**minute_stats).reset_index()

# Shrink the output:
# - float32 / int32 use fewer bytes in memory
# - rounding plus float_format keeps the CSV text short
float_cols = ["f_mean", "f_min", "f_max", "f_std"]
agg_1min = agg_1min.astype(
    {**dict.fromkeys(float_cols, "float32"), "out_of_band_count": "int32"}
).round(dict.fromkeys(float_cols, 4))

# Write the per-minute table next to the other processed files.
out_1min = processed_dir / "uk_electricity_1min.csv"
agg_1min.to_csv(out_1min, index=False, float_format="%.4f")

print("Saved:", out_1min)
print("Rows:", len(agg_1min))

Saved: c:\DataAnalystTraining\Energy-Portfolio\Data\Processed\uk_electricity_1min.csv
Rows: 43200


In [7]:
from pathlib import Path
# Quick diagnostic: show the working directory and whether the two candidate
# data folders exist beneath it.
here = Path.cwd()
print("CWD:", here)
print("Exists Data/Processed?", here.joinpath("Data", "Processed").exists())
print("Exists Notebooks/data/processed?", here.joinpath("Notebooks", "data", "processed").exists())


CWD: c:\DataAnalystTraining\Energy-Portfolio\Notebooks
Exists Data/Processed? True
Exists Notebooks/data/processed? False
