In [1]:
import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Generate a synthetic dataset for 2 weeks (14 days) with hourly intervals
date_rng = pd.date_range(start='2025-10-01', end='2025-10-15', freq='H', inclusive="left")

# Create sample data
data = {
    "timestamp": date_rng,
    "building_id": ["B001"] * len(date_rng),
    "building_type": ["commercial"] * len(date_rng),
    "temperature_C": np.random.normal(25, 5, len(date_rng)).round(2),
    "humidity_%": np.random.randint(40, 80, len(date_rng)),
    "wind_speed_mps": np.random.uniform(1, 6, len(date_rng)).round(2),
    "solar_radiation_Wm2": np.clip(np.random.normal(400, 150, len(date_rng)), 0, None).round(2),
    "day_of_week": [d.dayofweek for d in date_rng],
    "hour_of_day": [d.hour for d in date_rng],
    "is_weekend": [1 if d.dayofweek >= 5 else 0 for d in date_rng],
    "holiday_flag": np.random.choice([0, 1], len(date_rng), p=[0.95, 0.05]),
    "occupancy_level": np.random.randint(5, 50, len(date_rng)),
    "HVAC_status": np.random.choice(["On", "Off"], len(date_rng), p=[0.7, 0.3]),
    "lighting_load_kW": np.random.uniform(2, 10, len(date_rng)).round(2),
    "equipment_load_kW": np.random.uniform(3, 15, len(date_rng)).round(2),
    "electricity_price_$per_kWh": np.random.uniform(0.10, 0.25, len(date_rng)).round(3),
}

df = pd.DataFrame(data)

# Compute derived features
df["season"] = pd.cut(df["timestamp"].dt.month,
                      bins=[0, 3, 6, 9, 12],
                      labels=["Winter", "Spring", "Summer", "Fall"],
                      right=True)
df["total_load_kW"] = df["lighting_load_kW"] + df["equipment_load_kW"]
df["energy_consumption_kWh"] = (df["total_load_kW"] * np.random.uniform(0.9, 1.1, len(df))).round(2)

# Save to CSV
df.to_csv("energy_consumption_dataset.csv", index=False)

print("✅ Synthetic dataset created: energy_consumption_dataset.csv")
print(df.head())


  date_rng = pd.date_range(start='2025-10-01', end='2025-10-15', freq='H', inclusive="left")


✅ Synthetic dataset created: energy_consumption_dataset.csv
            timestamp building_id building_type  temperature_C  humidity_%  \
0 2025-10-01 00:00:00        B001    commercial          27.48          50   
1 2025-10-01 01:00:00        B001    commercial          24.31          57   
2 2025-10-01 02:00:00        B001    commercial          28.24          51   
3 2025-10-01 03:00:00        B001    commercial          32.62          48   
4 2025-10-01 04:00:00        B001    commercial          23.83          49   

   wind_speed_mps  solar_radiation_Wm2  day_of_week  hour_of_day  is_weekend  \
0            5.66               411.95            2            0           0   
1            2.60               552.00            2            1           0   
2            3.97                88.00            2            2           0   
3            2.85               207.38            2            3           0   
4            3.27               600.08            2            4       

In [1]:
%history -f energy.py
