In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import warnings
warnings.filterwarnings('ignore')

# Load the dataset: read the CSV, convert the 'date' column to timestamps,
# then order the rows chronologically with a fresh 0..n-1 index.
df = (
    pd.read_csv('energydata_complete.csv')
    .assign(date=lambda raw: pd.to_datetime(raw['date']))
    .sort_values('date')
    .reset_index(drop=True)
)

# Quick summary of what was loaded.
earliest, latest = df['date'].min(), df['date'].max()
print("Data shape:", df.shape)
print("Date range:", earliest, "to", latest)
print("Columns:", df.columns.tolist())


# Task 1: appliance energy use over the full record (top panel) and over
# the first seven days of data (bottom panel).

fig, axes = plt.subplots(2, 1, figsize=(14, 8))
full_ax, week_ax = axes

# Top panel: every observation in the dataset.
full_ax.plot(df['date'], df['Appliances'], color='steelblue', linewidth=0.5, alpha=0.8)
full_ax.set_title('Appliances Energy Consumption – Entire Period', fontsize=13)

# Bottom panel: rows in [first timestamp, first timestamp + 7 days).
window_start = df['date'].min()
window_stop = window_start + pd.Timedelta(days=7)
in_window = (df['date'] >= window_start) & (df['date'] < window_stop)
one_week = df[in_window]
week_ax.plot(one_week['date'], one_week['Appliances'], color='darkorange', linewidth=1.2)
week_ax.set_title('Appliances Energy Consumption – First Week', fontsize=13)

# Both panels share the same axis labels and a light grid.
for panel in axes:
    panel.set_xlabel('Date')
    panel.set_ylabel('Energy (Wh)')
    panel.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig('task1_consumption_plots.png', dpi=150)
plt.close(fig)
print("Task 1 saved.")


# Task 2: heatmap of mean hourly appliance consumption (hour of day x day of
# week) for the first complete midnight-to-midnight week in the data.

df_h = df.set_index("date")  # resample() needs a datetime index

# Mean consumption within each clock hour.
hourly = df_h["Appliances"].resample("h").mean()

# Start the week at the first midnight on or after the first hourly bin, so
# that every day in the window contributes a full set of hours.
first_midnight = hourly.index[0].normalize()
if hourly.index[0] != first_midnight:
    first_midnight = first_midnight + pd.Timedelta(days=1)

week_start = first_midnight
week_end = week_start + pd.Timedelta(days=7)

# Label slicing with .loc includes both endpoints, so stop one hour before
# week_end to avoid pulling in hour 0 of the eighth day.
week = hourly.loc[week_start:week_end - pd.Timedelta(hours=1)]

heat_df = pd.DataFrame({
    "dow": week.index.dayofweek,
    "hour": week.index.hour,
    "val": week.values
})
heat_df["day"] = heat_df["dow"].map({0:"Mon",1:"Tue",2:"Wed",3:"Thu",4:"Fri",5:"Sat",6:"Sun"})

# Rows are hours, columns are weekdays. pivot_table only emits rows for hours
# that have data, so reindex to the full 0..23 range: the y tick labels below
# are positional and assume exactly 24 rows. Hours with no data show as blank
# (NaN) cells instead of shifting every later row under the wrong label.
pivot = (heat_df.pivot_table(index="hour", columns="day", values="val", aggfunc="mean")
         .reindex(index=list(range(24)),
                  columns=["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]))

plt.figure(figsize=(4.2, 6.2))

im = plt.imshow(
    pivot.values,
    aspect="auto",
    origin="lower",          # hour 0 at the bottom of the y axis
    cmap="YlOrRd",           # yellow -> orange -> red
    interpolation="nearest"  # keep each cell a crisp block
)

plt.title("Hourly Appliances Consumption for a\nDay of Week", fontsize=10)
plt.xlabel("Day of Week", fontsize=9)
plt.ylabel("Hour of day", fontsize=9)

# Tick positions are row/column indices of the image array.
plt.xticks(range(7), pivot.columns, fontsize=8)
plt.yticks(range(0, 24, 1), range(0, 24, 1), fontsize=7)

cbar = plt.colorbar(im)
cbar.set_label("Appliances", fontsize=9)

plt.tight_layout()
plt.savefig("task2_heatmap.png", dpi=200)
plt.close()

# Task 3: distribution of appliance energy readings as a 50-bin histogram.

fig, ax = plt.subplots(figsize=(9, 5))
appliance_energy = df['Appliances']
ax.hist(appliance_energy, bins=50, color='teal', edgecolor='white', alpha=0.85)
ax.set_title('Histogram of Appliances Energy Consumption', fontsize=13)
ax.set(xlabel='Energy (Wh)', ylabel='Frequency')
# Horizontal grid lines only, to help read off the bar heights.
ax.grid(True, axis='y', alpha=0.3)
fig.tight_layout()
fig.savefig('task3_histogram.png', dpi=150)
plt.close(fig)
print("Task 3 saved.")


# Task 4: appliance energy against NSM (number of seconds since midnight).

# Derive NSM from the hour/minute/second parts of each timestamp.
clock = df['date'].dt
df['NSM'] = clock.hour * 3600 + clock.minute * 60 + clock.second

fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(df['NSM'], df['Appliances'], alpha=0.05, s=5, color='purple')
ax.set_title('Appliances Energy Consumption vs NSM (Seconds from Midnight)', fontsize=12)
ax.set(xlabel='NSM (seconds from midnight)', ylabel='Appliances Energy (Wh)')

# Put a tick every six hours and label it as a clock time.
tick_hours = (0, 6, 12, 18, 24)
ax.set_xticks([hour * 3600 for hour in tick_hours])
ax.set_xticklabels([f'{hour}:00' for hour in tick_hours])

ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig('task4_nsm_vs_energy.png', dpi=150)
plt.close(fig)
print("Task 4 saved.")


# Task 5: appliance energy against the Press_mm_hg column.

fig, ax = plt.subplots(figsize=(9, 5))
pressure, energy = df['Press_mm_hg'], df['Appliances']
ax.scatter(pressure, energy, alpha=0.05, s=5, color='crimson')
ax.set_title('Appliances Energy Consumption vs Press_mm_Hg', fontsize=12)
ax.set(xlabel='Atmospheric Pressure (mm Hg)', ylabel='Appliances Energy (Wh)')
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig('task5_pressure_vs_energy.png', dpi=150)
plt.close(fig)
print("Task 5 saved.")

print("\nAll tasks complete!")

Data shape: (19735, 29)
Date range: 2016-01-11 17:00:00 to 2016-05-27 18:00:00
Columns: ['date', 'Appliances', 'lights', 'T1', 'RH_1', 'T2', 'RH_2', 'T3', 'RH_3', 'T4', 'RH_4', 'T5', 'RH_5', 'T6', 'RH_6', 'T7', 'RH_7', 'T8', 'RH_8', 'T9', 'RH_9', 'T_out', 'Press_mm_hg', 'RH_out', 'Windspeed', 'Visibility', 'Tdewpoint', 'rv1', 'rv2']
Task 1 saved.
Task 3 saved.
Task 4 saved.
Task 5 saved.

All tasks complete!
