In [1]:
import os
import pandas as pd
import pytz
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

In [3]:
# Root directory of the dataset; each load metric is read from a sub-path of it.
data_root = "path/to/surf_dataset"

# One entry per load-average window (1, 5 and 15), as
# (label used in plots/cache names, folder name under data_root, processed file name).
loads = [
    (f"load{window}", f"node_load{window}", f"node_load{window}_diurnal.parquet")
    for window in (1, 5, 15)
]

In [5]:
# Diurnal load profile: for every load metric in `loads`, draw one bar per hour of
# day (mean over all samples falling in that hour) with the per-hour standard
# deviation as an upward-only error bar, then save the figure as a timestamped PDF.
color = ['lightcoral', 'steelblue', 'yellowgreen']
marker = ['o', '^', 's']
hatch = ['', '/', '\\']

fig, ax = plt.subplots(figsize=(11,5))

barWidth = 0.25
# Horizontal shift of each load's bars so the three bars of one hour sit side by side.
offset = [-barWidth, 0, barWidth]
df = None

# Aggregated values are cached per load so a re-run can skip reading the parquet data.
# The directory has to exist before the first cache file can be written.
cache_dir = "cache"
os.makedirs(cache_dir, exist_ok=True)

for index, (load, folder_name, processed_data_path) in enumerate(loads):

    cach_file = os.path.join(cache_dir, f"loads_diurnal_hourly_cache_{load}.npy")

    if not os.path.isfile(cach_file):
        # Join with a path separator; plain string concatenation glued the folder
        # name directly onto the last component of data_root.
        df = pd.read_parquet(os.path.join(data_root, folder_name))

        # Pivot all columns so that it becomes a multi-index of (time, node).
        df = df.stack()
        # Set the names of the multi-index
        df.index.names = ['time', 'node']
        # Change the series name to the load name and then make it a dataframe
        df = df.rename(load).to_frame()

        # Drop all rows that do not feature at least one value >= 0
        df = df.loc[(df >= 0).any(axis=1)]

        df = df.reset_index()
        # 'time' is interpreted as seconds since the epoch, in UTC.
        df["dt"] = pd.to_datetime(df['time'], utc=True, unit="s")
        # Convert to Amsterdam local time and drop the timezone info again, so the
        # extracted hour is the local wall-clock hour.
        df["dt"] = df["dt"].dt.tz_convert(pytz.timezone('Europe/Amsterdam')).dt.tz_localize(None)
        df["hour_of_day"] = df["dt"].dt.hour

        # Aggregate only the load column: the frame also holds the non-numeric
        # 'node' column, which a frame-wide mean() cannot reduce on pandas >= 2.0.
        hourly = df.groupby("hour_of_day")[load].agg(["mean", "std"])
        # Position bars at the actual hour values so a missing hour leaves a gap
        # instead of shifting every later bar to the left.
        x_vals = hourly.index.to_numpy() + offset[index]
        y_vals = hourly["mean"].to_numpy()
        yerr_vals = hourly["std"].to_numpy()

        # The three arrays are written back-to-back into one file and must be
        # read back in the same order.
        with open(cach_file, 'wb') as cache_file:
            np.save(cache_file, x_vals)
            np.save(cache_file, y_vals)
            np.save(cache_file, yerr_vals)
    else:
        with open(cach_file, 'rb') as cache_file:
            x_vals = np.load(cache_file)
            y_vals = np.load(cache_file)
            yerr_vals = np.load(cache_file)

    # We create a 2d array to make sure matplotlib does not create downwards errorbars
    negative_direction_values = np.zeros(len(yerr_vals))
    ax.bar(x_vals, y_vals, yerr=[negative_direction_values, yerr_vals], edgecolor='black', color=color[index], hatch=hatch[index], label=load, width=barWidth, capsize=3)

ax.set_xlim(left=-1)
ax.set_ylim(bottom=0, top=100)
ax.set_xlabel("Hour of Day", fontsize=20)
ax.set_ylabel("Load", fontsize=20)
ax.tick_params(axis='both', which='major', labelsize=18)
ax.tick_params(axis='both', which='minor', labelsize=16)
ax.legend(ncol=len(color), prop={"size": 14}, bbox_to_anchor=(0.5, 1.15), loc=9)
fig.tight_layout()

date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

fig.savefig(f"loads_diurnal_hourly_{date_time}.pdf")

# pyplot keeps its own reference to every figure it created; close it explicitly,
# deleting the name alone does not free it.
plt.close(fig)
del fig
del ax
# `if df:` raises ValueError for a DataFrame (ambiguous truth value); test identity instead.
if df is not None:
    del df

In [6]:
# Weekly load profile: for every load metric in `loads`, draw one bar per day of the
# week (mean over all samples falling on that weekday) with the per-day standard
# deviation as an upward-only error bar, then save the figure as a timestamped PDF.
color = ['lightcoral', 'steelblue', 'yellowgreen']
marker = ['o', '^', 's']
hatch = ['', '/', '\\']

fig, ax = plt.subplots(figsize=(11,5))

# Defined here so this cell does not depend on a value left behind by another cell.
barWidth = 0.25
# Horizontal shift of each load's bars so the three bars of one day sit side by side.
offset = [-0.3, 0, 0.3]
df = None

# Aggregated values are cached per load so a re-run can skip reading the parquet data.
# The directory has to exist before the first cache file can be written.
cache_dir = "cache"
os.makedirs(cache_dir, exist_ok=True)

for index, (load, folder_name, processed_data_path) in enumerate(loads):

    cach_file = os.path.join(cache_dir, f"loads_daily_cache_{load}.npy")

    if not os.path.isfile(cach_file):
        # Join with a path separator; plain string concatenation glued the folder
        # name directly onto the last component of data_root.
        df = pd.read_parquet(os.path.join(data_root, folder_name))

        # Pivot all columns so that it becomes a multi-index of (time, node).
        df = df.stack()
        # Set the names of the multi-index
        df.index.names = ['time', 'node']
        # Change the series name to the load name and then make it a dataframe
        df = df.rename(load).to_frame()

        # Drop all rows that do not feature at least one value >= 0
        df = df[(df >= 0).any(axis=1)]

        df = df.reset_index()
        # 'time' is interpreted as seconds since the epoch, in UTC.
        df["dt"] = pd.to_datetime(df['time'], utc=True, unit="s")
        # Convert to Amsterdam local time and drop the timezone info again, so the
        # extracted weekday is the local calendar day.
        df["dt"] = df["dt"].dt.tz_convert(pytz.timezone('Europe/Amsterdam')).dt.tz_localize(None)
        # Monday == 0 ... Sunday == 6, matching the tick labels set below.
        df["day"] = df["dt"].dt.weekday

        # Aggregate only the load column: the frame also holds the non-numeric
        # 'node' column, which a frame-wide mean() cannot reduce on pandas >= 2.0.
        daily = df.groupby("day")[load].agg(["mean", "std"])
        # Position bars at the actual weekday values so a missing day leaves a gap
        # instead of shifting every later bar under the wrong tick label.
        x_vals = daily.index.to_numpy() + offset[index]
        y_vals = daily["mean"].to_numpy()
        yerr_vals = daily["std"].to_numpy()

        # The three arrays are written back-to-back into one file and must be
        # read back in the same order.
        with open(cach_file, 'wb') as cache_file:
            np.save(cache_file, x_vals)
            np.save(cache_file, y_vals)
            np.save(cache_file, yerr_vals)
    else:
        with open(cach_file, 'rb') as cache_file:
            x_vals = np.load(cache_file)
            y_vals = np.load(cache_file)
            yerr_vals = np.load(cache_file)

    # We create a 2d array to make sure matplotlib does not create downwards errorbars
    negative_direction_values = np.zeros(len(yerr_vals))
    ax.bar(x_vals, y_vals, yerr=[negative_direction_values, yerr_vals], edgecolor='black', color=color[index], hatch=hatch[index], label=load, width=barWidth, capsize=3)

ax.set_xlim(left=-1)
ax.set_ylim(bottom=0, top=100)
ax.set_xticks(list(np.arange(7)))
ax.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
ax.set_xlabel("Day of Week", fontsize=20)
ax.set_ylabel("Load", fontsize=20)
ax.tick_params(axis='both', which='major', labelsize=18)
ax.tick_params(axis='both', which='minor', labelsize=16)
ax.legend(ncol=len(color), prop={"size": 14}, bbox_to_anchor=(0.5, 1.15), loc=9)
fig.tight_layout()

date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
fig.savefig(f"loads_diurnal_daily_{date_time}.pdf")

# pyplot keeps its own reference to every figure it created; close it explicitly,
# deleting the name alone does not free it.
plt.close(fig)
del fig
del ax
# `if df:` raises ValueError for a DataFrame (ambiguous truth value); test identity instead.
if df is not None:
    del df