In [None]:
import pandas as pd
from fbprophet import Prophet
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np

In [None]:
# Load the raw hourly electricity demand series (CSV) into a DataFrame.
demand_csv_path = r"rawData\electricity_demand\hourly_electricity_demand.csv"
electricity_demand = pd.read_csv(demand_csv_path)

In [None]:
# Fit a straight-line trend (load as a function of the hour number) to the
# historical demand and plot it against the observations.
df = pd.DataFrame({'Load': electricity_demand['NL_load']})
df['Hours'] = np.arange(len(df))   # 0, 1, 2, ... one step per hourly row

# The regressor needs a 2-D feature matrix; the target is passed as a column
# vector as well, so the prediction also comes back as a single column.
hours_col = df['Hours'].values.reshape(-1, 1)
load_col = df['Load'].values.reshape(-1, 1)

lin_reg = LinearRegression()
lin_reg.fit(X=hours_col, y=load_col)

# Evaluate the fitted line at every observed hour and keep it next to the data.
df['Load_pred'] = lin_reg.predict(hours_col)

# Observed load and fitted trend drawn against the shared hour axis.
df.plot(x='Hours', y=['Load', 'Load_pred'], figsize=(16, 6))

In [None]:
# Plot the observed hourly demand together with the fitted linear trend.
# Only `electricity_demand` and `df` are read here. The previous version also
# read `forecast_lean`, which is first created in a later cell, so the cell
# failed with a NameError on a fresh kernel (and the value was never used).
f, ax = plt.subplots(figsize=(16, 8))
ydata = electricity_demand["NL_load"]   # observed hourly load
linedata = df["Load_pred"]              # fitted linear trend
ax.plot(ydata)
ax.plot(linedata)
ax.set_xlabel("Hours", fontdict={'fontsize': 14})
ax.set_ylabel("Electricity demand (MWh)", fontdict={'fontsize': 14})
ax.set_title("Collected electricity demand data (2006-2017)", fontdict={'fontsize': 18})
ax.set_ylim(ymin=0)   # anchor the y-axis at zero
# plt.savefig('Collected electricity demand data (2006-2017).png')
# pyplot.show() takes no figure argument (its only option is the keyword
# `block`); it renders every open figure.
plt.show()

In [None]:
# Reload the raw hourly demand so this cell always starts from an unmodified
# frame (inserting "ds" into a frame that already has that column would raise).
electricity_demand = pd.read_csv(r"rawData\electricity_demand\hourly_electricity_demand.csv")

# Shape the frame the way "fbprophet" requires: a timestamp column "ds" and a
# target column "y". Timestamps are generated hourly, starting at firstRowDate.
firstRowDate = "1/1/2006"
hourly_timestamps = pd.date_range(start=firstRowDate, periods=len(electricity_demand), freq='H')
electricity_demand.insert(0, "ds", hourly_timestamps)
electricity_demand = electricity_demand.rename(columns={'NL_load': 'y'})

# Fit Prophet (default settings) on the historical hourly series.
m = Prophet()
m.fit(electricity_demand)

# Extend the timeline by 8760*32 hourly steps, i.e. 32 years of 8760 hours,
# meant to span 2018 up to 2050.
# NOTE(review): 8760 h/year ignores leap days, so calendar dates drift slightly
# against this row count - confirm that is acceptable for the downstream slicing.
future = m.make_future_dataframe(periods=8760*32, freq='H')

# Predict over the extended timeline and persist the complete result.
forecast = m.predict(future)
forecast.to_json(r"processedData\processed_electricity_demand\forecast_electricity.json", orient="split")

# Checkpoint 1

In [None]:
# Checkpoint 1 resume point: load the full Prophet forecast back from disk.
forecast_json_path = r"processedData\processed_electricity_demand\forecast_electricity.json"
forecast_lean = pd.read_json(forecast_json_path, orient="split")

In [None]:
# Plot the forecast for the ten-year window labelled 2020-2030.
# The window is kept under its own name. Reassigning `forecast_lean` here would
# leave the next cell applying `iloc[122640:]` to an 87,600-row frame, which
# yields an empty result and breaks every later step.
first_2020_row = 122640    # 14 * 8760 hourly rows precede 2020 (series starts in 2006)
hours_in_window = 87600    # 10 * 8760 hourly rows
forecast_2020_2030 = forecast_lean[['yhat']].iloc[first_2020_row:first_2020_row + hours_in_window, :]

f, ax = plt.subplots(figsize=(20, 10))
ydata = forecast_2020_2030["yhat"]
ax.plot(ydata)
ax.set_xlabel("Hours", fontdict={'fontsize': 14})
ax.set_ylabel("Electricity demand (MWh)", fontdict={'fontsize': 14})
# NOTE(review): the plotted series is the model forecast (yhat) even though the
# title says "Collected" - consider rewording the title.
ax.set_title("Collected electricity demand data (2020-2030)", fontdict={'fontsize': 18})
ax.set_ylim(ymin=0)   # anchor the y-axis at zero
# plt.savefig('Collected electricity demand data (2020-2030).png')
# pyplot.show() takes no figure argument (its only option is the keyword
# `block`); it renders every open figure.
plt.show()

In [None]:
# Keep only the forecast column, from row 122640 onwards (the 2020-2050 part).
# NOTE: this narrows `forecast_lean` in place by position, so the cell expects
# the full frame loaded at checkpoint 1 and must run only once on that frame.
forecast_lean = forecast_lean.iloc[122640:][['yhat']]
ax = forecast_lean.plot(figsize=[16,6])
ax.set_xlabel("Hours")
ax.set_ylabel("Electricity demand, MWh")

In [None]:
# Renumber the rows from 0 and persist the trimmed forecast.
lean_json_path = r"processedData\processed_electricity_demand\forecast_electricity_lean.json"
forecast_lean = forecast_lean.reset_index(drop=True)
forecast_lean.to_json(lean_json_path, orient="split")

# Checkpoint 2

In [None]:
# Checkpoint 2 resume point: reload the trimmed forecast.
forecast_lean = pd.read_json(r"processedData\processed_electricity_demand\forecast_electricity_lean.json", orient="split")

# Label every hourly row with its day number: rows 0-23 -> day 0, rows 24-47 ->
# day 1, and so on. Only the first 3650*3 days are labelled; later rows get NaN.
# This is one vectorised step instead of 262,800 scalar `.loc` writes, which
# were very slow and silently appended NaN rows when the frame was shorter than
# expected. Rows are never appended now.
# The float cast keeps the dtype the element-wise writes used to produce.
# Assumes the integer row labels 0..N-1 written at checkpoint 2.
daysPerBlock = 3650
numBlocks = 3
dayOfRow = pd.Series(np.asarray(forecast_lean.index) // 24, index=forecast_lean.index).astype("float64")
forecast_lean["dayIndex"] = dayOfRow.where(dayOfRow < daysPerBlock * numBlocks)

# Day numbers (within one 3650-day block) of the days to keep.
representativeDays = [149, 336, 1255, 1381, 1414, 1486, 2239, 3284, 3462]

# Row labels of all 24 hours of each chosen day, repeated in each of the three
# consecutive 3650-day blocks.
listHours = [
    24 * (d + y * daysPerBlock) + h
    for y in range(numBlocks)
    for d in representativeDays
    for h in range(24)
]

# Keep only those hours and renumber the remaining rows from 0.
forecast_lean = forecast_lean[forecast_lean.index.isin(listHours)]
forecast_lean = forecast_lean.reset_index(drop=True)

In [None]:
# Collapse the hourly forecast of the selected days into 3-hour steps, then
# group the steps into one list of eight values per day.
#
# Each 3-hour step is represented by the middle hour of its window (row 1 of
# every three rows), multiplied by 3 to cover the window and by 1000 to convert
# MWh to kWh. Rows are taken by position, which matches the original
# label-based lookup as long as `forecast_lean` is numbered 0..N-1 (it is reset
# that way by the preceding cell).
hoursPerStep = 3
stepsPerDay = 8

numSteps = len(forecast_lean) // hoursPerStep
middleHours = forecast_lean.iloc[1:numSteps * hoursPerStep:hoursPerStep]

# Built in one go rather than appending to Python lists inside a loop.
three_hourly_electricity_demand = pd.DataFrame({
    "Value": (middleHours["yhat"] * 3 * 1000).values,   # MWh per hour -> kWh per 3 h
    "dayIndex": middleHours["dayIndex"].values,
})

# One row per day: "Value" holds that day's eight 3-hourly values as a list and
# "dayIndex" is taken from the first step of the day.
# The frame is constructed once. Growing it row by row with `.loc` and
# assigning a list into a single cell is slow and only works while both columns
# share one object-dtype block.
numDays = len(three_hourly_electricity_demand) // stepsPerDay
stepValues = three_hourly_electricity_demand["Value"].tolist()
stepDays = three_hourly_electricity_demand["dayIndex"].tolist()

daily_elec_demand = pd.DataFrame(
    {
        "Value": [stepValues[stepsPerDay * i:stepsPerDay * (i + 1)] for i in range(numDays)],
        "dayIndex": [stepDays[stepsPerDay * i] for i in range(numDays)],
    },
    columns=["Value", "dayIndex"],
)

In [None]:
# Persist the per-day lists of 3-hourly demand values.
daily_json_path = r"processedData\processed_electricity_demand\daily_3hourly_elecDemand.json"
daily_elec_demand.to_json(daily_json_path, orient='split')