# Compare the forecasts with real records

In [1]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load the environment variables from the .env file
load_dotenv('.env')

# Read the database connection settings (host, user, pswd, db, schema)
# from the environment.
host = os.getenv('host')
user = os.getenv('user')
pswd = os.getenv('pswd')
db = os.getenv('db')
schema = os.getenv('schema')

# Fail fast with a readable message when a setting is absent: os.getenv
# returns None for an unset variable, and None would otherwise be
# interpolated into the connection URL below as the literal text "None".
_missing = [
    name
    for name, value in (
        ('host', host), ('user', user), ('pswd', pswd), ('db', db), ('schema', schema),
    )
    if value is None
]
if _missing:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(_missing)
    )

# Build the engine and open the connection shared by the cells below.
# "%3D" is the URL-encoded "=", so the query string passes
# "-csearch_path=<schema>" as connection options.
# NOTE(review): the values are inserted into the URL verbatim; credentials
# containing characters such as "@" or "/" presumably need to be URL-encoded
# in the .env file -- confirm.
engine = create_engine(
    f"postgresql://{user}:{pswd}@{host}/{db}?options=-csearch_path%3D{schema}", echo=False)
conn = engine.connect()

In [2]:
import datetime as dt
import pytz

# Snapshot of the current moment in Singapore local time, plus its
# date and time-of-day rendered as text for the report cell.
now = dt.datetime.now(pytz.timezone('Asia/Singapore'))
date = format(now, "%Y-%m-%d")
time = format(now, "%H:%M")

# Half-hour period number within the day, counted from 1:
# 00:00-00:29 -> 1, 00:30-00:59 -> 2, ..., 23:30-23:59 -> 48.
period = 2 * now.hour + now.minute // 30 + 1

# Uncomment the two lines below to pin the date/period while testing.
# date = '2024-03-25' # A hard-coded value for testing
# period = 33 # A hard-coded value for testing

# print(f"Now is {date} {time} Period {period}")

In [3]:
# Pull every stored forecast and order the rows by date, then by period.
# The original row labels are kept (the index is not reset).
predicted = pd.read_sql(
    """
SELECT "Date", "Period", "Predicted_Demand"
FROM public."Predicted_Demand"
""",
    conn,
).sort_values(by=['Date', 'Period'])

# Show the most recent forecasts.
predicted.tail(5)

Unnamed: 0,Date,Period,Predicted_Demand
107,2024-04-03,25,7194.61914
108,2024-04-03,26,7154.188496
109,2024-04-03,27,7319.652043
110,2024-04-03,28,7480.862056
111,2024-04-03,29,7452.093301


In [4]:
# Fetch the observed records starting at the date of the first forecast row.
# Guard the empty case explicitly: without forecasts there is no first row,
# and the positional lookup would fail with an opaque out-of-bounds error.
if predicted.empty:
    raise ValueError(
        "The Predicted_Demand query returned no rows; there is nothing to compare."
    )

# `predicted` is sorted by Date then Period, so its first row carries the
# earliest forecast date. Select the column by name rather than by position.
earliest_date = predicted["Date"].iloc[0].strftime("%Y-%m-%d")

# NOTE(review): earliest_date is interpolated into the SQL text; it comes from
# the database's own Date column, not from user input.
rt_dpr = pd.read_sql(f"""
SELECT "Date", "Period", "Demand", "TCL", "Transmission_Loss"
FROM public."Real_Time_DPR"
WHERE ("Date" >= '{earliest_date}')
ORDER BY "Date", "Period"  
""", conn)

# Order by date/period, renumber the rows from 0 and replace missing values
# with 0 so the three components can be summed later.
# NOTE(review): a missing "Demand" also becomes 0 here, which would show up
# as a very large forecast error -- confirm this is intended.
rt_dpr = (
    rt_dpr
    .sort_values(by=['Date', 'Period'])
    .reset_index(drop=True)
    .fillna(0)
)
# rt_dpr.head(5)

In [5]:
def return_real(row):
    """Return the observed total for the forecast row's date and period.

    The total is ``Demand + TCL + Transmission_Loss`` of the first record in
    the module-level ``rt_dpr`` frame whose ``Date`` and ``Period`` equal
    those of ``row``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys ``'Date'`` and ``'Period'``.

    Returns
    -------
    scalar
        Sum of the three components of the matching record.

    Raises
    ------
    IndexError
        If ``rt_dpr`` holds no record for that date and period.
    """
    date = row['Date']
    period = row['Period']

    # Select the matching record(s) without adding a column to the filtered
    # slice; assigning onto such a slice triggers SettingWithCopyWarning.
    match = rt_dpr.loc[
        (rt_dpr['Date'] == date) & (rt_dpr['Period'] == period),
        ['Demand', 'TCL', 'Transmission_Loss'],
    ]
    if match.empty:
        # Same exception type as the bare `.values[0]` lookup would raise,
        # but the message says which record is missing.
        raise IndexError(
            f"No real-time record in rt_dpr for Date={date}, Period={period}"
        )

    total = match['Demand'] + match['TCL'] + match['Transmission_Loss']
    return total.values[0]

# Look up the observed total for every forecast row, then store the signed
# error as real minus predicted.
predicted["Real"] = predicted.apply(return_real, axis=1)
predicted["Error (R-P)"] = predicted["Real"].sub(predicted["Predicted_Demand"])

In [6]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Root-mean-square error between the observed totals and the forecasts.
score = np.sqrt(
    mean_squared_error(
        y_true=predicted["Real"],
        y_pred=predicted["Predicted_Demand"],
    )
)

In [7]:
# Report when the comparison ran (the date, time and period computed in an
# earlier cell) and the overall RMSE computed in the previous cell.
print(f"Now is {date} {time} Period {period}")
print(f"RMSE: {score}")
# Leave the frame as the cell's last expression so the notebook renders it.
predicted

Now is 2024-04-03 14:07 Period 29
RMSE: 65.44427816448494


Unnamed: 0,Date,Period,Predicted_Demand,Real,Error (R-P)
0,2024-03-27,31,7154.288840,7127.147,-27.141840
1,2024-03-27,32,7175.052103,7150.128,-24.924103
2,2024-03-27,33,7203.578004,7164.434,-39.144004
3,2024-03-27,34,7205.663140,7291.121,85.457860
4,2024-03-27,35,7337.046935,7273.332,-63.714935
...,...,...,...,...,...
107,2024-04-03,25,7194.619140,7104.224,-90.395140
108,2024-04-03,26,7154.188496,7220.290,66.101504
109,2024-04-03,27,7319.652043,7407.115,87.462957
110,2024-04-03,28,7480.862056,7454.984,-25.878056


In [8]:
# Release the connection, then dispose of the engine so that no pooled
# connections stay open while the kernel keeps running.
conn.close()
engine.dispose()