# Forecast Net Demand

In [6]:
# Set to true when running by CronTab
crontab = True

In [20]:
ROOT = '/home/sdc/DR_DemandForecast/DemandForecast'
if not crontab: ROOT = '.'

In [21]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load the environment variables from the .env file
env_file = f'{ROOT}/src/.env' if crontab else '.env'
load_dotenv(env_file, override=True)

# Get the values of host, user, pswd, db, and schema from the environment variables
host = os.getenv('host')
user = os.getenv('user')
pswd = os.getenv('pswd')
db = os.getenv('db')
schema = 'public'


#  Use the values as needed
engine = create_engine(
    f"postgresql://{user}:{pswd}@{host}/{db}?options=-csearch_path%3D{schema}", echo=False)
conn = engine.connect()

## Import data from CSV to PostgreSQL

This step is used for testing purposes.

Set `IMPORT_DATA` to `False` to skip this step.

In [14]:
IMPORT_DATA = True

In [27]:
import pandas as pd
import datetime as dt

if IMPORT_DATA:
    
    # Load and filer data from csv file
    
    rt_dpr = pd.read_csv(f'{ROOT}/data/RT_DPR.csv')
    rt_dpr = rt_dpr[['Date', 'Period', 'Demand', 'TCL', 'Transmission_Loss']]
    rt_dpr['Transmission_Loss'] = rt_dpr['Transmission_Loss'].fillna(0)
    rt_dpr = rt_dpr[rt_dpr['Date'] > '2023-06-30']
    rt_dpr = rt_dpr.sort_values(by=['Date', 'Period'])
    rt_dpr.reset_index(drop=True, inplace=True)
    
    vc_per = pd.read_csv(f'{ROOT}/data/VCData_Period.csv')
    
    # !!! The Real_Time_DPR table here is different from the one Matthew uses. Don't replace.
    # rt_dpr.to_sql('Real_Time_DPR', conn, if_exists='replace', index=False)
    vc_per.to_sql('VCData_Period', conn, if_exists='replace', index=False)

## Data from DB

In [28]:
import datetime as dt
import pytz

now = dt.datetime.now(pytz.timezone('Asia/Singapore'))
date = now.strftime("%Y-%m-%d")
time = now.strftime("%H:%M")

period = int(now.strftime("%H")) * 2 + int(now.strftime("%M")) // 30 + 1


if period + 1 > 48:
    next_period = 1
    next_date = now + dt.timedelta(days=1)
    next_date = next_date.strftime("%Y-%m-%d")
else:
    next_period = period + 1
    next_date = date

# next_date = '2024-03-27' # A hard-coded value for testing
# next_period = 22 # A hard-coded value for testing
print(f"# @ {date} {time} Period {period} -> Predict: {next_date} Period {next_period}")

# @ 2024-04-22 14:41 Period 30 -> Predict: 2024-04-22 Period 31


In [29]:
rt_dpr = pd.read_sql(f"""
SELECT "Date", "Period", "Demand", "TCL", "TransmissionLoss", "Solar"
FROM public."RealTimeDPR"
WHERE ("Date" < '{date}' OR ("Date" = '{next_date}' AND "Period" < {next_period}))
ORDER BY "Date" DESC, "Period" DESC  
LIMIT 336
""", conn)
rt_dpr.sort_values(by=['Date', 'Period'], inplace=True)
rt_dpr.reset_index(drop=True, inplace=True)
rt_dpr.fillna(0, inplace=True)

rt_dpr.iloc[[0, -1]]

Unnamed: 0,Date,Period,Demand,TCL,TransmissionLoss,Solar
0,2024-04-16,23,7281.244,0.0,38.081,134.8
267,2024-04-22,29,6994.669,0.0,37.293,528.68


In [30]:
vc_per = pd.read_sql('SELECT * FROM public."VCDataPeriod"', conn)
vc_per.iloc[[0, -1]]

Unnamed: 0,Year,Quarter,Period,TCQ_Weekday,TCQ_Weekend_PH
0,2023,3,1,457184.607704,486146.668221
191,2024,2,48,437836.736105,448734.605604


## Construct input data

In [57]:
import holidays

# Calculate required data fields

sg_holidays = holidays.country_holidays('SG')

rt_dpr['Total Demand'] = rt_dpr['Demand'] + rt_dpr['TCL'] + rt_dpr['TransmissionLoss'] + rt_dpr['TransmissionLoss']+ rt_dpr['Solar']
view = rt_dpr[['Date', 'Period', 'Total Demand']].copy()

def find_tcq(row):
    # print(row)
    date_obj = dt.datetime.strptime(str(row['Date']), '%Y-%m-%d')
    year = date_obj.year
    quarter = (date_obj.month - 1) // 3 + 1
    
    period = row['Period']
    
    isWeekend = 1 if date_obj.isoweekday() > 5 else 0
    isPublicHoliday = date_obj in sg_holidays
    
    if isWeekend or isPublicHoliday:
        # print(f"Date: {date_obj} isWeekend: {isWeekend} isPublicHoliday: {isPublicHoliday}")
        tcq = vc_per[(vc_per['Year'] == year) & (vc_per['Quarter'] == quarter) & (vc_per['Period'] == period)]['TCQ_Weekday'].values[0] / 1000
    else:
        tcq = vc_per[(vc_per['Year'] == year) & (vc_per['Quarter'] == quarter) & (vc_per['Period'] == period)]['TCQ_Weekend_PH'].values[0] / 1000

    # print(f"Date: {date_obj} TCQ: {tcq}")
    return tcq

view['TCQ'] = view.apply(lambda row: find_tcq(row), axis=1)
view['Net Demand'] = view['Total Demand'] - view['TCQ']
view.reset_index(drop=True, inplace=True)


In [58]:
view

Unnamed: 0,Date,Period,Total Demand,TCQ,Net Demand
0,2024-04-16,23,7492.206,467.211594,7024.994406
1,2024-04-16,24,7436.166,466.819762,6969.346238
2,2024-04-16,25,7379.022,464.571777,6914.450223
3,2024-04-16,26,7408.044,462.978535,6945.065465
4,2024-04-16,27,7473.557,465.246770,7008.310230
...,...,...,...,...,...
263,2024-04-22,25,7459.587,464.571777,6995.015223
264,2024-04-22,26,7463.955,462.978535,7000.976465
265,2024-04-22,27,7511.722,465.246770,7046.475230
266,2024-04-22,28,7555.949,468.581782,7087.367218


### Debug: Copying data into shape of 336

In [60]:
# import numpy as np
# repeat_count = 336 // 268
# remainder = 336 % 268
# repeating_index = np.concatenate([np.repeat(np.arange(268), repeat_count), np.arange(remainder)])
# repeating_index
# view = view.iloc[repeating_index].reset_index(drop=True)
# view


Unnamed: 0,Date,Period,Total Demand,TCQ,Net Demand
0,2024-04-16,23,7492.206,467.211594,7024.994406
1,2024-04-16,24,7436.166,466.819762,6969.346238
2,2024-04-16,25,7379.022,464.571777,6914.450223
3,2024-04-16,26,7408.044,462.978535,6945.065465
4,2024-04-16,27,7473.557,465.246770,7008.310230
...,...,...,...,...,...
331,2024-04-17,42,7229.832,507.427836,6722.404164
332,2024-04-17,43,7183.794,513.005546,6670.788454
333,2024-04-17,44,7090.524,511.745260,6578.778740
334,2024-04-17,45,6908.326,503.120418,6405.205582


## Load scaler

In [44]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import joblib
import os
import glob

# Load the most recent scaler file
resDir = f'{ROOT}/model'
newestDir = max(glob.glob(os.path.join(resDir, '*/')), key=os.path.getmtime)
# newestDir = './model/20240325_1527/'
if not crontab: print(newestDir)

In [45]:
scaler_files = glob.glob(os.path.join(newestDir, "*.pkl"))
if not crontab: print("Scaler files:", scaler_files)
scaler = joblib.load(scaler_files[0])
if not crontab: print("Loaded scaler:", scaler_files[0])

# Transform data using the loaded scaler
data = view.copy()
data['Target'] = data['Net Demand']
data['Target'] = scaler.fit_transform(data['Target'].values.reshape(-1,1))

def create_dataset(dataset):
    return np.array([dataset])

predict_X = create_dataset(data['Target'].values)

# Reshape input to be [samples, time steps, features]
predict_X = np.reshape(predict_X, (predict_X.shape[0], predict_X.shape[1], 1))

if not crontab: print(f"Predict_X shape: {predict_X.shape}")

## Make prediction

In [46]:
import tensorflow as tf
tf.keras.utils.disable_interactive_logging()

if not crontab: print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [47]:
import os
import glob
from keras.models import load_model

# Get a list of all model files in the directory
model_files = glob.glob(os.path.join(newestDir, "*.keras"))

# Sort the list of model files by modification time (most recent first)
model_files.sort(key=os.path.getmtime, reverse=True)

# Select the most recent model file
most_recent_model_file = model_files[0]

# Load the selected model
model = load_model(most_recent_model_file, )

# Print the path of the loaded model for verification
if not crontab: print("Loaded model:", most_recent_model_file)


# Make predictions
predict_result = model.predict(predict_X)

# Invert predictions to original scale
inverted_predictions = scaler.inverse_transform(predict_result)


In [48]:
# Print or use the predictions as needed
if not crontab: print(f"Predictions: {inverted_predictions[0][0]}")

## Add VCData back

In [49]:
def total_demand(row):
    # print(row)
    date_obj = dt.datetime.strptime(str(row['Date']), '%Y-%m-%d')
    year = date_obj.year
    quarter = (date_obj.month - 1) // 3 + 1
    
    period = row['Period']
    
    isWeekend = 1 if date_obj.isoweekday() > 5 else 0
    isPublicHoliday = date_obj in sg_holidays
    
    if isWeekend or isPublicHoliday:
        # print(f"Date: {date_obj} isWeekend: {isWeekend} isPublicHoliday: {isPublicHoliday}")
        tcq = vc_per[(vc_per['Year'] == year) & (vc_per['Quarter'] == quarter) & (vc_per['Period'] == period)]['TCQ_Weekday'].values[0] / 1000
    else:
        tcq = vc_per[(vc_per['Year'] == year) & (vc_per['Quarter'] == quarter) & (vc_per['Period'] == period)]['TCQ_Weekend_PH'].values[0] / 1000

    demand = tcq + row["Predicted_Demand"]
    return demand

In [50]:
data = {
    "Date": [next_date],
    "Period": [next_period],
    "Predicted_Demand": [inverted_predictions[0][0]]
}

data = pd.DataFrame(data)

In [51]:
data["Predicted_Demand"] = data.apply(lambda row: total_demand(row), axis=1)
predicted_demand = data["Predicted_Demand"][0]
print(f"# Predicted Demand: {predicted_demand}")

# Predicted Demand: 6645.486150518011


## Save prediction to DB

In [None]:
from sqlalchemy import text

# Check if the table 'Predicted_Demand' exists
table_exists = engine.dialect.has_table(conn, 'DemandForecast')
if not crontab: print(f"Table 'DemandForecast' exists: {table_exists}")

if not table_exists:
    # Create the table 'DemandForecast'
    create_table_query = """
    CREATE TABLE public."DemandForecast" (
        "Date" DATE,
        "Period" INTEGER,
        "Predicted_Demand" FLOAT,
        PRIMARY KEY ("Date", "Period")
    )
    """
    conn.execute(text(create_table_query))

# Check if a row with the same Date and Period exists
row_exists_query = f"""
SELECT EXISTS (
    SELECT 1
    FROM public."DemandForecast"
    WHERE "Date" = '{next_date}' AND "Period" = '{next_period}'
)
"""
row_exists = conn.execute(text(row_exists_query)).scalar()
if not crontab: print(f"Row exists: {row_exists}")

if row_exists:
    # Update the existing row with the predicted net demand
    update_query = f"""
    UPDATE public."DemandForecast"
    SET "Predicted_Demand" = {predicted_demand}
    WHERE "Date" = '{next_date}' AND "Period" = {next_period}
    """
    conn.execute(text(update_query))
else:
    # Insert a new row with the predicted net demand
    insert_query = f"""
    INSERT INTO public."DemandForecast" ("Date", "Period", "Predicted_Demand")
    VALUES ('{next_date}', {next_period}, {predicted_demand})
    """
    conn.execute(text(insert_query))

In [None]:
conn.commit()
conn.close()