# RTP Forecasting: Transfer Learning

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import time
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
import keras
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.layers import *

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
print(tf.__version__)

2.12.1


In [3]:
cwd = os.getcwd()

In [4]:
def make_dir(path):
    if os.path.exists(path) is False:
        os.makedirs(path)

In [5]:
model_path = os.path.join(cwd,'saved_model')
make_dir(model_path)

In [6]:
#timing callback
class TimeHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, batch, logs={}):
        self.epoch_time_start = time.time()

    def on_epoch_end(self, batch, logs={}):
        self.times.append(time.time() - self.epoch_time_start)

In [7]:
# Plot history and future
def plot_predictions(pred , actual, title):
    plt.figure(figsize=(20, 4), dpi=150)
    plt.plot(np.arange(len(pred)), np.array(pred),label='cnn',alpha=0.7)
    plt.plot(np.arange(len(pred)), np.array(actual),label='PF', alpha=0.7)
    plt.axhline(y=0, color='black', linestyle='--', lw=1, alpha=0.5)
    plt.legend(loc='upper right')
    plt.xlabel('Time step' ,  fontsize=18)
    plt.ylabel('Price' , fontsize=18)
    plt.title(title, fontsize=16)

In [8]:
# Plot history and future
def plot_predictions_slide(pred_1,pred_2,pred_3, actual, title):
    plt.figure(figsize=(20, 4), dpi=150)
    plt.plot(np.arange(len(pred_1)), np.array(actual),label='PF', alpha=0.7)
    plt.plot(np.arange(len(pred_1)), np.array(pred_1),label='cnn-24',alpha=0.7)
    plt.plot(np.arange(len(pred_1)), np.array(pred_2),label='cnn-48',alpha=0.7)
    plt.plot(np.arange(len(pred_1)), np.array(pred_3),label='cnn-27',alpha=0.7)
    plt.axhline(y=0, color='black', linestyle='--', lw=1, alpha=0.5)
    plt.legend(loc='upper right')
    plt.xlabel('Time step' ,  fontsize=18)
    plt.ylabel('Price' , fontsize=18)
    plt.title(title, fontsize=16)

In [9]:
# zones = ['CAPITL', 'CENTRL', 'DUNWOD', 'GENESE', 'HUD VL', 'LONGIL',
#         'MHK VL', 'MILLWD', 'N.Y.C.', 'NORTH', 'WEST']
zone = 'CAPITL'
year = 2021

### Data Import

In [10]:
# Read each timeseries (RTP = Real-Time Price, DAP = Day-Ahead Price, LF = Load Forecast)
raw_DAP = pd.read_csv("nyiso/da_lmp_zones_df_2015_2021.csv", index_col=0)
raw_RTP = pd.read_csv("nyiso/rt_lmp_zones_df_2015_2021.csv", index_col=0)
raw_LF = pd.read_csv("nyiso/load_frcstd_df_2015_2021.csv", index_col=0)

# Prepare the dataset as a dataframe
raw_data = pd.concat([raw_DAP.loc[:,zone], raw_LF.loc[:,zone], raw_RTP.loc[:,zone]],
                       axis=1).loc['2017-01-01 05:00:00+00:00':]
raw_data.columns = ['DAP', 'LF', 'RTP']
raw_data.index.names = ['date']
raw_data.to_csv('nyiso/NYISO_'+zone+'_raw.csv')

log_data = raw_data.copy(deep=True)
log_data.loc[:,"DAP"] = np.log(raw_data.loc[:,"DAP"] + 1 - min(raw_data.loc[:,"DAP"]))
log_data.loc[:,"RTP"] = np.log(raw_data.loc[:,"RTP"] + 1 - min(raw_data.loc[:,"RTP"]))
log_data.to_csv('nyiso/NYISO_'+zone+'_log.csv')

In [11]:
# price statistics
print('DAP mean: {:.4f}'.format(raw_data.iloc[8760*4+24:,:1].mean()[0]))
print('DAP std: {:.4f}'.format(raw_data.iloc[8760*4+24:,:1].std()[0]))
print('RTP mean: {:.4f}'.format(raw_data.iloc[8760*4+24:,2:].mean()[0]))
print('RTP std: {:.4f}'.format(raw_data.iloc[8760*4+24:,2:].std()[0]))

DAP mean: 44.6038
DAP std: 18.9331
RTP mean: 44.1633
RTP std: 29.5993


# Train model using short training set

In [12]:
# Split dataset: 2020 year for training and 2021 year for testing
x_train_df = log_data.iloc[8760*3:8760*4+24,:]
x_test_df = log_data.iloc[8760*4+24:,:]

y_train_df = log_data.iloc[8760*3:8760*4+24,2:]
y_test_df = log_data.iloc[8760*4+24:,2:]

In [13]:
# Standardization
x_mean, x_std = x_train_df.mean(), x_train_df.std()
y_mean, y_std = y_train_df.mean(), y_train_df.std()

x_train = ((x_train_df - x_mean)/x_std).to_numpy()
x_test = ((x_test_df - x_mean)/x_std).to_numpy()

y_train = ((y_train_df - y_mean)/y_std).to_numpy()
y_test = ((y_test_df - y_mean)/y_std).to_numpy()

In [14]:
# reshape data for CNN model
n_steps_in = 48
n_steps_out = 24

x_train_cnn = np.array([x_train[i:i+n_steps_in] for i in range(0, x_train.shape[0]-n_steps_in-n_steps_out+1)])
y_train_cnn = np.array([y_train[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_train.shape[0]-n_steps_in-n_steps_out+1)])

x_test_cnn = np.array([x_test[i:i+n_steps_in] for i in range(0, x_test.shape[0]-n_steps_in-n_steps_out+1)])
y_test_cnn = np.array([y_test[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_test.shape[0]-n_steps_in-n_steps_out+1)])

print(x_train_cnn.shape,y_train_cnn.shape,x_test_cnn.shape,y_test_cnn.shape)

(8713, 48, 3) (8713, 24, 1) (8689, 48, 3) (8689, 24, 1)


In [15]:
# set hyperparameters
n_filters  = 32  # number of filters
n_neurons  = 64  # number of neurons in the Dense layer
activation     = 'relu' # activation function
kernel_size    = 3
pool_size = 1
learning_rate  = 0.0001
minibatch_size = 32
num_epochs     = 50

In [16]:
# Building the model
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=n_filters,kernel_size=kernel_size, strides=2, padding='same',
                     input_shape=(x_train_cnn.shape[1],x_train_cnn.shape[2]), activation=activation))
cnn_model.add(Conv1D(filters=n_filters,kernel_size=kernel_size, strides=2, padding='same',
                     input_shape=(x_train_cnn.shape[1],x_train_cnn.shape[2]), activation=activation))
cnn_model.add(MaxPooling1D(pool_size=pool_size))
cnn_model.add(Flatten())
cnn_model.add(Dense(n_neurons, activation=activation))
cnn_model.add(Dense(n_steps_out, activation='linear'))
cnn_model.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))

In [17]:
# Running training

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

history = cnn_model.fit(x_train_cnn, y_train_cnn, 
                        batch_size = minibatch_size,
                        epochs = num_epochs,
                        validation_split=0.2, verbose=1,
                        callbacks=[early_stop],
                        shuffle=False)

# Saving the model
model_path = os.path.join(cwd,'saved_model')
make_dir(model_path)
cnn_model.save(os.path.join(model_path,'1y_cnn_model_'+zone+'_48h.h5'))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [18]:
################# Evaluation ##################
y_test_pred = cnn_model.predict(x_test_cnn)

# Evaluation metrics
print('Testing result for a model trained from scratch using only 1 year of training')
print('MAE: {:.4f}'.format(np.abs(y_test_pred - y_test_cnn[:,:,0]).mean()))

# Rescaling to get actual price 
y_test_pred_rescale = y_test_pred*y_std.values + y_mean.values
y_test_cnn_rescale = y_test_cnn*y_std.values + y_mean.values
y_test_pred_invlog = np.exp(y_test_pred_rescale) -1 + min(raw_data.loc[:,"RTP"])
y_test_cnn_invlog = np.exp(y_test_cnn_rescale) -1 + min(raw_data.loc[:,"RTP"])

# Evaluation metrics
MAE_woTL = np.abs(y_test_pred_invlog - y_test_cnn_invlog[:,:,0]).mean()
print('Rescaled MAE: {:.4f}'.format(MAE_woTL))
print('')

Testing result for a model trained from scratch using only 1 year of training
MAE: 0.8395
Rescaled MAE: 13.0006



# Transfer Learning

In [19]:
# Import trained model on different zone
cnn_model = load_model(os.path.join(model_path,'cnn_model_N.Y.C._48h.h5'))

In [20]:
# Test on the transfered model without retraining it
y_test_pred = cnn_model.predict(x_test_cnn)

# Evaluation metrics
print('Testing result for a transfered model without retraining')
print('MAE: {:.4f}'.format(np.abs(y_test_pred - y_test_cnn[:,:,0]).mean()))

# Rescaling to get actual price 
y_test_pred_rescale = y_test_pred*y_std.values + y_mean.values
y_test_pred_invlog = np.exp(y_test_pred_rescale) -1 + min(raw_data.loc[:,"RTP"])

# Evaluation metrics
MAE_TL = np.abs(y_test_pred_invlog - y_test_cnn_invlog[:,:,0]).mean()
print('Rescaled MAE: {:.4f}'.format(MAE_TL))
print('')

Testing result for a transfered model without retraining
MAE: 0.8094
Rescaled MAE: 12.5288



## Retrain the transfered model on 1 year of training

In [21]:
# Lets freeze all layer except the output layer
cnn_model.get_layer(index=0).trainable = False
cnn_model.get_layer(index=1).trainable = False
cnn_model.get_layer(index=2).trainable = False
cnn_model.get_layer(index=3).trainable = False
cnn_model.get_layer(index=4).trainable = False
cnn_model.get_layer(index=5).trainable = True

# set hyperparameters
n_filters  = 32  # number of filters
n_neurons  = 64  # number of neurons in the Dense layer
activation     = 'relu' # activation function
kernel_size    = 3
pool_size = 1
learning_rate  = 0.0001
minibatch_size = 32
num_epochs     = 10
    
history = cnn_model.fit(x_train_cnn, y_train_cnn, 
                    batch_size      = minibatch_size,
                    epochs          = num_epochs,
                    validation_split= 0.2, 
                    verbose         = 1,
                    callbacks       = [early_stop])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
# Test on the transfered model after retraining it
y_test_pred = cnn_model.predict(x_test_cnn)

# Evaluation metrics
print('Testing result for a transfered model after retraining')
print('MAE: {:.4f}'.format(np.abs(y_test_pred - y_test_cnn[:,:,0]).mean()))

# Rescaling to get actual price 
y_test_pred_rescale = y_test_pred*y_std.values + y_mean.values
y_test_pred_invlog = np.exp(y_test_pred_rescale) -1 + min(raw_data.loc[:,"RTP"])

# Evaluation metrics
MAE_TL_trained = np.abs(y_test_pred_invlog - y_test_cnn_invlog[:,:,0]).mean()
print('Rescaled MAE: {:.4f}'.format(MAE_TL_trained))
print('')

Testing result for a transfered model after retraining
MAE: 0.7849
Rescaled MAE: 12.2110



## 1 year TL Summary Result

In [23]:
# Evaluation metrics
print('W/O TL MAE        : {:.4f}'.format(MAE_woTL))
print('W TL MAE          : {:.4f}'.format(MAE_TL))
print('W TL retrained MAE: {:.4f}'.format(MAE_TL_trained))
print('')

print('% improvement')
print('W TL MAE          : {:.4f}'.format((MAE_woTL-MAE_TL)/MAE_woTL*100))
print('W TL retrained MAE: {:.4f}'.format((MAE_woTL-MAE_TL_trained)/MAE_woTL*100))
print('')

W/O TL MAE        : 13.0006
W TL MAE          : 12.5288
W TL retrained MAE: 12.2110

% improvement
W TL MAE          : 3.6284
W TL retrained MAE: 6.0735



# Transfer Learning with full training set

In [24]:
# prepare the full training set

# Split dataset: 2017-2020 years for training and 2021 year for testing
x_train_df = log_data.iloc[:8760*4+24,:]
x_test_df = log_data.iloc[8760*4+24:,:]

y_train_df = log_data.iloc[:8760*4+24,2:]
y_test_df = log_data.iloc[8760*4+24:,2:]

# Standardization
x_mean, x_std = x_train_df.mean(), x_train_df.std()
y_mean, y_std = y_train_df.mean(), y_train_df.std()

x_train = ((x_train_df - x_mean)/x_std).to_numpy()
x_test = ((x_test_df - x_mean)/x_std).to_numpy()

y_train = ((y_train_df - y_mean)/y_std).to_numpy()
y_test = ((y_test_df - y_mean)/y_std).to_numpy()

# reshape data for CNN model
n_steps_in = 48
n_steps_out = 24

x_train_cnn = np.array([x_train[i:i+n_steps_in] for i in range(0, x_train.shape[0]-n_steps_in-n_steps_out+1)])
y_train_cnn = np.array([y_train[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_train.shape[0]-n_steps_in-n_steps_out+1)])

x_test_cnn = np.array([x_test[i:i+n_steps_in] for i in range(0, x_test.shape[0]-n_steps_in-n_steps_out+1)])
y_test_cnn = np.array([y_test[i+n_steps_in:i+n_steps_in+n_steps_out] for i in range(0, y_test.shape[0]-n_steps_in-n_steps_out+1)])

print(x_train_cnn.shape,y_train_cnn.shape,x_test_cnn.shape,y_test_cnn.shape)


(34993, 48, 3) (34993, 24, 1) (8689, 48, 3) (8689, 24, 1)


In [25]:
# Import the fully trained model for this zone
cnn_model = load_model(os.path.join(model_path,'cnn_model_'+zone+'_48h.h5'))

In [26]:
################# Evaluation ##################
y_test_pred = cnn_model.predict(x_test_cnn)

# Evaluation metrics
print(zone)
print('Testing result for a model trained from scratch using full training set')
print('MAE: {:.4f}'.format(np.abs(y_test_pred - y_test_cnn[:,:,0]).mean()))

# Rescaling to get actual price 
y_test_pred_rescale = y_test_pred*y_std.values + y_mean.values
y_test_cnn_rescale = y_test_cnn*y_std.values + y_mean.values
y_test_pred_invlog = np.exp(y_test_pred_rescale) -1 + min(raw_data.loc[:,"RTP"])
y_test_cnn_invlog = np.exp(y_test_cnn_rescale) -1 + min(raw_data.loc[:,"RTP"])

# Evaluation metrics
MAE_woTL_full = np.abs(y_test_pred_invlog - y_test_cnn_invlog[:,:,0]).mean()
print('Rescaled MAE: {:.4f}'.format(MAE_woTL_full))
print('')

CAPITL
Testing result for a model trained from scratch using full training set
MAE: 0.4724
Rescaled MAE: 11.9008



In [27]:
# Import trained model on different zone
cnn_model = load_model(os.path.join(model_path,'cnn_model_N.Y.C._48h.h5'))

# Test on the transfered model without retraining it
y_test_pred = cnn_model.predict(x_test_cnn)

# Evaluation metrics
print(zone)
print('Testing result for a transfered model without retraining')
print('MAE: {:.4f}'.format(np.abs(y_test_pred - y_test_cnn[:,:,0]).mean()))

# Rescaling to get actual price 
y_test_pred_rescale = y_test_pred*y_std.values + y_mean.values
y_test_pred_invlog = np.exp(y_test_pred_rescale) -1 + min(raw_data.loc[:,"RTP"])

# Evaluation metrics
MAE_TL_full = np.abs(y_test_pred_invlog - y_test_cnn_invlog[:,:,0]).mean()
print('Rescaled MAE: {:.4f}'.format(MAE_TL_full))
print('')

CAPITL
Testing result for a transfered model without retraining
MAE: 0.4836
Rescaled MAE: 12.1424



## Retrain the transfered model on full training set

In [28]:
# Lets freeze all layer except the output layer
cnn_model.get_layer(index=0).trainable = False
cnn_model.get_layer(index=1).trainable = False
cnn_model.get_layer(index=2).trainable = False
cnn_model.get_layer(index=3).trainable = False
cnn_model.get_layer(index=4).trainable = False
cnn_model.get_layer(index=5).trainable = True

# set hyperparameters
n_filters  = 32  # number of filters
n_neurons  = 64  # number of neurons in the Dense layer
activation     = 'relu' # activation function
kernel_size    = 3
pool_size = 1
learning_rate  = 0.0001
minibatch_size = 32
num_epochs     = 10
    
history = cnn_model.fit(x_train_cnn, y_train_cnn, 
                    batch_size      = minibatch_size,
                    epochs          = num_epochs,
                    validation_split= 0.2, 
                    verbose         = 1,
                    callbacks       = [early_stop])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
# Test on the transfered model after retraining it
y_test_pred = cnn_model.predict(x_test_cnn)

# Evaluation metrics
print(zone)
print('Testing result for a transfered model after retraining')
print('MAE: {:.4f}'.format(np.abs(y_test_pred - y_test_cnn[:,:,0]).mean()))

# Rescaling to get actual price 
y_test_pred_rescale = y_test_pred*y_std.values + y_mean.values
y_test_pred_invlog = np.exp(y_test_pred_rescale) -1 + min(raw_data.loc[:,"RTP"])

# Evaluation metrics
MAE_TL_trained_full = np.abs(y_test_pred_invlog - y_test_cnn_invlog[:,:,0]).mean()
print('Rescaled MAE: {:.4f}'.format(MAE_TL_trained_full))
print('')

CAPITL
Testing result for a transfered model after retraining
MAE: 0.5021
Rescaled MAE: 12.6963



## 1 year TL Summary Result

In [30]:
# Evaluation metrics
print('Full training set (4 years)')
print('W/O TL MAE        : {:.4f}'.format(MAE_woTL_full))
print('W TL MAE          : {:.4f}'.format(MAE_TL_full))
print('W TL retrained MAE: {:.4f}'.format(MAE_TL_trained_full))
print('')

print('% improvement')
print('W TL MAE          : {:.4f}'.format((MAE_woTL_full-MAE_TL_full)/MAE_woTL_full*100))
print('W TL retrained MAE: {:.4f}'.format((MAE_woTL_full-MAE_TL_trained_full)/MAE_woTL_full*100))
print('')

Full training set (4 years)
W/O TL MAE        : 11.9008
W TL MAE          : 12.1424
W TL retrained MAE: 12.6963

% improvement
W TL MAE          : -2.0301
W TL retrained MAE: -6.6842



# Summary Result

In [31]:
print('1 Year of Training')
print('% improvement')
print('W TL MAE          : {:.4f}'.format((MAE_woTL-MAE_TL)/MAE_woTL*100))
print('W TL retrained MAE: {:.4f}'.format((MAE_woTL-MAE_TL_trained)/MAE_woTL*100))
print('')

print('4 Years of Training')
print('% improvement')
print('W TL MAE          : {:.4f}'.format((MAE_woTL_full-MAE_TL_full)/MAE_woTL_full*100))
print('W TL retrained MAE: {:.4f}'.format((MAE_woTL_full-MAE_TL_trained_full)/MAE_woTL_full*100))
print('')

1 Year of Training
% improvement
W TL MAE          : 3.6284
W TL retrained MAE: 6.0735

4 Years of Training
% improvement
W TL MAE          : -2.0301
W TL retrained MAE: -6.6842



Takeaway: if we have limited training data, transfer learning can be helpful, it improve predictions with much less number of epochs