# Bond Yield Prediction using LSTM Encoder-Decoder Architecture

This notebook implements a deep learning approach for predicting US bond yields using an LSTM encoder-decoder architecture with professor forcing.

In [None]:
# Import libraries
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm, trange
from dotenv import load_dotenv

# Machine Learning libraries
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Deep Learning libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import pytorch_lightning as pl

# Visualization libraries
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load environment variables
load_dotenv()

In [None]:
## Configuration and Constants

# Seed every RNG this notebook touches (NumPy, PyTorch, stdlib `random`)
# so stochastic steps repeat across runs.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

# Model hyperparameters
SEQUENCE_LENGTH = 22  # default window length used by prepare_sequences
INPUT_SIZE = 3  # presumably the number of PCA components fed to the encoder -- confirm
HIDDEN_SIZE = 50
NUM_LAYERS = 2
BATCH_SIZE = 50
LEARNING_RATE = 0.006
N_EPOCHS = 500
TARGET_LENGTH = 22

# Data split ratios
TRAIN_RATIO = 0.8  # leading fraction of rows used for training
# NOTE(review): the original note read "75% of training data for validation
# split"; confirm whether 0.75 is the share kept for training or held out.
VAL_RATIO = 0.75

# Device configuration: use the GPU when one is visible to PyTorch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [None]:
## Data Loading and Initial Processing

def load_data():
    """Read the bond, CPI and ISM CSV files into three DataFrames.

    Each path is taken from an environment variable (BOND_DATA_PATH,
    CPI_DATA_PATH, ISM_DATA_PATH) and falls back to a file under
    /content/sample_data when the variable is unset.

    Returns:
        tuple: (bonds, cpi, ism). Rows containing NaN are dropped from the
        bond frame. In the CPI and ISM frames the "DATE" column is parsed to
        datetimes and renamed to "date".
    """

    def _read_dated_csv(env_var, fallback):
        # Shared recipe for the two macro series: parse DATE, then rename it.
        frame = pd.read_csv(os.getenv(env_var, fallback))
        frame["DATE"] = pd.to_datetime(frame["DATE"])
        return frame.rename(columns={"DATE": "date"})

    bonds = pd.read_csv(
        os.getenv('BOND_DATA_PATH', '/content/sample_data/us-government-bond.csv')
    )
    bonds.dropna(inplace=True)

    cpi_frame = _read_dated_csv('CPI_DATA_PATH', '/content/sample_data/CORESTICKM159SFRBATL.csv')
    ism_frame = _read_dated_csv('ISM_DATA_PATH', '/content/sample_data/AMTMNO.csv')

    return bonds, cpi_frame, ism_frame

# Read the three raw tables and report each one's (rows, columns) shape.
df, cpi, ism = load_data()
print(f"Loaded bond data shape: {df.shape}")
print(f"Loaded CPI data shape: {cpi.shape}")
print(f"Loaded ISM data shape: {ism.shape}")

In [None]:
## Data Preprocessing and Feature Engineering

def preprocess_data(df, cpi, ism):
    """Clean the bond table and left-join the ISM and CPI tables onto it by date.

    Args:
        df: Bond data with a 'date' column in day/month/year text form and a
            'DivYield' column whose values may carry a '%' sign.
        cpi: Table with a datetime 'date' column; joined second.
        ism: Table with a datetime 'date' column; joined first.

    Returns:
        A new DataFrame with parsed dates, numeric 'DivYield', the joined
        columns back-filled, and any rows still containing NaN removed.
        None of the input frames is modified.
    """
    # Work on a copy: mutating the caller's frame made the notebook cell
    # non-idempotent and silently changed the raw data shown in earlier cells.
    bonds = df.copy()
    bonds['date'] = pd.to_datetime(bonds['date'], format="%d/%m/%Y")
    bonds['DivYield'] = pd.to_numeric(bonds['DivYield'].replace('%', '', regex=True))

    # Left joins keep every bond row; dates absent from ism/cpi become NaN.
    merged = pd.merge(bonds, ism, how="left", on="date")
    merged = pd.merge(merged, cpi, how="left", on="date")

    # .bfill() replaces the deprecated fillna(method="backfill").
    # NOTE(review): back-filling copies the *next* observation backwards in
    # time, i.e. a row can receive a value published after its date -- confirm
    # this look-ahead is acceptable for a forecasting task.
    merged = merged.bfill()

    # Rows after the last available observation cannot be back-filled.
    return merged.dropna()

def create_features(df):
    """Standardise every predictor column and leave the target unscaled.

    Args:
        df: Frame containing a 'date' column, the 'us_5_year_yields' target
            and the predictor columns.

    Returns:
        tuple: (features_df, scaler). features_df holds the standardised
        predictors followed by the raw 'us_5_year_yields' column, on the same
        index as df; scaler is the fitted StandardScaler.
    """
    target_col = 'us_5_year_yields'
    table = df.drop(['date'], axis=1)
    predictor_cols = [name for name in table.columns if name != target_col]

    # NOTE(review): the scaler is fitted on every row passed in, so when the
    # train/test split happens afterwards the test rows contribute to the
    # mean/variance used for scaling -- confirm this is intended.
    scaler = StandardScaler()
    standardised = pd.DataFrame(
        scaler.fit_transform(table[predictor_cols]),
        columns=predictor_cols,
        index=table.index,
    )

    # Target goes last, exactly as read from the input.
    features_df = pd.concat([standardised, table[[target_col]]], axis=1)
    return features_df, scaler

# Clean/merge the raw tables, then build the scaled feature matrix.
# Note: `df` is rebound to the merged frame here, so re-running this cell
# passes the already-merged frame back into preprocess_data.
df = preprocess_data(df, cpi, ism)
features_df, scaler = create_features(df)

print(f"Preprocessed data shape: {features_df.shape}")
print(f"Features: {list(features_df.columns)}")
features_df.tail()

I changed how the standard scaler is applied so that only the input features, and not the target, are scaled.

In [None]:
features_df.tail(500)

Unnamed: 0,us_10_year_yields,us_1_year_yields,us_6_month_yields,us_3_month_yield,debt_to_gdp,deficit_as_percent_of_gdp,daily_us_real_gdp,M0,DivYield,us_5_year_yields
5004,-1.045150,0.412737,-0.043067,0.013196,1.108133,-0.128078,1.099446,1.597051,0.533233,1.61
5005,-1.030384,0.412737,-0.043067,0.002055,1.108133,-0.128078,1.099446,1.597051,0.533233,1.62
5006,-1.000853,0.407106,-0.043067,0.002055,1.108133,-0.128078,1.099446,1.597051,0.533233,1.67
5007,-1.015619,0.412737,-0.043067,0.002055,1.108133,-0.128078,1.099446,1.597051,0.533233,1.65
5008,-1.030384,0.407106,-0.048557,0.002055,1.108133,-0.128078,1.099446,1.597051,0.533233,1.63
...,...,...,...,...,...,...,...,...,...,...
5499,-1.288781,-0.955649,-0.784240,-0.822339,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.26
5500,-1.281398,-0.966911,-0.789730,-0.822339,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.27
5501,-1.237101,-0.966911,-0.795220,-0.827909,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.29
5502,-1.259250,-0.966911,-0.795220,-0.827909,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.27


In [None]:
#features_df = features_df.drop(columns=["deficit_as_percent_of_gdp", "daily_us_real_gdp","DivYield", "us_10_year_yields","us_1_year_yields"])

In [None]:
# Number of leading rows used for training; driven by TRAIN_RATIO from the
# configuration cell instead of a second hard-coded 0.8.
train_size = int(len(features_df) * TRAIN_RATIO)
train_size

4403

In [None]:
# Chronological train/test split. The test slice starts exactly where the
# training slice ends; the earlier `train_size + 1` start silently dropped the
# row at position `train_size` from both sets.
train_df, test_df = features_df[:train_size], features_df[train_size:]
train_df.shape, test_df.shape

((4403, 10), (1100, 10))

In [None]:
features_df

Unnamed: 0,us_10_year_yields,us_1_year_yields,us_6_month_yields,us_3_month_yield,debt_to_gdp,deficit_as_percent_of_gdp,daily_us_real_gdp,M0,DivYield,us_5_year_yields
0,2.476425,2.417450,2.290257,2.196726,-1.177458,1.705668,-2.459378,-1.316215,-1.718687,6.50
1,2.409980,2.366769,2.257316,2.168875,-1.177458,1.705668,-2.459378,-1.313851,-1.718687,6.40
2,2.505956,2.394925,2.251826,2.174445,-1.177458,1.705668,-2.459378,-1.310779,-1.718687,6.51
3,2.469042,2.383662,2.224375,2.157735,-1.177458,1.705668,-2.459378,-1.304634,-1.718687,6.46
4,2.432129,2.366769,2.207905,2.141024,-1.177458,1.705668,-2.459378,-1.299435,-1.718687,6.42
...,...,...,...,...,...,...,...,...,...,...
5499,-1.288781,-0.955649,-0.784240,-0.822339,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.26
5500,-1.281398,-0.966911,-0.789730,-0.822339,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.27
5501,-1.237101,-0.966911,-0.795220,-0.827909,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.29
5502,-1.259250,-0.966911,-0.795220,-0.827909,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.27


In [None]:
# Separate the target column from the predictors for both splits.
# These names are rebound later by perform_pca_and_split.
y_train = train_df['us_5_year_yields']
x_train = train_df.drop('us_5_year_yields', axis=1)
x_test = test_df.drop('us_5_year_yields', axis=1)
y_test = test_df['us_5_year_yields']

In [None]:
## Principal Component Analysis and Data Splitting

def perform_pca_and_split(features_df, n_components=3, train_ratio=0.8):
    """Split the rows chronologically, then reduce the predictors with PCA.

    Args:
        features_df: Predictor columns plus the 'us_5_year_yields' target.
        n_components: Number of principal components to keep.
        train_ratio: Leading fraction of rows assigned to the training set.

    Returns:
        tuple: (x_train_pca, x_test_pca, y_train, y_test, pca, cov_matrix).
        The two PCA frames have columns 'Component 1'..'Component n' and a
        fresh 0-based index, while y_train / y_test keep the index of
        features_df. cov_matrix is the covariance of the training predictors.
    """
    # The test set begins on the row right after the last training row.
    # (The previous `train_size + 1` start skipped one row entirely.)
    train_size = int(len(features_df) * train_ratio)
    train_df = features_df.iloc[:train_size]
    test_df = features_df.iloc[train_size:]

    # Separate features and target
    y_train = train_df['us_5_year_yields']
    x_train = train_df.drop('us_5_year_yields', axis=1)
    x_test = test_df.drop('us_5_year_yields', axis=1)
    y_test = test_df['us_5_year_yields']

    # Covariance across columns (rowvar=False: each column is a variable)
    cov_matrix = np.cov(x_train, rowvar=False)

    # Fit the projection on training rows only, then reuse it for the test
    # rows so no test information enters the components.
    pca = PCA(n_components=n_components)
    x_train_pca = pca.fit_transform(x_train)
    x_test_pca = pca.transform(x_test)

    component_names = [f'Component {i+1}' for i in range(n_components)]
    x_train_pca = pd.DataFrame(x_train_pca, columns=component_names)
    x_test_pca = pd.DataFrame(x_test_pca, columns=component_names)

    return x_train_pca, x_test_pca, y_train, y_test, pca, cov_matrix

# Run the split + PCA with the function defaults (3 components, 80% train).
# This rebinds x_train / x_test / y_train / y_test from the earlier cell.
x_train, x_test, y_train, y_test, pca, cov_matrix = perform_pca_and_split(features_df)

print(f"Training data shape: {x_train.shape}")
print(f"Test data shape: {x_test.shape}")
print(f"PCA explained variance ratio: {pca.explained_variance_ratio_}")
print(f"Total explained variance: {pca.explained_variance_ratio_.sum():.3f}")

x_train.head()

In [None]:
## Data Visualization Functions

def plot_covariance_matrix(cov_matrix):
    """Return a Plotly heatmap of a covariance matrix.

    Both axes are labelled with integer feature positions (0, 1, ...).
    """
    frame = pd.DataFrame(cov_matrix)
    n_rows, n_cols = frame.shape

    heatmap = go.Heatmap(
        z=frame.values,
        x=list(range(n_cols)),
        y=list(range(n_rows)),
        colorscale='Viridis',
        colorbar=dict(title='Covariance'),
    )

    fig = go.Figure(data=heatmap)
    fig.update_layout(
        title='Feature Covariance Matrix Heatmap',
        xaxis_title='Feature Index',
        yaxis_title='Feature Index',
        template='plotly_white'
    )
    return fig

def plot_pca_explained_variance(pca):
    """Return a figure with per-component and cumulative explained variance.

    Bars (left axis) show pca.explained_variance_ratio_; the line (right
    axis, overlaid) shows its running sum.
    """
    ratios = pca.explained_variance_ratio_
    running_total = np.cumsum(ratios)
    bar_labels = [f'PC{k}' for k in range(1, len(ratios) + 1)]
    line_labels = [f'PC{k}' for k in range(1, len(running_total) + 1)]

    variance_bars = go.Bar(
        x=bar_labels,
        y=ratios,
        name='Explained Variance',
        yaxis='y'
    )
    cumulative_line = go.Scatter(
        x=line_labels,
        y=running_total,
        mode='lines+markers',
        name='Cumulative Explained Variance',
        yaxis='y2'
    )

    fig = go.Figure()
    fig.add_trace(variance_bars)
    fig.add_trace(cumulative_line)

    fig.update_layout(
        title='PCA Explained Variance Analysis',
        xaxis_title='Principal Components',
        yaxis=dict(title='Explained Variance', side='left'),
        yaxis2=dict(title='Cumulative Variance', side='right', overlaying='y'),
        template='plotly_white'
    )
    return fig

def plot_feature_loadings(pca, feature_names):
    """Return a bar chart of the first principal component's loadings.

    Each bar gets a colour drawn from NumPy's global random generator, so the
    palette depends on (and advances) the current np.random state.
    """
    first_component = pd.DataFrame(
        pca.components_[0], index=feature_names, columns=['Loading']
    )

    # Three draws per feature, in red/green/blue order.
    palette = []
    for _ in feature_names:
        red = np.random.randint(0, 255)
        green = np.random.randint(0, 255)
        blue = np.random.randint(0, 255)
        palette.append(f'rgb({red}, {green}, {blue})')

    bars = go.Bar(
        x=first_component.index,
        y=first_component['Loading'],
        marker_color=palette,
        name='Feature Loadings'
    )

    fig = go.Figure(data=bars)
    fig.update_layout(
        title='Feature Loadings for First Principal Component',
        xaxis_title='Features',
        yaxis_title='Loading Weight',
        template='plotly_white',
        xaxis_tickangle=-45
    )
    return fig

# Build the covariance heatmap and the explained-variance figure.
# The figures are only created here; nothing in this cell displays them.
cov_fig = plot_covariance_matrix(cov_matrix)
pca_fig = plot_pca_explained_variance(pca)

# Predictor names (everything except the target) label the loading bars.
feature_names = [col for col in features_df.columns if col != 'us_5_year_yields']
loadings_fig = plot_feature_loadings(pca, feature_names)

print("Covariance matrix shape:", cov_matrix.shape)
print("PCA loadings shape:", pca.components_.shape)

In [None]:
import plotly.graph_objects as go

In [None]:
# The two series below were previously only local variables inside
# plot_pca_explained_variance, so this cell raised NameError on a fresh
# kernel. Derive them here from the fitted PCA object.
explained_variance = pca.explained_variance_ratio_
cumulative_variance = np.cumsum(explained_variance)
component_labels = [f'PC{i+1}' for i in range(len(explained_variance))]

# Create the bar plot for explained variance
bar_plot = go.Bar(
    x=component_labels,
    y=explained_variance,
    name='Explained Variance'
)

# Create the line plot for cumulative explained variance
line_plot = go.Scatter(
    x=component_labels,
    y=cumulative_variance,
    mode='lines+markers',
    name='Cumulative Explained Variance'
)

# Combine both plots
fig = go.Figure(data=[bar_plot, line_plot])

# Update layout
fig.update_layout(
    title='Explained Variance by PCA Components',
    xaxis_title='Principal Components',
    yaxis_title='Variance Explained',
    yaxis=dict(range=[0, 1]),  # Ensuring the y-axis goes from 0 to 1
    template='plotly_white'
)

# Show plot
fig.show()


In [None]:
# Loadings of the first principal component, labelled with the predictor
# names. Taking every column but the last relies on the target being the last
# column of features_df.
feature_names = features_df.columns[:-1]
loadings = pd.DataFrame(pca.components_[0], index=feature_names, columns=['Loading'])

In [None]:
loadings

Unnamed: 0,Loading
us_10_year_yields,0.188217
us_1_year_yields,0.229841
us_6_month_yields,0.267369
us_3_month_yield,0.262349
debt_to_gdp,-0.047165
deficit_as_percent_of_gdp,0.722486
daily_us_real_gdp,0.427048
M0,-0.239519
DivYield,0.086516


In [None]:
import random

In [None]:
def random_color():
    """Return an 'rgb(r, g, b)' string with each channel drawn via random.randint(0, 255)."""
    channels = [random.randint(0, 255) for _ in range(3)]
    return 'rgb({}, {}, {})'.format(*channels)
# One random colour per entry in feature_names (used as bar colours below).
colors = [random_color() for _ in feature_names]

In [None]:
# Bar chart of the first-component loadings; expects `loadings` to be the
# DataFrame (index = feature names, column 'Loading') built in the cell above.
fig = go.Figure(data=go.Bar(
    x=loadings.index,
    y=loadings['Loading'],
    marker_color=colors,  # Apply random colors
    name='Feature Loadings'
))

# Update layout
fig.update_layout(
    title='Feature Loadings for the First Principal Component',
    xaxis_title='Features',
    yaxis_title='Loadings',
    template='plotly_white',
    width=800,  # Adjust figure width
    height=400  # Adjust figure height
)

# Show plot
fig.show()

In [None]:
x_train

Unnamed: 0,Component 1,Component 2,Component 3
0,5.568241,-0.261349,0.335937
1,5.499481,-0.293837,0.313103
2,5.542820,-0.262673,0.303289
3,5.506406,-0.280640,0.293283
4,5.472349,-0.298082,0.285427
...,...,...,...
4398,-1.824538,-0.732795,1.242179
4399,-1.810386,-0.726841,1.248571
4400,-1.822329,-0.734389,1.248848
4401,-1.810303,-0.726606,1.248100


In [None]:
# Arithmetic mean of the training target.
mean = sum(y_train)/len(y_train)

# Last 22 training targets.
# NOTE(review): this name shadows Python's built-in eval() for the rest of the
# notebook; consider renaming once every later use has been checked.
eval = y_train.tail(22)

eval

Unnamed: 0,us_5_year_yields
4381,1.95
4382,1.93
4383,1.92
4384,1.88
4385,1.89
4386,1.87
4387,1.86
4388,1.82
4389,1.83
4390,1.82


In [None]:
test_df


Unnamed: 0,us_10_year_yields,us_1_year_yields,us_6_month_yields,us_3_month_yield,debt_to_gdp,deficit_as_percent_of_gdp,daily_us_real_gdp,M0,DivYield,us_5_year_yields
4404,-0.727691,-0.578357,-0.268164,-0.265316,1.006575,0.261684,0.380669,0.897980,0.582187,1.81
4405,-0.757222,-0.572726,-0.273654,-0.270886,1.006575,0.261684,0.380669,0.897980,0.582187,1.78
4406,-0.764605,-0.572726,-0.273654,-0.282026,1.006575,0.261684,0.380669,0.897980,0.582187,1.74
4407,-0.742457,-0.561463,-0.279144,-0.287597,1.006575,0.261684,0.380669,0.897980,0.582187,1.77
4408,-0.705543,-0.561463,-0.262673,-0.276456,1.006575,0.261684,0.380669,0.897980,0.582187,1.83
...,...,...,...,...,...,...,...,...,...,...
5499,-1.288781,-0.955649,-0.784240,-0.822339,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.26
5500,-1.281398,-0.966911,-0.789730,-0.822339,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.27
5501,-1.237101,-0.966911,-0.795220,-0.827909,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.29
5502,-1.259250,-0.966911,-0.795220,-0.827909,2.279304,-2.046589,-0.548482,2.610206,-0.715114,1.27


In [None]:
## Data Preparation for LSTM

def split_dataframe(df, chunk_size):
    """Cut df into consecutive, non-overlapping windows of exactly chunk_size rows.

    A trailing window with fewer than chunk_size rows is discarded.
    """
    full_windows = []
    for start in range(0, len(df), chunk_size):
        window = df.iloc[start:start + chunk_size]
        if len(window) >= chunk_size:
            full_windows.append(window)
    return full_windows

def prepare_sequences(x_train, x_test, y_train, y_test, sequence_length=SEQUENCE_LENGTH):
    """Turn the four tables into float32 tensors of fixed-length windows.

    Every table is cut by split_dataframe into non-overlapping windows of
    sequence_length rows (an incomplete last window is dropped). Window k of
    an X table and window k of its Y table cover the same row positions.

    Args:
        x_train, x_test: Feature tables (expected 2-D: rows x features).
        y_train, y_test: Target series (expected 1-D).
        sequence_length: Rows per window.

    Returns:
        tuple: (X_train, Y_train, X_test, Y_test). After the transpose the
        first axis is the position inside a window and the second axis is the
        window number; Y tensors are reshaped to a trailing dimension of 1.
    """

    def _stack_windows(table):
        # Stack into a single ndarray before handing it to torch: creating a
        # tensor directly from a list of ndarrays is slow and makes PyTorch
        # emit a warning. The values are identical either way.
        windows = [np.asarray(chunk) for chunk in split_dataframe(table, sequence_length)]
        return torch.tensor(np.array(windows), dtype=torch.float32)

    X_train = _stack_windows(x_train)
    X_test = _stack_windows(x_test)
    Y_train = _stack_windows(y_train)
    Y_test = _stack_windows(y_test)

    # Transpose for LSTM input format: (seq_len, batch_size, input_size)
    X_train = X_train.transpose(0, 1)
    Y_train = Y_train.transpose(0, 1).reshape(sequence_length, -1, 1)
    X_test = X_test.transpose(0, 1)
    Y_test = Y_test.transpose(0, 1).reshape(sequence_length, -1, 1)

    return X_train, Y_train, X_test, Y_test

# Build the windowed tensors with the default window length.
X_train, Y_train, X_test, Y_test = prepare_sequences(x_train, x_test, y_train, y_test)

print(f"X_train shape: {X_train.shape}")
print(f"Y_train shape: {Y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"Y_test shape: {Y_test.shape}")

# Move the training tensors to the selected device.
# NOTE(review): X_test / Y_test are left where they were created -- confirm
# that is what the later prediction code expects.
X_train = X_train.to(device)
Y_train = Y_train.to(device)

In [None]:
x_train

[    Component 1  Component 2  Component 3
 0      5.568241    -0.261349     0.335937
 1      5.499481    -0.293837     0.313103
 2      5.542820    -0.262673     0.303289
 3      5.506406    -0.280640     0.293283
 4      5.472349    -0.298082     0.285427
 5      5.505420    -0.281451     0.294055
 6      5.548963    -0.256303     0.299067
 7      5.578030    -0.245101     0.316393
 8      5.523536    -0.273193     0.303535
 9      5.552173    -0.256347     0.305647
 10     5.623931    -0.230807     0.348663
 11     5.609936    -0.236588     0.342220
 12     5.624450    -0.224181     0.334771
 13     5.622241    -0.224433     0.332212
 14     5.599924    -0.246104     0.349638
 15     5.611572    -0.241682     0.355972
 16     5.611383    -0.243362     0.359513
 17     5.618115    -0.243006     0.368231
 18     5.652003    -0.237732     0.404218
 19     5.720939    -0.217747     0.456138
 20     5.701777    -0.230449     0.458170
 21     5.683113    -0.237744     0.448590,
     Compo

In [None]:
class BondsDataset(Dataset):
    """Map-style dataset that serves items from an indexable collection of sequences."""

    def __init__(self, sequences):
        # `sequences` must support len() and integer indexing.
        self.sequences = sequences

    def __len__(self):
        """Number of stored sequences."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the sequence stored at position idx.

        Without this method the base Dataset raises NotImplementedError as
        soon as the dataset is indexed or wrapped in a DataLoader.
        """
        return self.sequences[idx]



In [None]:
class BondsDataModule(pl.LightningDataModule):
    """Lightning data module that serves train and test sequences through BondsDataset.

    The test sequences back both the validation and the test loader.
    """

    def __init__(self, train_sequences, test_sequences, batch_size=2):
        super().__init__()
        self.train_sequences = train_sequences
        self.test_sequences = test_sequences
        self.batch_size = batch_size  # used by the training loader only
        self.DataLoader = torch.utils.data.DataLoader

    def setup(self, stage=None):
        """Create the datasets.

        `stage` is accepted (and ignored) because Lightning's Trainer calls
        setup(stage=...); the previous zero-argument signature raised
        TypeError in that case. Calling setup() with no argument still works.
        """
        self.train_dataset = BondsDataset(self.train_sequences)
        self.test_dataset = BondsDataset(self.test_sequences)

    def _make_loader(self, dataset, batch_size):
        # All loaders keep the original order (shuffle=False) and use one worker.
        return self.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=1
        )

    def train_dataloader(self):
        """Loader over the training dataset using the configured batch size."""
        return self._make_loader(self.train_dataset, self.batch_size)

    def val_dataloader(self):
        """Loader over the test dataset, one sequence per batch."""
        return self._make_loader(self.test_dataset, 1)

    def test_dataloader(self):
        """Loader over the test dataset, one sequence per batch."""
        return self._make_loader(self.test_dataset, 1)

In [None]:

import numpy as np
import random
import os, errno
import sys
from tqdm import trange

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

criterion = nn.MSELoss()


In [None]:
## LSTM Model Architecture

class LSTMEncoder(nn.Module):
    """LSTM Encoder for sequence-to-sequence prediction."""

    def __init__(self, input_size, hidden_size, num_layers):
        """Build a sequence-first LSTM with dropout 0.5 between stacked layers."""
        super(LSTMEncoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            # Inter-layer dropout only exists with 2+ layers; passing a
            # non-zero value for a single layer just triggers a warning.
            dropout=0.5 if num_layers > 1 else 0.0
        )

    def forward(self, x_input):
        """Run the LSTM over x_input.

        Args:
            x_input: Tensor whose first two axes are (seq_len, batch); it is
                reshaped to (seq_len, batch, input_size) before the LSTM call.

        Returns:
            (lstm_out, hidden) where hidden is the LSTM's final (h_n, c_n)
            pair. The pair is also stored on self.hidden.
        """
        lstm_out, self.hidden = self.lstm(
            x_input.reshape(x_input.shape[0], x_input.shape[1], self.input_size)
        )
        return lstm_out, self.hidden

    def init_hidden(self, batch_size):
        """Return zero (h_0, c_0) tensors of shape (num_layers, batch_size, hidden_size).

        The tensors are created on the same device and with the same dtype as
        the module's parameters, so they stay usable after .to(device) or
        .double().
        """
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_size)
        return (
            torch.zeros(shape, device=reference.device, dtype=reference.dtype),
            torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        )


class LSTMDecoder(nn.Module):
    """Decoder: a stacked LSTM followed by a linear layer that emits one value per step."""

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Sequence-first LSTM with dropout 0.5 between layers.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=0.5,
        )
        # Projects every hidden vector down to a single output value.
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x_input, encoder_hidden_states):
        """Advance the LSTM from the given state and project its outputs.

        Args:
            x_input: Decoder input tensor.
            encoder_hidden_states: (h, c) pair used as the initial LSTM state.

        Returns:
            (output, hidden): the projected LSTM outputs and the new (h, c)
            pair, which is also kept on self.hidden.
        """
        features, new_state = self.lstm(x_input, encoder_hidden_states)
        self.hidden = new_state
        return self.linear(features), self.hidden


class LSTMDecoder2(nn.Module):
    """Alternative LSTM decoder: batch-first, no dropout, one output value per step."""

    def __init__(self, input_size, hidden_size, num_layers=1):
        """Build the decoder.

        Args:
            input_size: Width of each input step. This argument used to be
                ignored (the LSTM was hard-coded to width 1); it is now
                honoured. Callers that pass 1 get the same network as before.
            hidden_size: LSTM hidden width.
            num_layers: Number of stacked LSTM layers.
        """
        super(LSTMDecoder2, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x_input, encoder_hidden_states):
        """Advance the LSTM from the given (h, c) state and project its outputs.

        Returns:
            (output, hidden); the new (h, c) pair is also stored on self.hidden.
        """
        lstm_out, self.hidden = self.lstm(x_input, encoder_hidden_states)
        output = self.linear(lstm_out)
        return output, self.hidden


class Discriminator(nn.Module):
    """Discriminator for Professor Forcing training.

    A 2-layer batch-first LSTM reads a sequence; the final hidden state of
    both layers is concatenated per sample and mapped to a score in (0, 1).
    """

    def __init__(self, input_size, hidden_size, linear_size, lin_dropout):
        super(Discriminator, self).__init__()
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True
        )

        # NOTE(review): two ReLU/Dropout pairs sit back-to-back with no Linear
        # between them; a hidden Linear(linear_size, linear_size) may be
        # missing. Left unchanged so layer indices (and saved weights) match.
        self.linears = nn.Sequential(
            nn.Linear(hidden_size * 2, linear_size),
            nn.ReLU(),
            nn.Dropout(lin_dropout),
            nn.ReLU(),
            nn.Dropout(lin_dropout),
            nn.Linear(linear_size, 1),
            nn.Sigmoid()
        )

    def forward(self, hidden_states):
        """Score each sequence in the batch.

        Args:
            hidden_states: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, 1) with sigmoid scores.
        """
        batch_size = hidden_states.size(0)
        initial_hidden = self.init_hidden(batch_size)
        _, (final_h, _final_c) = self.lstm(hidden_states, initial_hidden)

        # final_h is (num_layers, batch, hidden) even with batch_first=True.
        # Put the batch axis first before flattening; a plain
        # .view(batch_size, -1) would mix values from different samples
        # whenever batch_size > 1.
        per_sample = final_h.transpose(0, 1).reshape(batch_size, -1)

        scores = self.linears(per_sample)
        return scores

    def init_hidden(self, batch_size):
        """Zero (h_0, c_0) for the 2-layer LSTM, on the module's device and dtype."""
        reference = next(self.parameters())
        shape = (2, batch_size, self.hidden_size)
        hidden_1 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        hidden_2 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return (hidden_1, hidden_2)

# Module-level loss objects: mean-squared error and binary cross-entropy
# (BCELoss expects probabilities, e.g. the output of a Sigmoid).
criterion = nn.MSELoss()
binary_cross_entropy = nn.BCELoss()

print("Model architecture defined successfully!")

In [None]:
# @title Coding the Professor Forcing Architecture


class Discriminator(nn.Module):
  """Discriminator for Professor Forcing training.

  NOTE: a class with the same name is defined in an earlier cell; running this
  cell rebinds `Discriminator`, so this is the definition later cells use.
  A 2-layer batch-first LSTM reads a sequence; the final hidden state of both
  layers is concatenated per sample and mapped to a score in (0, 1).
  """

  def __init__(self, input_size, hidden_size, linear_size, lin_dropout):
    super(Discriminator, self).__init__()

    self.hidden_size = hidden_size

    self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=2, batch_first=True)

    # NOTE(review): two ReLU/Dropout pairs sit back-to-back with no Linear
    # between them; left unchanged so layer indices (and saved weights) match.
    self.linears = nn.Sequential(
        nn.Linear(hidden_size * 2, linear_size),
        nn.ReLU(),
        nn.Dropout(lin_dropout),
        nn.ReLU(),
        nn.Dropout(lin_dropout),
        nn.Linear(linear_size, 1),
        nn.Sigmoid()
    )

  def forward(self, hidden_states):
    """Return a (batch, 1) tensor of scores for a (batch, seq_len, input_size) input."""
    batch_size = hidden_states.size(0)
    initial_hidden = self.init_hidden(batch_size)
    _, (final_h, _final_c) = self.lstm(hidden_states, initial_hidden)

    # final_h is (num_layers, batch, hidden) even with batch_first=True.
    # Put the batch axis first before flattening; a plain .view(batch_size, -1)
    # would mix values from different samples whenever batch_size > 1.
    per_sample = final_h.transpose(0, 1).reshape(batch_size, -1)

    scores = self.linears(per_sample)
    return scores

  def init_hidden(self, batch_size,):
    """Zero (h_0, c_0) for the 2-layer LSTM, on the module's device and dtype."""
    reference = next(self.parameters())
    shape = (2, batch_size, self.hidden_size)
    hidden_1 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
    hidden_2 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)

    return (hidden_1, hidden_2)









In [None]:
# Binary cross-entropy loss object (the same name is also assigned in the
# model-architecture cell above).
binary_cross_entropy = nn.BCELoss()


In [None]:
## Main LSTM Sequence-to-Sequence Model

class LSTMSeq2Seq(nn.Module):
    """LSTM encoder-decoder with a built-in training loop and a prediction helper.

    The encoder reads the input window. `decoder` (input width = input_size)
    produces the first output step from the last input row; `decoder2`
    (input width = 1) produces every later step from the previous output or
    target value.
    """

    def __init__(self, input_size, hidden_size):
        super(LSTMSeq2Seq, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.encoder = LSTMEncoder(input_size=input_size, hidden_size=hidden_size, num_layers=2)
        self.decoder = LSTMDecoder(input_size=input_size, hidden_size=hidden_size, num_layers=2)
        self.decoder2 = LSTMDecoder2(input_size=1, hidden_size=hidden_size, num_layers=2)

    def _initial_decoder_state(self, input_batch):
        """Encode input_batch; return (first decoder input, initial decoder state)."""
        _, (hidden_state, cell_state) = self.encoder(input_batch)
        decoder_input = input_batch[-1, :, :]
        # NOTE(review): indexing [:, 0, :] keeps only batch element 0's encoder
        # state and yields 2-D tensors, so the decoders appear to receive a 2-D
        # input with a 2-D state (which nn.LSTM documents as an unbatched
        # sequence). Kept as-is to preserve what existing checkpoints were
        # trained on -- confirm whether the full (layers, batch, hidden) state
        # was intended.
        decoder_hidden = (hidden_state[:, 0, :], cell_state[:, 0, :])
        return decoder_input, decoder_hidden

    def _decode_step(self, t, decoder_input, decoder_hidden):
        """Run one decoding step: `decoder` at t == 0, `decoder2` afterwards."""
        step_decoder = self.decoder if t == 0 else self.decoder2
        return step_decoder(decoder_input, decoder_hidden)

    def train_model(self, input_tensor, target_tensor, n_epochs, target_len, batch_size,
                   training_prediction="recursive", teacher_forcing_ratio=0.5,
                   learning_rate=0.01, dynamic_tf=False):
        """Train with Adam on an MSE loss and save the weights when done.

        Args:
            input_tensor: (seq_len, n_sequences, input_size) inputs.
            target_tensor: (target_len, n_sequences, 1) targets.
            n_epochs: Number of passes over the batches.
            target_len: Number of steps to decode.
            batch_size: Sequences per batch; a final partial batch is skipped.
            training_prediction: "recursive" (feed back the model's own
                output) or "teacher_forcing" (feed the true target).
            teacher_forcing_ratio: Only decremented when dynamic_tf is set;
                it does not currently influence decoding.
            learning_rate: Adam learning rate.
            dynamic_tf: Decrease teacher_forcing_ratio by 0.02 per epoch.

        Returns:
            Mean over epochs of the per-epoch summed batch loss.

        Raises:
            ValueError: if training_prediction is not a supported mode.

        Side effects:
            Writes the state dict to MODEL_SAVE_PATH (default
            'trained_model.pth').
        """
        if training_prediction not in ("recursive", "teacher_forcing"):
            # Previously an unknown mode left `outputs` as all zeros and
            # failed later inside loss.backward() with an opaque error.
            raise ValueError(
                f"Unknown training_prediction: {training_prediction!r}"
            )

        losses = np.full(n_epochs, np.nan)
        optimizer = optim.Adam(self.parameters(), lr=learning_rate)
        criterion = nn.MSELoss()
        n_batches = int(input_tensor.shape[1] // batch_size)

        print(f"Training with {n_batches} batches")

        with trange(n_epochs) as tr:
            for it in tr:
                batch_loss = 0

                for b in range(n_batches):
                    # Consecutive, non-overlapping batches. The old slice
                    # `b: b + batch_size` advanced one sequence at a time, so
                    # batches overlapped and most of the data was never seen.
                    start = b * batch_size
                    stop = start + batch_size
                    input_batch = input_tensor[:, start:stop, :]
                    target_batch = target_tensor[:, start:stop, :]

                    # Allocate on the inputs' device so the loss does not mix
                    # CPU and GPU tensors.
                    outputs = torch.zeros(
                        target_len, batch_size, 1, device=input_tensor.device
                    )

                    optimizer.zero_grad()
                    decoder_input, decoder_hidden = self._initial_decoder_state(input_batch)

                    use_targets = training_prediction == "teacher_forcing"
                    for t in range(target_len):
                        # decoder2 handles t > 0 in both modes: its input is
                        # one value wide, matching both the previous output
                        # and the target. The recursive mode used to feed that
                        # width-1 output back into `decoder`, which fails
                        # unless input_size == 1 and never trained the
                        # decoder2 that predict() relies on.
                        decoder_output, decoder_hidden = self._decode_step(
                            t, decoder_input, decoder_hidden
                        )
                        outputs[t] = decoder_output
                        decoder_input = target_batch[t, :, :] if use_targets else decoder_output

                    # Calculate loss and backpropagate
                    target_batch = target_batch.reshape(target_batch.shape[0], target_batch.shape[1], 1)
                    loss = criterion(outputs, target_batch)
                    batch_loss += loss.item()
                    loss.backward()
                    optimizer.step()

                losses[it] = batch_loss
                if dynamic_tf and teacher_forcing_ratio > 0:
                    teacher_forcing_ratio = teacher_forcing_ratio - 0.02

                tr.set_postfix(loss=f"{batch_loss:.3f}")

        # Save model
        model_save_path = os.getenv('MODEL_SAVE_PATH', 'trained_model.pth')
        torch.save(self.state_dict(), model_save_path)

        return sum(losses) / len(losses)

    def predict(self, input_tensor, target_len):
        """Decode target_len steps for input_tensor and return a NumPy array.

        Runs in eval mode (dropout disabled) without gradient tracking, then
        restores the previous train/eval mode.

        Returns:
            ndarray of shape (target_len, input_tensor.shape[1], 1).
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                decoder_input, decoder_hidden = self._initial_decoder_state(input_tensor)
                outputs = torch.zeros(
                    target_len, input_tensor.shape[1], 1, device=input_tensor.device
                )
                for t in range(target_len):
                    decoder_output, decoder_hidden = self._decode_step(
                        t, decoder_input, decoder_hidden
                    )
                    outputs[t] = decoder_output
                    decoder_input = decoder_output
        finally:
            self.train(was_training)

        # .cpu() makes the conversion work when the model runs on a GPU.
        return outputs.detach().cpu().numpy()


class ModelAlternate(LSTMSeq2Seq):
    """Seq2seq model extended with a discriminator for Professor Forcing training.

    Adds a ``Discriminator`` next to the inherited encoder / decoder /
    decoder2 and stores the generator-side SGD parameter groups in
    ``other_params``.
    """

    def __init__(self, input_size, hidden_size):
        super(ModelAlternate, self).__init__(hidden_size=hidden_size, input_size=input_size)
        self.discriminator = Discriminator(
            input_size=1,
            hidden_size=hidden_size,
            linear_size=64,
            lin_dropout=0.5
        )
        # Parameter groups for the generator optimizer: every sub-module has
        # its own learning rate; decoder2 additionally uses weight decay.
        self.other_params = [
            {'params': self.encoder.parameters(), 'lr': 0.0001},
            {'params': self.decoder.parameters(), 'lr': 0.0002},
            {'params': self.decoder2.parameters(), 'lr': 0.0003, 'weight_decay': 1e-4}
        ]

    def adversarial_train(self, learning_rate, input_tensor, target_tensor,
                          n_epochs, target_len, batch_size):
        """Train generator (encoder/decoders) and discriminator adversarially.

        In every batch the first half of the rows is decoded free-running
        (label 1) and the rest is fed ground-truth rows (label 0). The
        discriminator is trained to tell them apart; the generator is trained
        on the negated discriminator loss. Even-numbered batches update the
        generator, odd-numbered batches update the discriminator.

        Args:
            learning_rate: Accepted for interface compatibility but not used;
                the rates come from ``other_params`` and the fixed 0.003 of
                the discriminator optimizer.
            input_tensor: Encoder inputs, indexed ``[step, example, feature]``.
            target_tensor: Targets, indexed ``[step, example, feature]``.
            n_epochs: Number of passes over the batches.
            target_len: Number of decoder steps per example.
            batch_size: Examples per batch; a trailing partial batch is dropped.

        Returns:
            None. The model's ``state_dict`` is written to ``MODEL_SAVE_PATH``
            (default ``trained_model.pth``) when training finishes.
        """
        gen_optimizer = optim.SGD(self.other_params)
        disc_optimizer = optim.SGD(self.discriminator.parameters(), lr=0.003)

        device = input_tensor.device
        n_batches = int(input_tensor.shape[1] // batch_size)

        # Half of each batch is free-running, the remainder teacher-forced.
        # Derived from batch_size (previously hard-coded to 25 + 25, which
        # only worked for batch_size == 50).
        n_free = batch_size // 2
        n_forced = batch_size - n_free
        sample_labels = torch.cat(
            [torch.ones(n_free, 1), torch.zeros(n_forced, 1)], dim=0
        ).to(device)

        with trange(n_epochs) as tr:
            for it in tr:
                for b in range(n_batches):
                    # Consecutive, non-overlapping batches. The previous slice
                    # `b:b + batch_size` advanced by one example per batch, so
                    # batches overlapped and most of the data was never seen.
                    start = b * batch_size
                    input_batch = input_tensor[:, start:start + batch_size, :]
                    target_batch = target_tensor[:, start:start + batch_size, :]
                    outputs = torch.zeros(target_len, batch_size, 1, device=device)

                    # Result is overwritten by the encoder call below; kept in
                    # case init_hidden has side effects - TODO confirm and drop.
                    encoder_hidden = self.encoder.init_hidden(batch_size=batch_size)
                    gen_optimizer.zero_grad()
                    disc_optimizer.zero_grad()

                    encoder_output, encoder_hidden = self.encoder(input_batch)
                    decoder_input = input_batch[-1, :, :]

                    hidden_state = encoder_hidden[0]
                    cell_state = encoder_hidden[1]
                    decoder_hidden = (hidden_state[:, 0, :], cell_state[:, 0, :])

                    for t in range(target_len):
                        if t == 0:
                            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
                            outputs[t] = decoder_output
                            decoder_input = target_batch[t, :, :]
                        else:
                            decoder_output, decoder_hidden = self.decoder2(decoder_input, decoder_hidden)
                            outputs[t] = decoder_output
                            # NOTE(review): the teacher-forced half is fed the
                            # targets of rows 0:n_forced, not of its own rows
                            # (n_free:); behaviour kept as-is - confirm intent.
                            decoder_input = torch.cat([
                                decoder_output[:n_free, :],
                                target_batch[t, :n_forced, :]
                            ], dim=0)

                    # Discriminator expects examples on the first axis.
                    outputs = outputs.transpose(1, 0)

                    preds = self.discriminator(outputs)
                    # Shuffle predictions and labels with the same permutation
                    # so each prediction keeps its label.
                    indices = torch.randperm(preds.size(0))
                    preds = preds[indices]
                    # One label per example. The labels are identical at every
                    # step, so this equals the old per-step tensor sliced at
                    # step 0 (whose 4-index slice on a 3-D tensor raised).
                    labels = sample_labels[indices]

                    discriminator_loss = binary_cross_entropy(preds, labels)
                    generator_loss = -discriminator_loss

                    if b % 2 == 0:
                        generator_loss.backward()
                        gen_optimizer.step()
                    else:
                        discriminator_loss.backward()
                        disc_optimizer.step()

        # Save model
        model_save_path = os.getenv('MODEL_SAVE_PATH', 'trained_model.pth')
        torch.save(self.state_dict(), model_save_path)

# Confirmation message emitted once the model-definition cell has run to the end.
print("Main model classes defined successfully!")

**TODO:** Find a way to use the updated weights saved in `trained_model.pth` for the encoder and decoder layers. Recall that the weights and biases of an LSTM layer are shared by all of its cells (time steps).

In [None]:
# @title Alternate Model Trained with Professor Forcing

class model_alternate(lstm_seq2seq):
    """Seq2seq model with an added discriminator, trained with Professor Forcing.

    Extends ``lstm_seq2seq`` with a ``Discriminator`` and stores the SGD
    parameter groups of the generator side (encoder, decoder, decoder2) in
    ``other_params``.
    """

    def __init__(self, input_size, hidden_size):
        super(model_alternate, self).__init__(hidden_size=hidden_size, input_size=input_size)
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.discriminator = Discriminator(input_size=1, hidden_size=hidden_size, linear_size=64, lin_dropout=0.5)
        # One optimizer group per sub-module so each can use its own learning
        # rate; decoder2 additionally uses weight decay.
        self.other_params = [
            {'params': self.encoder.parameters(), 'lr': 0.0001},
            {'params': self.decoder.parameters(), 'lr': 0.0002},
            {'params': self.decoder2.parameters(), 'lr': 0.0003, 'weight_decay': 1e-4}
        ]

    def adversarial_train(self, learning_rate, input_tensor, target_tensor, n_epochs, target_len, batch_size):
        """Alternate generator and discriminator updates over the training set.

        The first half of every batch is decoded free-running (label 1), the
        rest is fed ground-truth rows (label 0). Even-numbered batches step
        the generator on the negated discriminator loss, odd-numbered batches
        step the discriminator.

        Args:
            learning_rate: Not used; learning rates are fixed in
                ``other_params`` and in the discriminator optimizer (0.003).
            input_tensor: Encoder inputs, indexed ``[step, example, feature]``.
            target_tensor: Targets, indexed ``[step, example, feature]``.
            n_epochs: Number of passes over the batches.
            target_len: Number of decoder steps per example.
            batch_size: Examples per batch; a trailing partial batch is dropped.

        Returns:
            None. The ``state_dict`` is saved to ``MODEL_SAVE_PATH``
            (default ``trained_model.pth``) at the end.
        """
        gen_optimizer = optim.SGD(self.other_params)
        disc_optimizer = optim.SGD(self.discriminator.parameters(), lr=0.003)

        device = input_tensor.device
        n_batches = int(input_tensor.shape[1] // batch_size)

        # Split sizes follow batch_size instead of the former hard-coded
        # 25 + 25, which broke for any batch_size other than 50.
        n_free = batch_size // 2
        n_forced = batch_size - n_free
        step_labels = torch.cat([torch.ones(n_free, 1), torch.zeros(n_forced, 1)], dim=0).to(device)

        with trange(n_epochs) as tr:
            for it in tr:
                for b in range(n_batches):
                    # Non-overlapping batches: the old `b:b + batch_size` slice
                    # moved forward by a single example per batch.
                    start = b * batch_size
                    input_batch = input_tensor[:, start:start + batch_size, :]
                    target_batch = target_tensor[:, start:start + batch_size, :]
                    outputs = torch.zeros(target_len, batch_size, 1).to(device)
                    labels = torch.zeros(target_len, batch_size, 1).to(device)
                    # Overwritten by the encoder call below; kept in case
                    # init_hidden has side effects - TODO confirm and drop.
                    encoder_hidden = self.encoder.init_hidden(batch_size=batch_size)

                    gen_optimizer.zero_grad()
                    disc_optimizer.zero_grad()

                    encoder_output, encoder_hidden = self.encoder(input_batch)
                    decoder_input = input_batch[-1, :, :]

                    hidden_state = encoder_hidden[0]
                    cell_state = encoder_hidden[1]
                    decoder_hidden = (hidden_state[:, 0, :], cell_state[:, 0, :])

                    for t in range(target_len):
                        if t == 0:
                            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
                            outputs[t] = decoder_output
                            decoder_input = target_batch[t, :, :]
                        else:
                            decoder_output, decoder_hidden = self.decoder2(decoder_input, decoder_hidden)
                            outputs[t] = decoder_output
                            # NOTE(review): the teacher-forced half receives the
                            # targets of rows 0:n_forced rather than of its own
                            # rows; behaviour kept as-is - confirm intent.
                            decoder_input = torch.cat([decoder_output[:n_free, :], target_batch[t, :n_forced, :]], dim=0)

                        labels[t] = step_labels

                    # Put examples on the first axis for the discriminator.
                    labels = labels.transpose(1, 0)
                    outputs = outputs.transpose(1, 0)

                    preds = self.discriminator(outputs)

                    # Same permutation for both, so pairs stay aligned.
                    indices = torch.randperm(preds.size(0))
                    preds = preds[indices]
                    labels = labels[indices]

                    # Labels are constant over time; keep step 0 per example.
                    labels = labels[:, 0, :]

                    discriminator_loss = binary_cross_entropy(preds, labels)
                    generator_loss = -discriminator_loss

                    if b % 2 == 0:
                        generator_loss.backward()
                        gen_optimizer.step()
                    else:
                        discriminator_loss.backward()
                        disc_optimizer.step()

        # Same destination that train_model uses, so MODEL_SAVE_PATH is honoured.
        torch.save(self.state_dict(), os.getenv('MODEL_SAVE_PATH', 'trained_model.pth'))











In [None]:
def list_to_numpy(list_of_lists):
    """
    Build one NumPy array per sublist.

    Parameters:
    list_of_lists (list of lists): Sublists to convert, in order.

    Returns:
    list of np.ndarray: The converted arrays, in the same order as the input.
    """
    arrays = []
    for entry in list_of_lists:
        arrays.append(np.array(entry))
    return arrays

def numpy_to_torch(Xtrain, Ytrain, Xtest, Ytest):
    '''
    Convert the windowed data sets to float32 PyTorch tensors.

    Each argument may be an np.array, a (nested) list, a list of np.arrays or
    an existing tensor. The returned tensors are always copies.

    : param Xtrain:                           windowed training input data (input window size, # examples, # features); np.array
    : param Ytrain:                           windowed training target data (output window size, # examples, # features); np.array
    : param Xtest:                            windowed test input data (input window size, # examples, # features); np.array
    : param Ytest:                            windowed test target data (output window size, # examples, # features); np.array
    : return X_train_torch, Y_train_torch,
    :        X_test_torch, Y_test_torch:      all inputs converted to float32 PyTorch tensors

    '''

    def _as_float32_tensor(values):
        # Existing tensors are copied directly; np.asarray cannot read CUDA tensors.
        if isinstance(values, torch.Tensor):
            return values.detach().clone().to(torch.float32)
        # Stack into a single ndarray first: torch.tensor() on a *list* of
        # ndarrays converts element by element, which is very slow and warns.
        return torch.tensor(np.asarray(values), dtype=torch.float32)

    X_train_torch = _as_float32_tensor(Xtrain)
    Y_train_torch = _as_float32_tensor(Ytrain)

    X_test_torch = _as_float32_tensor(Xtest)
    Y_test_torch = _as_float32_tensor(Ytest)

    return X_train_torch, Y_train_torch, X_test_torch, Y_test_torch

In [None]:

# Convert the sublists of all four windowed collections to NumPy arrays in one pass.
x_train, x_test, y_train, y_test = (
    list_to_numpy(collection) for collection in (x_train, x_test, y_train, y_test)
)



In [None]:

# Convert the four windowed collections to float32 tensors (see numpy_to_torch).
X_train, Y_train, X_test, Y_test = numpy_to_torch(x_train, y_train, x_test, y_test)


In [None]:
# Swap axes 0 and 1 of every tensor. The next cell reshapes the targets to
# (22, n_examples, 1), so after this cell axis 0 is presumably the time step
# and axis 1 the example - TODO confirm against the windowing code.
X_train = X_train.transpose(0, 1)  # presumably (n_examples, seq_len, n_features) -> (seq_len, n_examples, n_features)
Y_train = Y_train.transpose(0, 1)  # presumably (n_examples, seq_len, ...) -> (seq_len, n_examples, ...)
X_test = X_test.transpose(0, 1)    # same axis swap as X_train
Y_test = Y_test.transpose(0, 1)    # same axis swap as Y_train

In [None]:
# Give the targets an explicit trailing feature axis: (TARGET_LENGTH, n_examples, 1).
# -1 lets the example count follow the data instead of hard-coding 50 / 200.
Y_test = Y_test.reshape(TARGET_LENGTH, -1, 1)
Y_train = Y_train.reshape(TARGET_LENGTH, -1, 1)


Y_train.shape

torch.Size([22, 200, 1])

In [None]:
# Inspect the test targets (PyTorch elides the middle of large tensors in the repr).
Y_test

tensor([[[1.8100],
         [1.7100],
         [1.9500],
         ...,
         [0.9800],
         [1.1800],
         [1.1500]],

        [[1.7800],
         [1.7500],
         [1.9100],
         ...,
         [1.0200],
         [1.1800],
         [1.2100]],

        [[1.7400],
         [1.7800],
         [1.9500],
         ...,
         [1.0100],
         [1.2000],
         [1.1300]],

        ...,

        [[1.6900],
         [1.9700],
         [2.0100],
         ...,
         [1.1900],
         [1.1600],
         [1.2900]],

        [[1.6300],
         [1.9500],
         [2.0100],
         ...,
         [1.2000],
         [1.1800],
         [1.2700]],

        [[1.6400],
         [1.9500],
         [2.0600],
         ...,
         [1.1600],
         [1.1400],
         [1.2600]]])

In [None]:

# Train the baseline seq2seq model with teacher forcing.
# Hyperparameters come from the configuration cell (N_EPOCHS, BATCH_SIZE,
# TARGET_LENGTH, INPUT_SIZE, HIDDEN_SIZE, LEARNING_RATE). The stale local
# override `N_EPOCHS = 15` was never passed to train_model (500 was
# hard-coded in the call), so it has been removed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

X_train = X_train.to(device)
Y_train = Y_train.to(device)
# The model lives on `device`, so the inputs later passed to predict() must too.
X_test = X_test.to(device)

model = lstm_seq2seq(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE)

model = model.to(device)
loss = model.train_model(
    X_train,
    Y_train,
    n_epochs=N_EPOCHS,
    target_len=TARGET_LENGTH,
    batch_size=BATCH_SIZE,
    training_prediction='teacher_forcing',
    teacher_forcing_ratio=0.6,
    learning_rate=LEARNING_RATE,
    dynamic_tf=False,
)


4


100%|██████████| 500/500 [02:38<00:00,  3.16it/s, loss=0.025]


In [None]:
# Instantiate the Professor Forcing variant (adds a discriminator) on the same device.
extra = model_alternate(input_size = 3, hidden_size = 50).to(device)

In [None]:
# Reload the checkpoint from the same location the training code saves to.
# weights_only=True restricts unpickling to tensors/state dicts (addresses the
# torch.load security warning); map_location keeps the load device-independent.
# NOTE(review): this restores `model`, which was just trained and saved. If the
# goal is to warm-start `extra` before adversarial training, load into `extra`
# with strict=False (it has additional discriminator weights) - confirm intent.
checkpoint_path = os.getenv('MODEL_SAVE_PATH', 'trained_model.pth')
model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True))


You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.



<All keys matched successfully>

In [None]:
# Adversarial (Professor Forcing) training of the alternate model.
extra.adversarial_train(
    learning_rate=0.003,
    input_tensor=X_train,
    target_tensor=Y_train,
    n_epochs=500,
    target_len=22,
    batch_size=50,
)

100%|██████████| 500/500 [03:04<00:00,  2.70it/s]


In [None]:
# Show the module tree. Printing `model.named_parameters` (without calling it)
# only emitted the bound-method repr wrapped around this same tree.
print(model)

<bound method Module.named_parameters of lstm_seq2seq(
  (encoder): lstm_encoder(
    (lstm): LSTM(3, 50, num_layers=2, dropout=0.5)
  )
  (decoder): lstm_decoder(
    (lstm): LSTM(3, 50, num_layers=2, dropout=0.5)
    (linear): Linear(in_features=50, out_features=1, bias=True)
  )
  (decoder2): lstm_decoder2(
    (lstm): LSTM(1, 50, num_layers=2, batch_first=True)
    (linear): Linear(in_features=50, out_features=1, bias=True)
  )
)>


In [None]:
# Out-of-sample forecast: 22 steps per test window, returned as a NumPy array.
preds = model.predict(X_test, 22)

In [None]:
# Drop the trailing feature axis and put examples first: (22, 50, 1) -> (50, 22).
Y_preds_out = preds.reshape(22,50).transpose(1,0)

In [None]:
# Sanity check: size of the first axis of Y_test.
Y_test.shape[0]

22

In [None]:
# Same layout change as for the predictions: (22, 50, 1) -> (50, 22).
# NOTE: this rebinds Y_test, so the cell is not safe to re-run - a second run
# would reinterpret the already-transposed values as (22, 50).
Y_test = Y_test.reshape(22,50).transpose(1,0)

In [None]:
# Flatten both to 1-D: the 22 steps of window 0, then those of window 1, and so on.
Y_test = Y_test.reshape(-1,1).reshape(-1)
Y_preds_out = Y_preds_out.reshape(-1,1).reshape(-1)

In [None]:
# Out-of-sample comparison table: one row per forecast step.
data_2 = pd.DataFrame({"True":Y_test, "Predicted": Y_preds_out})

In [None]:
# Per-step squared forecast error.
data_2["Squared_Diff"] = (data_2["True"] - data_2["Predicted"])**2

In [None]:
# Out-of-sample mean squared error.
# NOTE(review): the recorded value (~6.0) is large relative to the mean true
# yield (~1.56) shown below - the out-of-sample forecasts deserve a closer look.
mse = np.mean(data_2["Squared_Diff"])
print(mse)

5.9987154


In [None]:
# Inspect the flattened out-of-sample predictions.
Y_preds_out

array([6.4957824, 6.4666905, 6.510626 , ..., 2.4441032, 2.4327395,
       2.4221156], dtype=float32)

In [None]:
# Mean of the true out-of-sample yields, for scale when reading the MSE.
np.mean(data_2["True"])

1.560609

In [None]:
# Inspect the out-of-sample comparison table.
data_2

Unnamed: 0,True,Predicted,Squared_Diff
0,1.81,6.495782,21.956556
1,1.78,6.466691,21.965067
2,1.74,6.510626,22.758873
3,1.77,6.548952,22.838379
4,1.83,6.581836,22.579948
...,...,...,...
1095,1.26,2.469326,1.462469
1096,1.27,2.456273,1.407243
1097,1.29,2.444103,1.331954
1098,1.27,2.432739,1.351963


Calculated the MSE out of sample manually before realising it could be done in a function.

Transposing moves the data into individual sequences rather than by Batches.


In [None]:
def calculate_mse(Y_pred, Y_true):
    """Compute the mean squared error and a per-step comparison table.

    Both inputs are indexed ``[step, example, 1]``. They are flattened
    example by example (all steps of example 0, then example 1, ...), so the
    rows of the returned table follow the order of the individual sequences.

    Args:
        Y_pred: Predictions; NumPy array or tensor of shape
            ``(n_steps, n_examples, 1)``.
        Y_true: Targets with the same shape; a tensor on any device (with or
            without gradient tracking) or a NumPy array.

    Returns:
        tuple ``(MSE, data)``: the mean squared error and a DataFrame with
        the columns ``"True"`` and ``"Predicted"``.
    """

    def _to_numpy(values):
        # .numpy() alone fails for CUDA tensors and for tensors that track
        # gradients, so detach and move to host memory first.
        if isinstance(values, torch.Tensor):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    def _flatten_by_example(values):
        # (steps, examples, 1) -> (examples, steps) -> 1-D
        return values.reshape(values.shape[0], values.shape[1]).transpose(1, 0).reshape(-1)

    Y_true = _flatten_by_example(_to_numpy(Y_true))
    Y_pred = _flatten_by_example(_to_numpy(Y_pred))

    MSE = np.mean((Y_pred - Y_true) ** 2)
    data = pd.DataFrame({"True": Y_true, "Predicted": Y_pred})

    return MSE, data



In [None]:
# In-sample forecast: 22 steps for every training window.
Y_preds_in = model.predict(X_train,22)

In [None]:
# calculate_mse returns a tuple: (mean squared error, comparison DataFrame).
mse = calculate_mse(Y_preds_in,Y_train)



In [None]:
# Second element of the tuple: the in-sample True/Predicted table.
data = mse[1]

In [None]:
# First element of the tuple: the in-sample mean squared error.
mse[0]

0.9030966

In [None]:
# Attach the dates by position: the first len(data) dates belong to the
# in-sample rows, mirroring how the last rows are taken for the test set.
# Plain column assignment would align on index labels and silently produce
# NaT / shifted dates if `df` does not carry a default 0..n-1 index.
data["Time"] = df["date"].iloc[:len(data)].to_numpy()

In [None]:
# Inspect the in-sample comparison table with its dates.
data

Unnamed: 0,True,Predicted,Time
0,6.50,6.498363,2000-01-03
1,6.40,6.208910,2000-01-04
2,6.51,6.268754,2000-01-05
3,6.46,6.326372,2000-01-06
4,6.42,6.380508,2000-01-07
...,...,...,...
4395,1.84,2.372994,2016-11-07
4396,1.83,2.352981,2016-11-08
4397,1.84,2.334708,2016-11-09
4398,1.80,2.318113,2016-11-10


In [None]:
from torch.utils.data import TensorDataset, random_split

# Hold out part of the training windows for validation.
# TensorDataset indexes along axis 0, so put the examples first.
dataset = TensorDataset(X_train.transpose(1, 0), Y_train.transpose(1, 0))

# 75% of the windows stay in training, the remaining 25% form the validation set.
TRAIN_FRACTION = 0.75
train_size = int(TRAIN_FRACTION * len(dataset))
val_size = len(dataset) - train_size

# A dedicated, seeded generator makes the split identical on every run,
# independent of how much of the global RNG state training has consumed.
split_generator = torch.Generator().manual_seed(RANDOM_SEED)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=split_generator)

# Pull the selected examples back out as tensors and restore the
# time-first layout expected by train_model / predict.
features, targets = dataset.tensors
X_train_split = features[train_dataset.indices].transpose(1, 0)
Y_train_split = targets[train_dataset.indices].transpose(1, 0)
X_val_split = features[val_dataset.indices].transpose(1, 0)
Y_val_split = targets[val_dataset.indices].transpose(1, 0)


In [None]:
# Sanity check: shape of the validation inputs.
X_val_split.shape

torch.Size([22, 50, 3])

In [None]:
#@title Hyperparameter Optimization
# DISABLED: the whole search below is wrapped in a string literal, so nothing
# in this cell runs - the cell merely evaluates to that string. The code
# inside sketches a differential-evolution search over epochs, learning
# rate, batch size and hidden size, scored by validation MSE.
"""

#############################################################################################################################
from scipy.optimize import differential_evolution
# Hyperparameter Grid for Optimization
param_bounds = {
    'epochs': (400, 500),            # Number of epochs between 400 and 500
    'learning_rate': (0.0001, 0.01), # Learning rate between 0.0001 and 0.01
    'batch_size': (16, 128),         # Batch size between 16 and 128
    'hidden_size': (50, 300)         # Hidden size between 50 and 300 units
}

def objective_function(params):
    epochs = int(params[0])
    learning_rate = params[1]
    batch_size = int(params[2])
    hidden_size = int(params[3])
    model_2 = lstm_seq2seq(input_size=3, hidden_size= hidden_size)

    # Train the model
    losses = model_2.train_model(X_train_split, Y_train_split, n_epochs=epochs, target_len=22,
                                 batch_size=batch_size, training_prediction='teacher_forcing',
                                 teacher_forcing_ratio=0.6, learning_rate=learning_rate, dynamic_tf=False)

    Y_val_pred = model_2.predict(X_val_split, 22)
    mse = calculate_mse(Y_val_pred, Y_val_split)

    return mse[0]

# Create a custom callback to update the progress bar
#def progress_callback(xk, convergence):
#   progress_bar.update(1)  # Update the progress bar by 1 step

# Perform differential evolution with progress bar
num_iterations = 1  # maxiter
popsize = 1  # population size
total_evaluations = num_iterations * popsize

#with tqdm(total=total_evaluations) as progress_bar:
result = differential_evolution(objective_function,
                                    bounds=[param_bounds['epochs'], param_bounds['learning_rate'],
                                            param_bounds['batch_size'], param_bounds['hidden_size']],
                                    strategy='best1bin', maxiter=num_iterations, popsize=popsize,
                                    tol=0.01, seed=42, disp=True)
"""

"\n\n#############################################################################################################################\nfrom scipy.optimize import differential_evolution\n# Hyperparameter Grid for Optimization\nparam_bounds = {\n    'epochs': (400, 500),            # Number of epochs between 400 and 500\n    'learning_rate': (0.0001, 0.01), # Learning rate between 0.0001 and 0.01\n    'batch_size': (16, 128),         # Batch size between 16 and 128\n    'hidden_size': (50, 300)         # Hidden size between 50 and 300 units\n}\n\ndef objective_function(params):\n    epochs = int(params[0])\n    learning_rate = params[1]\n    batch_size = int(params[2])\n    hidden_size = int(params[3])\n    model_2 = lstm_seq2seq(input_size=3, hidden_size= hidden_size)\n\n    # Train the model\n    losses = model_2.train_model(X_train_split, Y_train_split, n_epochs=epochs, target_len=22,\n                                 batch_size=batch_size, training_prediction='teacher_forcing',\n       

In [None]:
########################################################################################################
#All code for the visualisation and plotting.

In [None]:
import plotly.express as px

In [None]:
import plotly.graph_objects as go


# Main plot: actual vs. predicted yields over the in-sample period.
actual_trace = go.Scatter(
    x=data["Time"],
    y=data["True"],
    mode="lines+markers",
    name="Actual",
    line=dict(color="blue"),
)
predicted_trace = go.Scatter(
    x=data['Time'],
    y=data['Predicted'],
    mode='lines+markers',
    name='Predicted',
    line=dict(color='Green'),
)
fig = go.Figure(data=[actual_trace, predicted_trace])

# update_layout returns the figure, so leaving it last renders the plot.
fig.update_layout(
    title='In Sample Yield Predictions',
    xaxis_title='Date',
    yaxis_title='Yields',
    legend_title='Series',
    template='plotly_white'
)



In [None]:
# Forecast error plus 10-step rolling mean / standard deviation of both series.
data["Residuals"] = data["True"] - data["Predicted"]
for series_name in ("True", "Predicted"):
    rolling_window = data[series_name].rolling(window=10)
    data[f"{series_name} Rolling Mean"] = rolling_window.mean()
    data[f"{series_name} Rolling Std"] = rolling_window.std()

In [None]:
# Scatter of the in-sample residuals over time.
# `labels` maps column names to axis titles: the x column is "Time", so the
# former key "Date" matched nothing and the axis kept the raw column name.
fig_residuals = px.scatter(
    data,
    x="Time",
    y="Residuals",
    title="Residuals Over Time",
    labels={"Time": "Date", "Residuals": "Residuals"},
)
# The figure was previously created but never rendered.
fig_residuals.show()

In [None]:
# Average the residuals over consecutive forecast windows of TARGET_LENGTH steps.
# The frame is built in one go from lists: growing it row by row via .loc
# wrote timestamps into a float64 column (pandas dtype warning) and is slow.
group_starts = list(range(0, len(data), TARGET_LENGTH))
grouped_residuals = pd.DataFrame(
    {
        # .mean() divides by the actual group size, so a shorter final
        # window is averaged correctly instead of always dividing by 22.
        "Values": [
            float(data["Residuals"].iloc[start:start + TARGET_LENGTH].mean())
            for start in group_starts
        ],
        # Each window is stamped with the date of its first step.
        "Time": [data["Time"].iloc[start] for start in group_starts],
    },
    index=group_starts,  # keep the first-row position of every window as index
)

# Split by sign so positive and negative averages can be coloured separately.
grouped_residuals['Positive_Residuals'] = grouped_residuals['Values'].where(grouped_residuals['Values'] > 0)
grouped_residuals['Negative_Residuals'] = grouped_residuals['Values'].where(grouped_residuals['Values'] < 0)


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '2000-01-03 00:00:00' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [None]:
# Inspect the per-window average residuals.
grouped_residuals

Unnamed: 0,Values,Time,Positive_Residuals,Negative_Residuals
0,0.036797,2000-01-03 00:00:00,0.036797,
22,-0.301713,2000-02-02 00:00:00,,-0.301713
44,-0.060144,2000-03-03 00:00:00,,-0.060144
66,-0.130568,2000-04-04 00:00:00,,-0.130568
88,0.620417,2000-05-04 00:00:00,0.620417,
...,...,...,...,...
4290,-0.625371,2016-06-13 00:00:00,,-0.625371
4312,-0.861479,2016-07-13 00:00:00,,-0.861479
4334,-0.708720,2016-08-12 00:00:00,,-0.708720
4356,-0.891317,2016-09-13 00:00:00,,-0.891317


In [None]:
# Re-assign the dates from `data`; pandas aligns on the index, so every group
# row (index 0, 22, 44, ...) receives the date of the data row with that label.
grouped_residuals["Time"] = data["Time"]

In [None]:
# Create a figure
fig2 = go.Figure()

# Dashed reference line at zero
zero_baseline = [0] * len(grouped_residuals)
fig2.add_trace(go.Scatter(
    x=grouped_residuals['Time'],
    y=zero_baseline,
    mode='lines',
    name='Zero Line',
    line=dict(color='black', dash='dash')
))

# One trace per sign: positive averages in green, negative ones in red
residual_series = (
    ('Positive_Residuals', 'Positive Residuals', 'green'),
    ('Negative_Residuals', 'Negative Residuals', 'red'),
)
for residual_column, trace_name, trace_colour in residual_series:
    fig2.add_trace(go.Scatter(
        x=grouped_residuals['Time'],
        y=grouped_residuals[residual_column],
        mode='lines+markers',
        name=trace_name,
        line=dict(color=trace_colour)
    ))

# Titles and theme
fig2.update_layout(
    title='Grouped Residuals Plot',
    xaxis_title='Date',
    yaxis_title='Residuals',
    legend_title='Series',
    template='plotly_white'
)

# Render
fig2.show()

In [None]:
# Back-fill missing values: the first 9 rows of every 10-step rolling column
# are NaN and take the first available value.
data = data.bfill()

data

Unnamed: 0,True,Predicted,Time,Residuals,True Rolling Mean,True Rolling Std,Predicted Rolling Mean,Predicted Rolling Std
0,6.50,6.498363,2000-01-03,0.001637,6.511,0.073402,6.422264,0.122797
1,6.40,6.208910,2000-01-04,0.191091,6.511,0.073402,6.422264,0.122797
2,6.51,6.268754,2000-01-05,0.241246,6.511,0.073402,6.422264,0.122797
3,6.46,6.326372,2000-01-06,0.133628,6.511,0.073402,6.422264,0.122797
4,6.42,6.380508,2000-01-07,0.039492,6.511,0.073402,6.422264,0.122797
...,...,...,...,...,...,...,...,...
4395,1.84,2.372994,2016-11-07,-0.532994,1.841,0.027669,2.493072,0.088386
4396,1.83,2.352981,2016-11-08,-0.522981,1.837,0.025841,2.464903,0.082966
4397,1.84,2.334708,2016-11-09,-0.494708,1.835,0.024608,2.438513,0.077500
4398,1.80,2.318113,2016-11-10,-0.518113,1.833,0.026687,2.413907,0.072040


In [None]:
# Rolling means of actual vs. predicted yields (in sample).
fig3 = go.Figure()


fig3.add_trace(go.Scatter(
    x=data['Time'],
    y=data['True Rolling Mean'],
    mode='lines+markers',
    name='True Rolling Mean',
    line=dict(color='blue')
))

fig3.add_trace(go.Scatter(
    x=data['Time'],
    y=data['Predicted Rolling Mean'],
    mode='lines+markers',
    name='Predicted Rolling Mean',
    line=dict(color='Orange')
))

# Title and axis labels, matching the other figures; update_layout returns
# the figure, so the plot is still rendered as the cell's last expression.
fig3.update_layout(
    title='In Sample Rolling Means',
    xaxis_title='Date',
    yaxis_title='Yields',
    legend_title='Series',
    template='plotly_white'
)






In [None]:
from plotly.subplots import make_subplots

In [None]:
# Two stacked panels sharing the date axis: rolling std of the true series
# on top, of the predicted series below.
fig4 = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1,
                     subplot_titles=('True Rolling Std', 'Predicted Rolling Std'))

std_panels = (('True Rolling Std', 'Green'), ('Predicted Rolling Std', 'Purple'))
for panel_row, (std_column, std_colour) in enumerate(std_panels, start=1):
    fig4.add_trace(go.Scatter(
        x=data['Time'],
        y=data[std_column],
        mode='lines+markers',
        name=std_column,
        line=dict(color=std_colour)
    ), row=panel_row, col=1)

# Titles and theme
fig4.update_layout(
    title='Rolling Standard Deviations',
    xaxis_title='Date',
    yaxis_title='Standard Deviation',
    template='plotly_white',
    showlegend=True
)

# Render
fig4.show()


In [None]:
##########################################################################################################################

In [None]:
# Dates of the out-of-sample period: the last len(data_2) rows of `df`,
# re-indexed from 0 so they align with the rows of data_2. Taking the length
# from data_2 replaces the hard-coded 1100; the stray mid-cell `len(dates)`
# expression (which displayed nothing) has been dropped.
dates = df["date"].iloc[-len(data_2):].reset_index(drop=True)

In [None]:
# Inspect the out-of-sample dates.
dates

Unnamed: 0,date
0,2016-11-15
1,2016-11-16
2,2016-11-17
3,2016-11-18
4,2016-11-21
...,...
1095,2021-01-26
1096,2021-01-27
1097,2021-01-28
1098,2021-01-29


In [None]:
# Attach the dates; both objects carry a 0..n-1 index, so rows align one-to-one.
data_2['Time'] = dates

data_2

Unnamed: 0,True,Predicted,Squared_Diff,Time
0,1.81,6.495782,21.956556,2016-11-15
1,1.78,6.466691,21.965067,2016-11-16
2,1.74,6.510626,22.758873,2016-11-17
3,1.77,6.548952,22.838379,2016-11-18
4,1.83,6.581836,22.579948,2016-11-21
...,...,...,...,...
1095,1.26,2.469326,1.462469,2021-01-26
1096,1.27,2.456273,1.407243,2021-01-27
1097,1.29,2.444103,1.331954,2021-01-28
1098,1.27,2.432739,1.351963,2021-01-29


In [None]:
# Actual vs. predicted yields over the out-of-sample period.
out_of_sample_traces = [
    go.Scatter(
        x=data_2["Time"],
        y=data_2["True"],
        mode="lines+markers",
        name="Actual",
        line=dict(color="blue"),
    ),
    go.Scatter(
        x=data_2['Time'],
        y=data_2['Predicted'],
        mode='lines+markers',
        name='Predicted',
        line=dict(color='Green'),
    ),
]
fig = go.Figure(data=out_of_sample_traces)

# update_layout returns the figure, so leaving it last renders the plot.
fig.update_layout(
    title='Out of Sample Yield Predictions',
    xaxis_title='Date',
    yaxis_title='Yields',
    legend_title='Series',
    template='plotly_white'
)



In [None]:
# Forecast error plus 10-step rolling statistics for the out-of-sample table.
data_2["Residuals"] = data_2["True"] - data_2["Predicted"]
data_2["True Rolling Mean"] = data_2["True"].rolling(window=10).mean()
data_2["True Rolling Std"] = data_2["True"].rolling(window=10).std()
data_2["Predicted Rolling Mean"] = data_2["Predicted"].rolling(window=10).mean()
data_2["Predicted Rolling Std"] = data_2["Predicted"].rolling(window=10).std()

# Residual scatter for the out-of-sample period. It was previously built from
# the in-sample frame `data`, and before data_2["Residuals"] existed. The
# `labels` key is now the actual x column ("Time"); "Date" matched nothing.
fig_residuals = px.scatter(
    data_2,
    x="Time",
    y="Residuals",
    title="Residuals Over Time",
    labels={"Time": "Date", "Residuals": "Residuals"},
)
fig_residuals.show()

In [None]:
# Sanity check: number of out-of-sample rows.
len(data_2)

1100

In [None]:
# Average the out-of-sample residuals over consecutive forecast windows of
# TARGET_LENGTH steps. Building the frame from lists avoids writing
# timestamps into a float64 column row by row (pandas dtype warning).
window_starts = list(range(0, len(data_2), TARGET_LENGTH))
grouped_residuals_2 = pd.DataFrame(
    {
        # .mean() divides by the actual group size, so a shorter final
        # window is averaged correctly instead of always dividing by 22.
        "Values": [
            float(data_2["Residuals"].iloc[start:start + TARGET_LENGTH].mean())
            for start in window_starts
        ],
        # Each window is stamped with the date of its first step.
        "Time": [data_2["Time"].iloc[start] for start in window_starts],
    },
    index=window_starts,  # keep the first-row position of every window as index
)

# Split by sign so positive and negative averages can be coloured separately.
grouped_residuals_2['Positive_Residuals'] = grouped_residuals_2['Values'].where(grouped_residuals_2['Values'] > 0)
grouped_residuals_2['Negative_Residuals'] = grouped_residuals_2['Values'].where(grouped_residuals_2['Values'] < 0)


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '2016-11-15 00:00:00' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [None]:
# Create a figure
fig2 = go.Figure()

# Dashed reference line at zero
zero_level = [0] * len(grouped_residuals_2)
fig2.add_trace(go.Scatter(
    x=grouped_residuals_2['Time'],
    y=zero_level,
    mode='lines',
    name='Zero Line',
    line=dict(color='black', dash='dash')
))

# One trace per sign: positive forecast errors in green, negative ones in red
forecast_error_series = (
    ('Positive_Residuals', 'Positive Forecast Error', 'green'),
    ('Negative_Residuals', 'Negative Forecast Error', 'red'),
)
for error_column, error_name, error_colour in forecast_error_series:
    fig2.add_trace(go.Scatter(
        x=grouped_residuals_2['Time'],
        y=grouped_residuals_2[error_column],
        mode='lines+markers',
        name=error_name,
        line=dict(color=error_colour)
    ))

# Titles and theme
fig2.update_layout(
    title='Grouped Residuals Plot',
    xaxis_title='Date',
    yaxis_title='Residuals',
    legend_title='Series',
    template='plotly_white'
)

# Render
fig2.show()

In [None]:
# Back-fill missing values: the first 9 rows of every 10-step rolling column
# are NaN and take the first available value.
data_2 = data_2.bfill()

data_2

Unnamed: 0,True,Predicted,Squared_Diff,Time,Residuals,True Rolling Mean,True Rolling Std,Predicted Rolling Mean,Predicted Rolling Std
0,1.81,6.495782,21.956556,2016-11-15,-4.685782,1.781,0.026854,6.585080,0.076635
1,1.78,6.466691,21.965067,2016-11-16,-4.686690,1.781,0.026854,6.585080,0.076635
2,1.74,6.510626,22.758873,2016-11-17,-4.770626,1.781,0.026854,6.585080,0.076635
3,1.77,6.548952,22.838379,2016-11-18,-4.778952,1.781,0.026854,6.585080,0.076635
4,1.83,6.581836,22.579948,2016-11-21,-4.751836,1.781,0.026854,6.585080,0.076635
...,...,...,...,...,...,...,...,...,...
1095,1.26,2.469326,1.462469,2021-01-26,-1.209326,1.222,0.032931,2.547250,0.058052
1096,1.27,2.456273,1.407243,2021-01-27,-1.186273,1.228,0.035839,2.528689,0.053963
1097,1.29,2.444103,1.331954,2021-01-28,-1.154103,1.234,0.040879,2.511423,0.050138
1098,1.27,2.432739,1.351963,2021-01-29,-1.162740,1.235,0.041700,2.495367,0.046588


In [None]:
# Rolling means of actual vs. predicted yields (out of sample).
fig3 = go.Figure()


fig3.add_trace(go.Scatter(
    x=data_2['Time'],
    y=data_2['True Rolling Mean'],
    mode='lines+markers',
    name='True Rolling Mean',
    line=dict(color='blue')
))

fig3.add_trace(go.Scatter(
    x=data_2['Time'],
    y=data_2['Predicted Rolling Mean'],
    mode='lines+markers',
    name='Predicted Rolling Mean',
    line=dict(color='Orange')
))

# Title and axis labels, matching the other figures; update_layout returns
# the figure, so the plot is still rendered as the cell's last expression.
fig3.update_layout(
    title='Out of Sample Rolling Means',
    xaxis_title='Date',
    yaxis_title='Yields',
    legend_title='Series',
    template='plotly_white'
)



In [None]:
# Two stacked panels sharing the date axis: rolling std of the true series
# on top, of the predicted series below (out of sample).
fig4 = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1,
                     subplot_titles=('True Rolling Std', 'Predicted Rolling Std'))

panel_specs = (('True Rolling Std', 'Green'), ('Predicted Rolling Std', 'Purple'))
for subplot_row, (panel_column, panel_colour) in enumerate(panel_specs, start=1):
    fig4.add_trace(go.Scatter(
        x=data_2['Time'],
        y=data_2[panel_column],
        mode='lines+markers',
        name=panel_column,
        line=dict(color=panel_colour)
    ), row=subplot_row, col=1)

# Titles and theme
fig4.update_layout(
    title='Rolling Standard Deviations',
    xaxis_title='Date',
    yaxis_title='Standard Deviation',
    template='plotly_white',
    showlegend=True
)

# Render
fig4.show()