In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head that emits a multi-step forecast.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state in every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced for each input sequence.
    """

    def __init__(self, input_size, hidden_size=50, num_layers=2, output_size=10):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Turn a batch of sequences into forecasts.

        Args:
            x: Tensor laid out as (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size), computed from the LSTM
            features of the final time step only.
        """
        per_step_features, _ = self.lstm(x)
        final_step = per_step_features[:, -1, :]
        return self.fc(final_step)

In [None]:
def create_sequences(data, seq_length, output_size):
    """Slice a series into sliding (input window, target window) pairs.

    Window ``i`` uses ``data[i : i + seq_length]`` as the input and the
    ``output_size`` values that immediately follow it as the target.

    Args:
        data: Array-like series indexed along its first axis.
        seq_length: Number of consecutive steps in each input window.
        output_size: Number of consecutive steps in each target window.

    Returns:
        Tuple ``(sequences, targets)`` of NumPy arrays with shapes
        ``(n_windows, seq_length, ...)`` and ``(n_windows, output_size, ...)``.
        When the series is too short for a single pair, both arrays have
        zero windows but keep the remaining dimensions.
    """
    data = np.asarray(data)
    # The last valid start index is len(data) - seq_length - output_size, so
    # the number of windows is that value plus one (the original bound
    # silently dropped the final window).
    n_windows = len(data) - seq_length - output_size + 1
    if n_windows <= 0:
        trailing_shape = data.shape[1:]
        return (
            np.empty((0, seq_length) + trailing_shape, dtype=data.dtype),
            np.empty((0, output_size) + trailing_shape, dtype=data.dtype),
        )

    sequences = [data[i:i + seq_length] for i in range(n_windows)]
    targets = [
        data[i + seq_length:i + seq_length + output_size]
        for i in range(n_windows)
    ]
    return np.array(sequences), np.array(targets)

# --- Windowing hyperparameters and reproducibility ---
seq_length = 30   # past observations in each input window
output_size = 10  # future steps predicted per window
SEED = 42         # fixed seed so the model's initial weights are repeatable

# Raw prices as a column vector, the 2-D layout MinMaxScaler works on.
# NOTE(review): `corn` is not defined in the cells shown here; it has to be
# loaded by an earlier cell.
corn_prices = corn["corn_price"].values.reshape(-1, 1)

# Window the unscaled series once, only to learn how many samples exist, so
# the split sizes are known before any scaling statistics are computed.
raw_windows, _ = create_sequences(corn_prices, seq_length, output_size)
n_windows = len(raw_windows)

# Chronological 80/10/10 split. This is time-series data, so it is split in
# order rather than shuffled: shuffling would destroy the temporal
# dependencies the model is supposed to detect.
train_size = int(n_windows * 0.8)
val_size = int(n_windows * 0.1)
if train_size == 0:
    raise ValueError(
        "Not enough data to build a training split: "
        f"{len(corn_prices)} rows produced {n_windows} windows."
    )

# Fit the scaler only on the rows covered by training windows. Training
# window i spans rows i .. i + seq_length + output_size - 1, so the last
# training window ends at row train_size + seq_length + output_size - 2.
# Fitting on the whole series would leak the validation/test min and max
# into training. Later values may therefore scale outside [0, 1].
train_rows = train_size + seq_length + output_size - 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(corn_prices[:train_rows])
real_corn_scaled = scaler.transform(corn_prices)

X, y = create_sequences(real_corn_scaled, seq_length, output_size)

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

# float32 is the default dtype of the model's parameters.
X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)
X_val, y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32)

# Seed right before the model is built so weight initialisation is repeatable.
torch.manual_seed(SEED)

input_size = 1  # univariate series: one feature per time step
model = LSTMModel(input_size, output_size=output_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Per-epoch loss histories, plotted once training finishes.
train_losses, val_losses, test_losses = [], [], []

num_epochs = 50
for epoch in range(num_epochs):
    # One full-batch gradient step over all training windows.
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs.squeeze(), y_train.squeeze())
    loss.backward()
    optimizer.step()

    # Score the freshly updated weights on the held-out splits; no gradients
    # are needed here. The training loss above was computed before the step.
    model.eval()
    with torch.no_grad():
        y_pred_val = model(X_val).squeeze()
        val_loss = criterion(y_pred_val, y_val.squeeze())

        y_pred_test = model(X_test).squeeze()
        test_loss = criterion(y_pred_test, y_test.squeeze())

    for history, value in (
        (train_losses, loss),
        (val_losses, val_loss),
        (test_losses, test_loss),
    ):
        history.append(value.item())

    # Report every fifth epoch, starting with epoch 0.
    if epoch % 5 == 0:
        print(f"Epoch [{epoch}/{num_epochs}], Train Loss: {loss.item():.6f}, Val Loss: {val_loss.item():.6f}, Test Loss: {test_loss.item():.6f}")


# Overlay the three loss curves on one figure.
for history, label in (
    (train_losses, "Train Loss"),
    (val_losses, "Validation Loss"),
    (test_losses, "Test Loss"),
):
    plt.plot(history, label=label)
plt.title("Train, Validation, and Test Losses")
plt.legend()
plt.show()

In [None]:
def _to_price_windows(scaled_values):
    """Undo the MinMax scaling and return an array shaped (n_windows, output_size, 1).

    Args:
        scaled_values: Array-like of scaled values whose total size is a
            multiple of ``output_size``.

    Returns:
        NumPy array in the scaler's original units.
    """
    # The scaler was fitted on a single column, so flatten to (n, 1) first.
    column = np.asarray(scaled_values).reshape(-1, 1)
    # Use output_size rather than a literal 10 so this follows the configured
    # forecast horizon.
    return scaler.inverse_transform(column).reshape(-1, output_size, 1)


# Forecasts for every split, converted back to the original units.
model.eval()
with torch.no_grad():
    y_pred_test = _to_price_windows(model(X_test).numpy())
    y_pred_train = _to_price_windows(model(X_train).numpy())
    y_pred_val = _to_price_windows(model(X_val).numpy())

# Rebuild the targets from the untouched scaled array `y` instead of from
# y_train / y_val / y_test themselves: those names are overwritten below, so
# reading them here would inverse-transform twice if this cell is re-run.
# The float32 cast mirrors the tensor conversion done when the splits were made.
# NOTE(review): after this cell y_train / y_val / y_test are NumPy arrays in
# original units, so the training cell cannot be re-run without first
# re-running the data-preparation cell.
y_test = _to_price_windows(y[train_size + val_size:].astype(np.float32))
y_train = _to_price_windows(y[:train_size].astype(np.float32))
y_val = _to_price_windows(y[train_size:train_size + val_size].astype(np.float32))