In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import maketab as mt

class CreateDataset(Dataset):
    def __init__(self, data, n, m):
        self.data = data
        self.n = n  # Sequence length
        self.m = m  # Prediction length
        self.total_length = len(self.data)

    def __len__(self):
        return self.total_length - self.n - self.m + 1

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.n]
        y = self.data[idx + self.n:idx + self.n + self.m]
        x = torch.tensor(x, dtype=torch.float32).unsqueeze(-1)  # Add feature dimension
        y = torch.tensor(y, dtype=torch.float32).unsqueeze(-1)
        return x, y

data = np.linspace(0, 1, 11)
seq_len = 3
pred_len = 2
dataset = CreateDataset(data, seq_len, pred_len)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

In [6]:
class ElmanRNN(nn.Module):
    def __init__(self, input_size=1, hidden_size=12, output_size=1):
        super(ElmanRNN, self).__init__()
        self.hidden_size = hidden_size
        
        # Elman RNN (simple RNN)
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden=None):
        # Initialize hidden state if not provided
        if hidden is None:
            hidden = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate RNN
        out, hidden = self.rnn(x, hidden)
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out, hidden

In [None]:
model = ElmanRNN(hidden_size=3)
for X,y in dataloader:
    print(X)
    y = list(torch.unbind(y, dim=1))
    h = None
    for i in y:
        if h is None:
            pred, h = model(X, h)

        else:
            pred = pred.unsqueeze(1)
            pred, h = model(pred, h)
            
        print(f"pred: {pred}")
        print(f"true: {i}")

In [10]:
def train_model(model, dataloader, epochs):
    model.train()
    losses = []
    for epoch in range(epochs):
        epoch_loss = 0
        for batch_x, batch_y in dataloader:
            optimizer.zero_grad()
            print(f"input on start: {batch_x}")
            y = list(torch.unbind(batch_y, dim=1))
            h = None
            for i in y:
                if h is None:
                    pred, h = model(batch_x, h)

                else:
                    pred = pred.unsqueeze(1)
                    pred, h = model(pred, h)

                print(f"pred: {pred}")
                print(f"true_y: {i}")
                loss = criterion(pred, i)
                print("Computing gradient:")
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
        
        avg_loss = epoch_loss / len(dataloader)
        print(f'Epoch {epoch}, Loss: {avg_loss:.6f}')
        losses.append(avg_loss)   
    return model

In [11]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model = train_model(model, dataloader, 100)

input on start: tensor([[[0.0000],
         [0.1000],
         [0.2000]]])
pred: tensor([[0.3872]], grad_fn=<AddmmBackward0>)
true_y: tensor([[0.3000]])
Computing gradient:
pred: tensor([[0.4100]], grad_fn=<AddmmBackward0>)
true_y: tensor([[0.4000]])
Computing gradient:


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.