In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
class SimpleRNN(nn.Module):
    """Single-layer, unidirectional vanilla RNN with a linear read-out.

    The network runs an ``nn.RNN`` over the whole sequence and feeds the
    hidden output of the *last* time step through a fully connected layer,
    which is the usual set-up for sequence classification or regression.

    Args:
        input_size: Number of features in the input at each time step.
        hidden_size: Number of features in the hidden state (the number of
            hidden units), i.e. how much memory the RNN carries from one
            time step to the next.
        output_size: Number of features in the final output (e.g. the
            number of classes for classification, or the output dimension
            for regression).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        # batch_first=True: input tensors have shape
        # (batch_size, seq_len, input_size) instead of the default
        # (seq_len, batch_size, input_size). Batch-first is usually more
        # convenient because the batch is the leading dimension in most
        # data-processing code.
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the RNN over ``x`` and project the last time step.

        Args:
            x: Input tensor of shape (batch_size, seq_len, input_size).

        Returns:
            Tensor of shape (batch_size, output_size).
        """
        # Initial hidden state for the first time step, shape
        # (num_layers, batch_size, hidden_size):
        #   * 1 -> the RNN is unidirectional with a single layer;
        #   * x.size(0) -> the batch size, so every sequence in the batch
        #     gets its own hidden state;
        #   * self.hidden_size -> features in the hidden state.
        # It is created with the dtype and device of ``x`` so the model
        # keeps working after ``.to(device)`` / ``.double()``; a plain
        # ``torch.zeros(...)`` would always be a float32 CPU tensor.
        h0 = torch.zeros(
            1, x.size(0), self.hidden_size, dtype=x.dtype, device=x.device
        )

        # ``out`` holds the RNN output for every time step, shape
        # (batch_size, seq_len, hidden_size). The second return value is
        # the final hidden state, shape (num_layers, batch_size,
        # hidden_size); it would only be needed for stateful training, so
        # it is discarded here.
        out, _ = self.rnn(x, h0)

        # out[:, -1, :] -> for every sequence in the batch, take the last
        # time step and all hidden features, then map them to the output.
        return self.fc(out[:, -1, :])

In [3]:
# Hyperparameters
input_size = 10  # Number of features in input
hidden_size = 20 # Number of hidden units in RNN
output_size = 1  # Output size (e.g., for regression)
num_epochs = 100  # Number of training epochs
learning_rate = 0.001
sequence_length = 7  # Length of the input sequence
batch_size = 5  # Number of samples in each batch

# Reproducibility: seed PyTorch's global RNG before any weights are
# initialised or random tensors are drawn, so that "Restart & Run All"
# yields the same numbers on every run.
seed = 42
torch.manual_seed(seed)

In [4]:
# Build the network from the hyperparameters
model = SimpleRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Adam optimizer over all model parameters, and a mean-squared-error
# objective (appropriate for the regression-style output)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [5]:
# Generate synthetic data for training
# Here, we create random inputs (X) and corresponding random outputs (y)
# X: (batch_size, sequence_length, input_size)
# y: (batch_size, output_size)
def generate_synthetic_data(batch_size, sequence_length, input_size,
                            output_size=None):
    """Draw a random (input, target) batch from a standard normal.

    Inputs and targets are sampled independently, so there is no real
    relationship for a model to learn; the data is only useful for
    checking shapes and the mechanics of a training loop.

    Args:
        batch_size: Number of sequences in the batch.
        sequence_length: Number of time steps per sequence.
        input_size: Number of features per time step.
        output_size: Number of target features per sequence. When omitted,
            the notebook-level ``output_size`` hyperparameter is used,
            which keeps existing three-argument calls working.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        (batch_size, sequence_length, input_size) and ``y`` has shape
        (batch_size, output_size).
    """
    if output_size is None:
        # Backward compatibility: earlier callers relied on the global
        # hyperparameter instead of passing the target width explicitly.
        output_size = globals()["output_size"]

    X = torch.randn(batch_size, sequence_length, input_size)  # Random input tensor
    y = torch.randn(batch_size, output_size)  # Random target output
    return X, y


In [6]:
# Draw one synthetic batch; every epoch below trains on this same batch
inputs, targets = generate_synthetic_data(batch_size, sequence_length, input_size)

# Optimisation loop
for epoch in range(num_epochs):
    # Start each iteration from clean gradients
    optimizer.zero_grad()

    # Forward pass: predictions and their loss against the targets
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Backward pass, then parameter update
    loss.backward()
    optimizer.step()

    # Report progress on every 10th epoch (epoch is 0-based)
    if epoch % 10 == 9:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training finished!")

Epoch [10/100], Loss: 0.3734
Epoch [20/100], Loss: 0.2098
Epoch [30/100], Loss: 0.0985
Epoch [40/100], Loss: 0.0345
Epoch [50/100], Loss: 0.0067
Epoch [60/100], Loss: 0.0001
Epoch [70/100], Loss: 0.0009
Epoch [80/100], Loss: 0.0005
Epoch [90/100], Loss: 0.0001
Epoch [100/100], Loss: 0.0000
Training finished!


In [7]:
# Generate new synthetic data for testing/prediction.
# Note: the targets `y` are random and drawn independently of `new_input`,
# so the predictions below are not expected to match them.
new_input, y = generate_synthetic_data(
    batch_size, sequence_length, input_size)

# Put the model in evaluation mode before inference. This model has no
# dropout or batch-norm layers, so the outputs are unaffected here, but
# it keeps the cell correct if such layers are added later.
model.eval()

# Make predictions
with torch.no_grad():  # Disable gradient computation since we're only doing inference
    predictions = model(new_input)

print(f"Predictions: {predictions}")


Predictions: tensor([[-0.5498],
        [-0.1748],
        [-0.0134],
        [-0.2525],
        [ 0.1523]])


In [8]:
# Display the targets that were generated together with `new_input`.
# They come from an independent torch.randn draw, so they carry no
# relationship to the inputs and will not match the predictions.
y

tensor([[ 0.5991],
        [ 0.0954],
        [-0.0533],
        [ 1.3177],
        [-1.0983]])