In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Seed the global RNG so the random inputs, initial states and layer weights
# drawn below come out the same on every run.
torch.manual_seed(42)

# Dimensions shared by all six demos in this cell
batch_size = 5
seq_len = 7
input_size = 10
hidden_size = 20
num_layers = 1

# Two independently drawn inputs, one per memory layout
x = torch.randn(batch_size, seq_len, input_size)  # batch-first: consumed by nn.RNN/LSTM/GRU
x_cell = torch.randn(seq_len, batch_size, input_size)  # time-first: sliced per step for RNNCell/LSTMCell/GRUCell

print("\n========== 1. nn.RNN ==========")
# 1. RNN -- the whole sequence in one call, starting from a random hidden state
rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
h0 = torch.randn(num_layers, batch_size, hidden_size)
out_rnn, hn_rnn = rnn(x, h0)
print("RNN Output Shape:", out_rnn.shape)
print("RNN Hidden State Shape:", hn_rnn.shape)

print("\n========== 2. nn.LSTM ==========")
# 2. LSTM -- same call pattern, but the state is a (hidden, cell) pair
lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
h0_lstm = torch.randn(num_layers, batch_size, hidden_size)
c0_lstm = torch.randn(num_layers, batch_size, hidden_size)
out_lstm, (hn_lstm, cn_lstm) = lstm(x, (h0_lstm, c0_lstm))
print("LSTM Output Shape:", out_lstm.shape)
print("LSTM Hidden State Shape:", hn_lstm.shape)
print("LSTM Cell State Shape:", cn_lstm.shape)

print("\n========== 3. nn.GRU ==========")
# 3. GRU -- single hidden state, like nn.RNN
gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
h0_gru = torch.randn(num_layers, batch_size, hidden_size)
out_gru, hn_gru = gru(x, h0_gru)
print("GRU Output Shape:", out_gru.shape)
print("GRU Hidden State Shape:", hn_gru.shape)


def _unroll_cell(cell, steps, state):
    """Drive a *Cell module one time step at a time.

    `steps` is iterated along its first dimension. After every step the new
    hidden state is recorded (for a tuple state, its first element).
    Returns the recorded hidden states stacked along a new leading dimension,
    together with the state left after the last step.
    """
    hidden_per_step = []
    for step_input in steps:
        state = cell(step_input, state)
        hidden_per_step.append(state[0] if isinstance(state, tuple) else state)
    return torch.stack(hidden_per_step, dim=0), state


print("\n========== 4. nn.RNNCell ==========")
# 4. RNNCell -- manual loop over the seq_len slices of x_cell
rnn_cell = nn.RNNCell(input_size, hidden_size)
hx_rnn_cell = torch.randn(batch_size, hidden_size)
outputs_rnn_cell, hx_rnn_cell = _unroll_cell(rnn_cell, x_cell, hx_rnn_cell)
print("RNNCell Output Shape:", outputs_rnn_cell.shape)

print("\n========== 5. nn.LSTMCell ==========")
# 5. LSTMCell -- the carried state is the (hidden, cell) pair; only hidden is collected
lstm_cell = nn.LSTMCell(input_size, hidden_size)
hx_lstm_cell = torch.randn(batch_size, hidden_size)  # hidden
cx_lstm_cell = torch.randn(batch_size, hidden_size)  # cell
outputs_lstm_cell, (hx_lstm_cell, cx_lstm_cell) = _unroll_cell(
    lstm_cell, x_cell, (hx_lstm_cell, cx_lstm_cell)
)
print("LSTMCell Output Shape:", outputs_lstm_cell.shape)

print("\n========== 6. nn.GRUCell ==========")
# 6. GRUCell -- same manual loop as RNNCell
gru_cell = nn.GRUCell(input_size, hidden_size)
hx_gru_cell = torch.randn(batch_size, hidden_size)
outputs_gru_cell, hx_gru_cell = _unroll_cell(gru_cell, x_cell, hx_gru_cell)
print("GRUCell Output Shape:", outputs_gru_cell.shape)


In [None]:
class SimpleLSTM(nn.Module):
    """LSTM classifier that feeds every time step's output to one linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        sequence_length: Number of time steps in every input. It fixes the
            linear layer's input width (``hidden_size * sequence_length``), so
            inputs with a different length are rejected by ``fc1``.
        num_classes: Number of logits produced per sample.

    The defaults are literals rather than notebook globals: default values are
    evaluated once, when the ``class`` statement runs, and ``sequence_length``
    and ``num_classes`` are not defined by any earlier cell, which made the
    definition itself fail with ``NameError``. ``input_size=28``,
    ``sequence_length=28`` and ``num_classes=10`` describe a 28x28 MNIST image
    read row by row; ``hidden_size=20`` and ``num_layers=1`` are the values the
    configuration cell currently assigns.
    """

    def __init__(self, input_size=28, hidden_size=20, num_layers=1, sequence_length=28, num_classes=10):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The head sees the outputs of all time steps flattened side by side.
        self.fc1 = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Map a ``(batch, sequence_length, input_size)`` tensor to ``(batch, num_classes)`` logits."""
        # Zero initial states created on the input's own device and dtype, so the
        # module does not depend on a global `device` variable.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, (h0, c0))
        # (batch, seq, hidden) -> (batch, seq * hidden)
        out = out.reshape(out.size(0), -1)
        return self.fc1(out)

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# The dataset yields 28x28 images, consumed here as 28 time steps of 28
# features each, with one logit per digit class. The sizes are passed
# explicitly so the model matches the data regardless of the constructor
# defaults (the notebook-level input_size of 10 belongs to the RNN shape demo
# and does not fit 28-wide rows).
model = SimpleLSTM(input_size=28, sequence_length=28, num_classes=10).to(device=device)

In [None]:
import pandas as pd
import numpy as np

class MnistDataset(Dataset):
    """In-memory dataset built from a CSV with one image per row.

    The first column is read as the label and the remaining 784 columns as
    pixel values, which are reshaped to a 28x28 grid.

    Args:
        datapath: Path (or anything ``pandas.read_csv`` accepts) of the CSV file.

    Indexing returns ``(image, label)`` where ``image`` is a float32 tensor of
    shape ``(28, 28)`` and ``label`` is an int64 scalar tensor.
    """

    def __init__(self, datapath):
        super().__init__()
        # `np.float` was removed in NumPy 1.24; it was an alias of the builtin
        # float, i.e. float64, so this reads the file exactly as before.
        df = pd.read_csv(datapath, dtype=np.float64)

        # Everything after the label column is pixel data: N x 784 -> N x 28 x 28,
        # the (batch, seq, feature) layout the recurrent layers expect.
        pixels = torch.tensor(df.iloc[:, 1:].to_numpy(), dtype=torch.float32)
        self.x = pixels.reshape(pixels.size(0), 28, 28)

        self.y = torch.tensor(df.iloc[:, 0].to_numpy(), dtype=torch.long)

        self.n_samples = df.shape[0]

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.n_samples

In [None]:
# Load both splits fully into memory; each CSV is parsed once, here.
train_dataset = MnistDataset(datapath="../input/mnist-in-csv/mnist_train.csv")
test_dataset = MnistDataset(datapath="../input/mnist-in-csv/mnist_test.csv")

In [None]:
# Mini-batch iterators over the two splits; both reshuffle on every pass and
# use the notebook-wide batch_size.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Step size for Adam. No earlier cell defines `learning_rate`, so a fresh-kernel
# run stopped here with NameError. 1e-3 is torch.optim.Adam's own default;
# tune as needed.
learning_rate = 1e-3

# Cross-entropy over the raw logits returned by the model.
loss_criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Number of full passes over the training set. No earlier cell defines
# `num_epochs`, so a fresh-kernel run stopped here with NameError; the value
# below is a starting point -- adjust it to the training budget.
num_epochs = 2

current_loss = 0
for epoch in range(num_epochs):
    for data, target in train_dataloader:
        data = data.to(device=device)
        target = target.to(device=device)

        score = model(data)
        loss = loss_criterion(score, target)
        # Keep a plain Python float: holding the loss tensor itself would keep
        # its autograd graph alive and print a tensor repr in the log line.
        current_loss = loss.item()

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
    # Reports the loss of the last mini-batch of the epoch, not an epoch average.
    print(f"At epoch: {epoch}, loss: {current_loss}")

In [None]:
def check_accuracy(dlr, model):
    """Print and return the classification accuracy of `model` over `dlr`.

    Args:
        dlr: Iterable of ``(inputs, labels)`` batches, e.g. a DataLoader.
        model: Module whose output has one score per class along dim 1.

    Returns:
        Accuracy as a percentage (float); ``0.0`` when `dlr` yields no samples.

    The model is switched to eval mode while scoring and then put back into
    whichever mode (train or eval) it was in when the function was called.
    """
    # Run on the device the model lives on rather than on a notebook-global
    # `device`; a model without parameters is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_correct = 0
    total_samples = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in dlr:
                x = x.to(device=target_device)
                y = y.to(device=target_device)

                score = model(x)
                _, predictions = score.max(1)

                # .item() keeps the running count a Python int, so the message
                # below prints a number instead of a tensor repr.
                total_correct += (y == predictions).sum().item()
                total_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    # Guard the empty-loader case, which previously raised ZeroDivisionError.
    accuracy = 100.0 * total_correct / total_samples if total_samples else 0.0
    print(f"total samples: {total_samples} total_correct: {total_correct} accuracy : {accuracy}")
    return accuracy