In [27]:
import torch
import torch.nn as nn  # for neural network layers
import torch.optim as optim  # for optimization algorithms
import torch.nn.functional as F  # for activation and loss functions
from torch.utils.data import DataLoader  # for batching and loading datasets
import torchvision.datasets as datasets  # for standard datasets like MNIST, CIFAR10, etc.
import torchvision.transforms as transforms  # for data transformations (normalization, augmentation)


In [28]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [29]:
#hyper parameters
input_size=28
sequence_length=28
num_layers=2
hidden_size=256
num_classes=10
learning_rate=0.001
batch_size= 64
num_epochs= 2

In [4]:
class RNN(nn.Module):
  def __init__(self, input_size,hidden_size, num_layers,num_classes,sequence_length):
    super(RNN,self).__init__()
    self.hidden_size=hidden_size
    self.num_layers =num_layers
    self.sequence_length =sequence_length
    self.rnn= nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
    self.fc=nn.Linear(hidden_size*sequence_length,num_classes)
    #time features
  def forward(self,x):
    h0=torch.zeros(self.num_layers, x.size(0),self.hidden_size).to(device)
    out,_=self.rnn(x,h0)
    out=out.reshape(out.shape[0],-1)
    out=self.fc(out)
    return out

In [5]:
#data loading
train_dataset=datasets.MNIST(root='dataset/',train=True, transform=transforms.ToTensor(),download=True)
train_loader= DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
test_dataset= datasets.MNIST(root='dataset/',train=False, transform=transforms.ToTensor(),download=True)
test_loader= DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)

100%|██████████| 9.91M/9.91M [00:00<00:00, 36.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.11MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 9.87MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.41MB/s]


In [6]:
#model
model=RNN( input_size,hidden_size, num_layers,num_classes,sequence_length).to(device)

In [30]:
#loss and optimizer
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move data & targets to device (CPU or GPU)
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)


        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass
        optimizer.zero_grad()  # ✅ correct method is zero_grad(), not zerograd()
        loss.backward()

        # Update weights
        optimizer.step()






In [14]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()  # evaluation mode (disables dropout/batchnorm)

    with torch.no_grad():  # no gradient computation needed
        for x, y in loader:
            x = x.to(device=device).squeeze(1)
            y = y.to(device=device)


            scores = model(x)
            _, predictions = scores.max(1)  # predicted class

            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    acc = float(num_correct) / float(num_samples)
    print(f'Got {num_correct}/{num_samples} with accuracy {acc*100:.2f}%')

    model.train()  # back to training mode
    return acc




In [15]:
check_accuracy(train_loader,model)

Got 58405/60000 with accuracy 97.34%


0.9734166666666667

In [16]:
check_accuracy(test_loader,model)

Got 58405/60000 with accuracy 97.34%


0.9734166666666667

In [35]:
class GruNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length):
        super(GruNet, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length

        # GRU layer
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

        # Fully connected layer (using all timesteps)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        batch_size = x.size(0)

        # Initial hidden state
        h0=torch.zeros(self.num_layers, x.size(0),self.hidden_size).to(device)

        # GRU forward pass
        out, _ = self.gru(x, h0)     # out: [batch, seq_len, hidden_size]

        # Flatten all time steps: [batch, seq_len * hidden]
        out = out.contiguous().view(batch_size, -1)

        # Final classification layer
        out = self.fc(out)

        return out


In [36]:
model_gru=GruNet( input_size,hidden_size, num_layers,num_classes,sequence_length).to(device)

In [38]:
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move data & targets to device (CPU or GPU)
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)


        # Forward pass
        scores = model_gru(data)
        loss = criterion(scores, targets)

        # Backward pass
        optimizer.zero_grad()  # ✅ correct method is zero_grad(), not zerograd()
        loss.backward()

        # Update weights
        optimizer.step()

In [21]:
check_accuracy(test_loader,model_gru)

Got 6903/60000 with accuracy 11.51%


0.11505

In [53]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out


In [55]:
model_lstm=LSTM( input_size,hidden_size, num_layers,num_classes).to(device)

In [56]:
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move data & targets to device (CPU or GPU)
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)


        # Forward pass
        scores = model_lstm(data)
        loss = criterion(scores, targets)

        # Backward pass
        optimizer.zero_grad()  # ✅ correct method is zero_grad(), not zerograd()
        loss.backward()

        # Update weights
        optimizer.step()

In [57]:
check_accuracy(test_loader,model_lstm)

Got 5958/60000 with accuracy 9.93%


0.0993

In [58]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        """
        input_size  : number of features in input at each timestep
        hidden_size : number of features in LSTM hidden state
        num_layers  : number of stacked LSTM layers
        num_classes : output classes (for classification) or 1 for regression
        """
        super(LSTMModel, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # LSTM layer
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True  # batch dimension is first: [batch, seq_len, features]
        )

        # Fully connected layer maps last hidden state to output
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """
        x: input tensor of shape [batch_size, seq_length, input_size]
        """
        batch_size = x.size(0)

        # Initialize hidden state and cell state with zeros
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)

        # Pass through LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: [batch, seq_length, hidden_size]

        # Take the last time step's hidden state
        out = self.fc(out[:, -1, :])      # out: [batch, num_classes]

        return out

# Example usage:
if __name__ == "__main__":
    # Hyperparameters
    input_size = 10    # features per timestep
    hidden_size = 32
    num_layers = 2
    num_classes = 3
    seq_length = 5
    batch_size = 8

    # Create dummy input: [batch_size, seq_length, input_size]
    x = torch.randn(batch_size, seq_length, input_size)

    # Create model
    model = LSTMModel(input_size, hidden_size, num_layers, num_classes)

    # Forward pass
    output = model(x)
    print("Output shape:", output.shape)  # should be [batch_size, num_classes]


Output shape: torch.Size([8, 3])


In [59]:
import torch
import torch.nn as nn

class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        """
        input_size  : number of features in input at each timestep
        hidden_size : number of features in GRU hidden state
        num_layers  : number of stacked GRU layers
        num_classes : output classes (for classification) or 1 for regression
        """
        super(GRUModel, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU layer
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True  # batch dimension is first: [batch, seq_len, features]
        )

        # Fully connected layer maps last hidden state to output
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """
        x: input tensor of shape [batch_size, seq_length, input_size]
        """
        batch_size = x.size(0)

        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)

        # Pass through GRU
        out, _ = self.gru(x, h0)  # out: [batch, seq_length, hidden_size]

        # Take the last time step's hidden state
        out = self.fc(out[:, -1, :])  # out: [batch, num_classes]

        return out

# Example usage:
if __name__ == "__main__":
    # Hyperparameters
    input_size = 10
    hidden_size = 32
    num_layers = 2
    num_classes = 3
    seq_length = 5
    batch_size = 8

    # Create dummy input: [batch_size, seq_length, input_size]
    x = torch.randn(batch_size, seq_length, input_size)

    # Create model
    model = GRUModel(input_size, hidden_size, num_layers, num_classes)

    # Forward pass
    output = model(x)
    print("Output shape:", output.shape)  # should be [batch_size, num_classes]


Output shape: torch.Size([8, 3])


In [None]:
import torch

def compute_accuracy(predictions, targets):
    """
    Compute accuracy for classification.

    predictions: tensor of shape [batch_size, num_classes] (raw logits from model)
    targets: tensor of shape [batch_size] (ground truth labels)
    """
    # Get predicted class (the one with highest score)
    _, predicted_classes = torch.max(predictions, dim=1)

    # Compare with ground truth
    correct = (predicted_classes == targets).sum().item()
    total = targets.size(0)

    accuracy = correct / total
    return accuracy

# Example usage:
if __name__ == "__main__":
    batch_size = 8
    num_classes = 3

    # Dummy predictions (logits)
    outputs = torch.randn(batch_size, num_classes)
    # Dummy ground truth labels
    labels = torch.randint(0, num_classes, (batch_size,))

    acc = compute_accuracy(outputs, labels)
    print("Accuracy:", acc)
