In [None]:
import torch
import torch.nn as nn  # for neural network layers
import torch.optim as optim  # for optimization algorithms
import torch.nn.functional as F  # for activation and loss functions
from torch.utils.data import DataLoader  # for batching and loading datasets
import torchvision.datasets as datasets  # for standard datasets like MNIST, CIFAR10, etc.
import torchvision.transforms as transforms  # for data transformations (normalization, augmentation)


In [None]:

class NN(nn.Module):
    """Two-layer fully connected classifier.

    The network is ``Linear(input_size, 50) -> ReLU -> Linear(50, num_classes)``
    and returns raw, un-normalised class scores (logits).

    Args:
        input_size: Size of the last dimension of the input tensor.
        num_classes: Number of scores produced per input row.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # The hidden layer width is fixed at 50 units.
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        """Map a batch of feature rows to a batch of logits."""
        hidden = F.relu(self.fc1(x))
        # No activation on the output layer: callers receive plain logits.
        return self.fc2(hidden)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages and one linear layer.

    Shape walk-through for a 28x28 input (e.g. a grayscale MNIST image):

        input            in_channel x 28 x 28
        conv1 + pool     8 x 14 x 14
        conv2 + pool     16 x 7 x 7
        flatten + fc1    num_classes

    ``fc1`` is sized for 16*7*7 = 784 features, so the feature maps must be
    7x7 after the second pooling step.

    Args:
        in_channel: Number of channels in the input images.
        num_classes: Number of logits produced per image.
    """

    def __init__(self, in_channel=1, num_classes=10):
        super().__init__()

        # 3x3 kernels with stride 1 and padding 1 leave height and width unchanged.
        same_conv = dict(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=8, **same_conv)
        # One 2x2 / stride-2 pool is reused after each convolution; it halves
        # the height and width of every feature map.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, **same_conv)
        # Classifier head over the flattened 16 x 7 x 7 feature maps.
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        """Return raw class logits of shape [batch_size, num_classes]."""
        # Stage 1: convolution, ReLU non-linearity, then down-sampling.
        x = self.pool(F.relu(self.conv1(x)))
        # Stage 2: same pattern with the second convolution.
        x = self.pool(F.relu(self.conv2(x)))
        # Keep the batch dimension and collapse channels and spatial dims.
        x = x.flatten(start_dim=1)
        # Raw logits are returned (intended for use with CrossEntropyLoss).
        return self.fc1(x)



In [None]:
# Smoke test: push one random batch of 64 single-channel 28x28 images through
# an untrained CNN and print the shape of the resulting logits.
model = CNN()
x = torch.randn(64, 1, 28, 28)
print(model(x).shape)

torch.Size([64, 10])


In [None]:
# Hyperparameters for the CNN run.
in_channel = 1          # channels per input image
num_classes = 10        # number of output classes
learning_rate = 1e-3    # step size handed to the optimizer
batch_size = 64         # samples per DataLoader batch
num_epochs = 1          # passes over the training loader

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Data loading: MNIST is downloaded into dataset/ if it is not already there,
# and every image is converted to a tensor by ToTensor.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# The test loader must wrap test_dataset (the train=False split). Wrapping
# train_dataset here would make every "test" accuracy a second measurement of
# the training set. Accuracy does not depend on batch order, so shuffling is off.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Model: build the CNN from the declared hyperparameters instead of relying on
# constructor defaults that merely happen to match them, then move it to the
# selected device.
model = CNN(in_channel=in_channel, num_classes=num_classes).to(device)

In [None]:
# Loss and optimizer.
# CrossEntropyLoss is applied to the model's raw logits and the class targets.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of `model` using `learning_rate` as its step size.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Training loop: one optimizer update per batch, for num_epochs passes.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Put the batch on the same device as the model.
        data, targets = data.to(device), targets.to(device)

        # Clear gradients left over from the previous step. This does not
        # depend on the forward pass, so it can run before it.
        optimizer.zero_grad()

        # Forward pass: logits, then the loss against the targets.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass followed by a single parameter update.
        loss.backward()
        optimizer.step()


In [None]:
def check_accuracy(loader, model, device=None, flatten=False):
    """Measure the classification accuracy of ``model`` over every batch in ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: ``nn.Module`` mapping a batch of inputs to per-class scores
            along dimension 1.
        device: Device the batches are moved to before the forward pass. When
            ``None`` (the default) the device of the model's first parameter
            is used; for a model without parameters the batches are left
            where they are.
        flatten: When true, each input batch is reshaped to ``[batch, -1]``
            before the forward pass (useful for fully connected models).
            Defaults to ``False``.

    Returns:
        The fraction of correctly classified samples as a float, or ``0.0``
        when the loader yields no samples.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0

    # Remember the current mode so that a model which was already in eval mode
    # is not silently switched to training mode on the way out.
    was_training = model.training
    model.eval()  # dropout is disabled and batch norm uses its running statistics
    try:
        with torch.no_grad():  # no gradient bookkeeping is needed for evaluation
            for x, y in loader:
                if device is not None:
                    x = x.to(device=device)
                    y = y.to(device=device)
                if flatten:
                    x = x.reshape(x.shape[0], -1)

                scores = model(x)
                _, predictions = scores.max(1)  # index of the highest score per sample

                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore whatever mode the caller had, even if a batch raised.
        model.train(was_training)

    # An empty loader reports 0.0 instead of raising ZeroDivisionError.
    acc = float(num_correct) / float(num_samples) if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} with accuracy {acc*100:.2f}%')
    return acc


In [None]:
# Accuracy of the current model over the batches produced by train_loader.
check_accuracy(loader=train_loader, model=model)

Got 57894/60000 with accuracy 96.49%


0.9649

In [None]:
# Accuracy of the current model over the batches produced by test_loader.
check_accuracy(loader=test_loader, model=model)

Got 57894/60000 with accuracy 96.49%


0.9649

In [None]:
def main():
    """Print the accuracy of ``model`` over the batches from ``test_loader``."""
    check_accuracy(loader=test_loader, model=model)


# Run the check only when this code executes as the top-level module.
if __name__ == "__main__":
    main()

Got 57894/60000 with accuracy 96.49%


## Fully connected network (NN) model

In [None]:
# Use CUDA when it is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [None]:
# Hyperparameters for the fully connected run.
input_size = 28 * 28    # 784 features per flattened image
num_classes = 10        # number of output classes
learning_rate = 1e-3    # step size handed to the optimizer
batch_size = 64         # samples per DataLoader batch
num_epochs = 1          # passes over the training loader

In [None]:
# Data loading: MNIST is downloaded into dataset/ if it is not already there,
# and every image is converted to a tensor by ToTensor.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# The test loader must wrap test_dataset (the train=False split). Wrapping
# train_dataset here would make every "test" accuracy a second measurement of
# the training set. Accuracy does not depend on batch order, so shuffling is off.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Model: fully connected network sized from the hyperparameters, then moved to
# the selected device.
model = NN(input_size, num_classes).to(device)

In [None]:
# Loss and optimizer.
# CrossEntropyLoss is applied to the model's raw logits and the class targets.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of `model` using `learning_rate` as its step size.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Training loop: one optimizer update per batch, for num_epochs passes.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to the device and flatten each image into a single
        # feature row, keeping the batch dimension.
        data = data.to(device).reshape(data.shape[0], -1)
        targets = targets.to(device)

        # Clear gradients left over from the previous step. This does not
        # depend on the forward pass, so it can run before it.
        optimizer.zero_grad()

        # Forward pass: logits, then the loss against the targets.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass followed by a single parameter update.
        loss.backward()
        optimizer.step()






In [None]:
def check_accuracy(loader, model, device=None, flatten=True):
    """Measure the classification accuracy of ``model`` over every batch in ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: ``nn.Module`` mapping a batch of inputs to per-class scores
            along dimension 1.
        device: Device the batches are moved to before the forward pass. When
            ``None`` (the default) the device of the model's first parameter
            is used; for a model without parameters the batches are left
            where they are.
        flatten: When true (the default), each input batch is reshaped to
            ``[batch, -1]`` before the forward pass, as a fully connected
            model requires. Pass ``False`` for models that consume the
            batches in their original shape.

    Returns:
        The fraction of correctly classified samples as a float, or ``0.0``
        when the loader yields no samples.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0

    # Remember the current mode so that a model which was already in eval mode
    # is not silently switched to training mode on the way out.
    was_training = model.training
    model.eval()  # dropout is disabled and batch norm uses its running statistics
    try:
        with torch.no_grad():  # no gradient bookkeeping is needed for evaluation
            for x, y in loader:
                if device is not None:
                    x = x.to(device=device)
                    y = y.to(device=device)
                if flatten:
                    x = x.reshape(x.shape[0], -1)  # one feature row per sample

                scores = model(x)
                _, predictions = scores.max(1)  # index of the highest score per sample

                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore whatever mode the caller had, even if a batch raised.
        model.train(was_training)

    # An empty loader reports 0.0 instead of raising ZeroDivisionError.
    acc = float(num_correct) / float(num_samples) if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} with accuracy {acc*100:.2f}%')
    return acc




In [None]:
# Accuracy of the current model over the batches produced by train_loader.
check_accuracy(loader=train_loader, model=model)

Got 55858/60000 with accuracy 93.10%


0.9309666666666667

In [None]:
# Accuracy of the current model over the batches produced by test_loader.
check_accuracy(loader=test_loader, model=model)

Got 55858/60000 with accuracy 93.10%


0.9309666666666667