In [None]:
import torch
import torch.nn as nn  # for neural network layers
import torch.optim as optim  # for optimization algorithms
import torch.nn.functional as F  # for activation and loss functions
from torch.utils.data import DataLoader  # for batching and loading datasets
import torchvision.datasets as datasets  # for standard datasets like MNIST, CIFAR10, etc.
import torchvision.transforms as transforms  # for data transformations (normalization, augmentation)


In [27]:

class NN(nn.Module):  # ✅ 'Module' not 'Moulde'
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()  # ✅ correct super() call
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))   # ✅ apply ReLU after first layer
        x = self.fc2(x)           # ✅ last layer usually without activation for classification
        return x

In [28]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [29]:
#hyper parameters
input_size=784
num_classes=10
learning_rate=0.001
batch_size= 64
num_epochs= 1

In [30]:
#data loading
train_dataset=datasets.MNIST(root='dataset/',train=True, transform=transforms.ToTensor(),download=True)
train_loader= DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
test_dataset= datasets.MNIST(root='dataset/',train=False, transform=transforms.ToTensor(),download=True)
test_loader= DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)

In [31]:
#model
model=NN(input_size=input_size,num_classes=num_classes).to(device)

In [32]:
#loss and optimizer
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(), lr=learning_rate)

In [39]:
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move data & targets to device (CPU or GPU)
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Flatten images for fully connected network
        data = data.reshape(data.shape[0], -1)  # or data.view(data.shape[0], -1)

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass
        optimizer.zero_grad()  # ✅ correct method is zero_grad(), not zerograd()
        loss.backward()

        # Update weights
        optimizer.step()






In [40]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()  # evaluation mode (disables dropout/batchnorm)

    with torch.no_grad():  # no gradient computation needed
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            x = x.reshape(x.shape[0], -1)  # flatten images

            scores = model(x)
            _, predictions = scores.max(1)  # predicted class

            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    acc = float(num_correct) / float(num_samples)
    print(f'Got {num_correct}/{num_samples} with accuracy {acc*100:.2f}%')

    model.train()  # back to training mode
    return acc




In [42]:
check_accuracy(train_loader,model)

Got 55858/60000 with accuracy 93.10%


0.9309666666666667

In [43]:
check_accuracy(test_loader,model)

Got 55858/60000 with accuracy 93.10%


0.9309666666666667