In [1]:
import torch
from torch.utils.data import DataLoader # for dividing data into small chunks
from torchvision import datasets # we will be getting data sets from theis module
from torchvision.transforms import ToTensor # the data we will get will be in the form of image using this we will convert it to tensor

In [2]:
# MNIST training split: fetched into the local "data" directory when missing,
# with every image converted to a tensor by ToTensor().
train_set = datasets.MNIST(root="data", train=True, download=True, transform=ToTensor())

# Matching test split, prepared with the same transform.
test_set = datasets.MNIST(root="data", train=False, download=True, transform=ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.04MB/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 134kB/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:06<00:00, 243kB/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 5.08MB/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [3]:
# Mini-batch loaders (128 samples per batch). Training batches are reshuffled on
# every pass; the evaluation loader keeps a fixed order because shuffling cannot
# change the aggregate accuracy/loss and a stable order keeps per-batch results
# reproducible.
train_dataloader = DataLoader(train_set, batch_size = 128, shuffle = True)
test_dataloader = DataLoader(test_set, batch_size = 128, shuffle = False)

In [4]:
import torch.nn as nn # neural network library i need

class MNIST_MLP(nn.Module):
  """Fully connected classifier: 784 inputs -> 512 -> 512 -> 10 logits."""

  def __init__(self):
    super().__init__()
    # Collapses every dimension after the batch dimension into a single vector.
    self.flatten = nn.Flatten()
    # The hidden width is a free design choice; both hidden layers use 512 units.
    hidden_units = 512
    layers = [
        nn.Linear(28*28, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 10),
    ]
    # nn.Sequential chains the layers so one call runs them in order.
    self.linear_relu_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Forward pass: flatten the input and return the 10 raw class scores per sample."""
    return self.linear_relu_stack(self.flatten(x))

In [5]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(f"Using {device} device")

# Instantiate the MLP, move its parameters to the chosen device, and print
# its layer structure.
model = MNIST_MLP()
model = model.to(device)
print(model)

Using cuda device
MNIST_MLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# Smoke test: push a single random 28x28 input through the untrained model
# and look at the raw scores it produces.
x = torch.rand((1, 28, 28), device=device)
logits = model(x)
print(logits)

tensor([[-0.0360,  0.0120, -0.0069, -0.0415,  0.0558,  0.0210,  0.1261, -0.0029,
         -0.0372,  0.0680]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [7]:
# Convert the logits into probabilities. dim=1 is the class dimension, so the
# values within each row (one sample) are normalised to sum to 1.
pred_prob = torch.softmax(logits, dim=1)
print(pred_prob)

tensor([[0.0948, 0.0995, 0.0976, 0.0943, 0.1039, 0.1004, 0.1115, 0.0980, 0.0947,
         0.1052]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


In [10]:
# Training hyper-parameters
learning_rate = 0.001
max_epochs = 30

# Cross-entropy loss on the model's logits, minimised with SGD
# (no momentum argument is passed here).
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [11]:
def train_mlp(dataloader, model, loss_fn, optimizer, max_epochs):
  """Train `model` in place for `max_epochs` passes over `dataloader`.

  Args:
    dataloader: iterable of (inputs, labels) batches; must expose `.dataset`
      so the total sample count can be shown in the progress log.
    model: the nn.Module being optimised.
    loss_fn: callable mapping (predictions, labels) to a scalar loss tensor.
    optimizer: optimizer already constructed over `model`'s parameters.
    max_epochs: number of full passes over `dataloader`.

  Returns:
    None. The loss of every 100th batch is printed as training progresses.
  """
  size = len(dataloader.dataset)
  # Send batches to wherever the model actually lives instead of relying on a
  # notebook-level `device` variable that other cells may rebind.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device("cpu")
  model.train()

  for epoch in range(max_epochs):
    for batch, (X, y) in enumerate(dataloader):
      X, y = X.to(target_device), y.to(target_device)
      pred = model(X)  # forward propagation
      loss = loss_fn(pred, y)  # loss calculation

      optimizer.zero_grad()  # clear gradients left over from the previous step
      loss.backward()  # gradient computation
      optimizer.step()  # weight update
      if batch % 100 == 0:
        # Use a separate name so the loss tensor is not overwritten by its float value.
        loss_value, current = loss.item(), batch * len(X)
        print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")


def test_mlp(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval()
  test_loss, correct = 0,0
  for X,y in dataloader:
    X,y = X.to(device), y.to(device)
    pred = model(X)
    test_loss += loss_fn(pred,y).item()
    correct += (pred.argmax(1) == y).type(torch.float).sum().item()# index of the max probability same as actual label
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Fit the MLP on the training batches, then report accuracy and loss on the test batches.
train_mlp(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=optimizer, max_epochs=max_epochs)
test_mlp(dataloader=test_dataloader, model=model, loss_fn=loss_fn)

loss: 2.310499 [    0/60000]
loss: 2.300726 [12800/60000]
loss: 2.294435 [25600/60000]
loss: 2.287180 [38400/60000]
loss: 2.287267 [51200/60000]
loss: 2.281113 [    0/60000]
loss: 2.285122 [12800/60000]
loss: 2.268475 [25600/60000]
loss: 2.274605 [38400/60000]
loss: 2.268339 [51200/60000]
loss: 2.263160 [    0/60000]
loss: 2.255666 [12800/60000]
loss: 2.254948 [25600/60000]
loss: 2.238776 [38400/60000]
loss: 2.242610 [51200/60000]
loss: 2.235823 [    0/60000]
loss: 2.229156 [12800/60000]
loss: 2.217221 [25600/60000]
loss: 2.200912 [38400/60000]
loss: 2.209281 [51200/60000]
loss: 2.201340 [    0/60000]
loss: 2.185786 [12800/60000]
loss: 2.172340 [25600/60000]
loss: 2.157135 [38400/60000]
loss: 2.166742 [51200/60000]
loss: 2.135669 [    0/60000]
loss: 2.151598 [12800/60000]
loss: 2.113892 [25600/60000]
loss: 2.117150 [38400/60000]
loss: 2.110643 [51200/60000]
loss: 2.090275 [    0/60000]
loss: 2.091150 [12800/60000]
loss: 2.071739 [25600/60000]
loss: 2.039923 [38400/60000]
loss: 2.037581

In [19]:
class LeNet_MNIST(nn.Module):
  """LeNet-style CNN without input padding; returns 10 class logits per sample.

  The flatten step expects 16x4x4 feature maps, which is what a 1x28x28
  input yields after the two conv + pool stages.
  """

  def __init__(self):
    super().__init__()
    # 5x5 convolution, default stride 1 and padding 0: 1 -> 6 channels.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    # 2x2 max pooling with stride 2, shared by both conv stages.
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    self.fc1 = nn.Linear(in_features=16*4*4, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)
    # Single ReLU module reused after every conv and hidden linear layer.
    self.relu = nn.ReLU()

  def forward(self,x):
    """Run conv -> ReLU -> pool twice, flatten, then apply the three linear layers."""
    # Stage 1: for a 1x28x28 input, conv gives 6x24x24 and pooling 6x12x12.
    x = self.conv1(x)
    x = self.relu(x)
    x = self.pool(x)
    # Stage 2: conv gives 16x8x8 and pooling 16x4x4.
    x = self.conv2(x)
    x = self.relu(x)
    x = self.pool(x)
    # Flatten each sample's feature maps into one 256-element vector.
    x = x.view(-1,16*4*4)
    x = self.relu(self.fc1(x))
    x = self.relu(self.fc2(x))
    # No activation on the output layer: raw logits are returned.
    return self.fc3(x)

In [20]:
# Build the CNN on the selected device and optimise it with SGD plus momentum,
# reusing the learning rate and loss function defined for the MLP.
model_lenet = LeNet_MNIST().to(device)
optimizer_lenet = torch.optim.SGD(model_lenet.parameters(), lr=learning_rate, momentum=0.9)

model_lenet.train()
for epoch in range(3):
  for batch, (X, y) in enumerate(train_dataloader):
    # Move the batch from the CPU to the model's device.
    X = X.to(device)
    y = y.to(device)

    pred = model_lenet(X)  # forward propagation
    loss = loss_fn(pred, y)  # loss calculation

    optimizer_lenet.zero_grad()  # clear gradients left over from the previous step
    loss.backward()  # gradient computation
    optimizer_lenet.step()  # weight update

    # Only every 100th batch is logged.
    if batch % 100 != 0:
      continue
    cur_loss = loss.item()
    current = batch * len(X)
    print(f"loss: {cur_loss:>7f}")

loss: 2.307795
loss: 2.299750
loss: 2.295894
loss: 2.297123
loss: 2.288498
loss: 2.288259
loss: 2.283167
loss: 2.250010
loss: 2.242318
loss: 2.107315
loss: 1.984722
loss: 1.477033
loss: 0.910455
loss: 0.906018
loss: 0.581109


In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the LeNet architecture
class LeNet(nn.Module):
    """LeNet-style CNN with a padded first convolution; returns 10 class logits.

    Shape comments below assume a 1x28x28 input per sample.
    """

    def __init__(self):
        super().__init__()
        # padding=2 with a 5x5 kernel keeps the spatial size: 1x28x28 -> 6x28x28
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        # Runs after the first pooling step: 6x14x14 -> 16x10x10
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1)
        # Fully connected head operating on the flattened 16x5x5 feature maps
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)  # one output per class

    def forward(self, x):
        """Apply conv -> ReLU -> 2x2 max-pool twice, flatten, then the linear head."""
        x = torch.max_pool2d(torch.relu(self.conv1(x)), kernel_size=2, stride=2)  # -> 6x14x14
        x = torch.max_pool2d(torch.relu(self.conv2(x)), kernel_size=2, stride=2)  # -> 16x5x5
        flat = x.view(-1, 16 * 5 * 5)  # one 400-element vector per sample
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)  # raw logits, no activation

# Data preparation
# ToTensor() scales pixel values to [0, 1]; Normalize with mean 0.5 and std 0.5
# then maps them to [-1, 1] (a rescaling, not a mean-0 / std-1 standardisation).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

train_dataset = datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)

# Small shuffled batches for training; large fixed-order batches for evaluation.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

# Model, loss function, optimizer
# Use the GPU when CUDA is available and place the freshly built network on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LeNet().to(device)

# Cross-entropy on the network's logits, minimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)

# Training loop
def train(model, device, train_loader, optimizer, criterion, epoch):
    """Run one optimisation pass over `train_loader`, logging every 100th batch.

    Args:
        model: network to optimise; it is switched to training mode.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of (inputs, labels) batches.
        optimizer: optimizer that updates `model`'s parameters.
        criterion: callable mapping (outputs, labels) to a scalar loss tensor.
        epoch: used only to label the log lines.

    Returns:
        None.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss
        loss = criterion(model(inputs), labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        # Progress figures are only computed for the batches that get logged.
        seen = step * len(inputs)
        total = len(train_loader.dataset)
        percent = 100. * step / len(train_loader)
        print(f"Train Epoch: {epoch} [{seen}/{total} ({percent:.0f}%)]\tLoss: {loss.item():.6f}")

# Testing loop
def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            # Sum up batch loss
            test_loss += criterion(output, target).item()

            # Get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n")

# Main training/testing process: after every training epoch, evaluate on the test set.
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(
        model=model,
        device=device,
        train_loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
        epoch=epoch,
    )
    test(model=model, device=device, test_loader=test_loader, criterion=criterion)



Test set: Average loss: 0.0001, Accuracy: 9762/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9831/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9856/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9877/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9912/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9881/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9897/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9903/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9895/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9907/10000 (99%)

