In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary
from tqdm import tqdm

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


### Hyperparameters tuning

In [3]:
# Hyperparameters (can be tuned)
num_epochs = 10
batch_size = 512
learning_rate = 0.001
dropout_rate = 0.2

In [5]:
# Data Loaders
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=False)

print(f'Training dataset size: {len(train_loader.dataset)}')
print(f'Validation dataset size: {len(test_loader.dataset)}')

Training dataset size: 60000
Validation dataset size: 10000


### Model Architecture

In [26]:
# Try1
class TinyCNN(nn.Module):
    def __init__(self):
        super(TinyCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1) #input -? OUtput? RF
        self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv4 = nn.Conv2d(64, 16, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv5 = nn.Conv2d(16, 8, 3)
        self.conv6 = nn.Conv2d(8, 16, 3)
        self.conv7 = nn.Conv2d(16, 10, 3)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        x = F.relu(self.conv7(x))
        x = x.view(-1, 10)
        return F.log_softmax(x)


In [27]:
model = TinyCNN().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 32, 28, 28]           9,248
         MaxPool2d-3           [-1, 32, 14, 14]               0
            Conv2d-4           [-1, 64, 14, 14]          18,496
            Conv2d-5           [-1, 16, 14, 14]           9,232
         MaxPool2d-6             [-1, 16, 7, 7]               0
            Conv2d-7              [-1, 8, 5, 5]           1,160
            Conv2d-8             [-1, 16, 3, 3]           1,168
            Conv2d-9             [-1, 10, 1, 1]           1,450
Total params: 41,074
Trainable params: 41,074
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.56
Params size (MB): 0.16
Estimated Total Size (MB): 0.72
---------------------------------------------

  return F.log_softmax(x)


In [18]:
# Model Instance and Optimizer
model = TinyCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def train(epoch):
    model.train()
    correct = 0
    total = 0
    for batch_idx, (data, target) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs}")):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
    train_accuracy = 100 * correct / total
    return train_accuracy

In [30]:
# Testing Loop
def test(loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in tqdm(loader, desc="Testing"):
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    test_accuracy = 100 * correct / total
    return test_accuracy

In [28]:
# Hyperparameters (can be tuned)
num_epochs = 10
batch_size = 64
learning_rate = 0.005
dropout_rate = 0.5

In [31]:
for epoch in range(1, num_epochs+1):
    train_acc = train(epoch)
    test_acc = test(test_loader)
    print(f"Epoch {epoch}/{num_epochs}, Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

  return F.log_softmax(x)
Epoch 1/10: 100%|██████████| 118/118 [00:09<00:00, 12.78it/s]
Testing: 100%|██████████| 20/20 [00:01<00:00, 12.20it/s]


TypeError: unsupported format string passed to NoneType.__format__

In [6]:
# define the train function

from tqdm import tqdm

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

In [None]:
# define the test function

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [3]:
# Function to train the model
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    total_loss = 0.0
    correct = 0
    total = 0
    for epoch in range(num_epochs):
        model.train()
        print(f"Training started for epoch {epoch + 1}...")
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            
            if batch_idx % 100 == 0:
                print(f'Epoch {epoch + 1}, Batch {batch_idx}/{len(train_loader)} - Loss: {loss.item():.4f}, Accuracy: {100 * correct / total:.2f}%')
    
    # Calculate the final training accuracy for the last epoch
    accuracy = 100 * correct / total
    avg_loss = total_loss / len(train_loader)
    return avg_loss, accuracy

In [None]:
# Function to validate the model
def validate_model(model, val_loader, criterion):
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0
    print("Validation started...")
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += criterion(output, target).item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            
            if batch_idx % 100 == 0:
                print(f'Validation Batch {batch_idx}/{len(val_loader)} - Loss: {val_loss/(batch_idx+1):.4f}, Accuracy: {100 * correct / total:.2f}%')

    accuracy = 100 * correct / total
    val_loss /= len(val_loader)
    return val_loss, accuracy

# Function to evaluate the model's performance
def evaluate_accuracy(correct, total):
    accuracy = 100 * correct / total
    return accuracy

In [8]:
# Define the neural network model with convolution and max pooling layers

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1) #input -? OUtput? RF
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(16, 16, 3, padding=1)
        self.conv4 = nn.Conv2d(16, 8, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv5 = nn.Conv2d(8, 8, 3)
        self.conv6 = nn.Conv2d(8, 8, 3)
        self.conv7 = nn.Conv2d(8, 10, 3)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        x = F.relu(self.conv7(x))
        x = x.view(-1, 10)
        return F.log_softmax(x)

In [9]:

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
            Conv2d-2           [-1, 16, 28, 28]           1,168
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 16, 14, 14]           2,320
            Conv2d-5            [-1, 8, 14, 14]           1,160
         MaxPool2d-6              [-1, 8, 7, 7]               0
            Conv2d-7              [-1, 8, 5, 5]             584
            Conv2d-8              [-1, 8, 3, 3]             584
            Conv2d-9             [-1, 10, 1, 1]             730
Total params: 6,626
Trainable params: 6,626
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.21
Params size (MB): 0.03
Estimated Total Size (MB): 0.24
-----------------------------------------------

  return F.log_softmax(x)


In [10]:
# run the model

batch_size = 8192
num_epochs = 3

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

model = Net().to(device)

# Train the model and collect training loss
train_loss, train_accuracy = train_model(model, train_loader, criterion, optimizer, num_epochs)
print(f"Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%")

# Validate the model
val_loss, val_accuracy = validate_model(model, test_loader, criterion)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

Training started for epoch 1...


  return F.log_softmax(x)


Epoch 1, Batch 0/8 - Loss: 2.3021, Accuracy: 9.97%
Training started for epoch 2...
Epoch 2, Batch 0/8 - Loss: 2.3023, Accuracy: 9.79%
Training started for epoch 3...
Epoch 3, Batch 0/8 - Loss: 2.3027, Accuracy: 9.72%
Training Loss: 6.9073, Training Accuracy: 9.75%
Validation started...


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor