In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
import wandb
epochs = 20

In [2]:
def set_seed(seed = 42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_seed(42)

In [3]:
wandb.init(project="CNN on MNIST pytorch", save_code=True,
           config={
               "batch_size": 128,
               "epochs": 20,
               "optimizer": "Adam",
               "loss function": "CrossEntropyLoss",
           }
           )

config = wandb.config

[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from C:\Users\SMAYA\_netrc.
[34m[1mwandb[0m: Currently logged in as: [33mb25cs1065[0m ([33mb25cs1065-indian-institute-of-technology-jodhpur[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"using device {device}")

using device cuda:0


In [5]:
#Prepare the MNIST dataset in this cell
''
train_transform = transforms.Compose([transforms.ToTensor(),
                                transforms.RandomRotation(degrees=10),
                                transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), scale=(0.7, 1.3)),
                                transforms.Normalize((0.5,), (0.5,))])
''
test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=train_transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=test_transform)
print(f"training data shape: {trainset.data.shape}")
print(f"training labels shape: {trainset.targets.shape}")
print(f"testing data shape: {testset.data.shape}")
print(f"testing labels shape: {testset.targets.shape}")
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle = True)

training data shape: torch.Size([60000, 28, 28])
training labels shape: torch.Size([60000])
testing data shape: torch.Size([10000, 28, 28])
testing labels shape: torch.Size([10000])


In [6]:
class Neural_network(nn.Module):
    def __init__(self):
        super(Neural_network, self).__init__()
        self.conv1 = nn.Conv2d(1, 12, 3, padding=1)#output dim = 14x14x8
        self.conv2 = nn.Conv2d(12, 24, 3)#output dim = 7x7x16
        self.dropout = nn.Dropout(p=0.1)
        self.normalization1 = nn.BatchNorm2d(12)
        self.normalization2 = nn.BatchNorm2d(24)
        self.normalization3 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(24, 32, 3)#output dim = 3x3x20
        self.fc1 = nn.Linear(4*4*32, 16)
        self.fc2 = nn.Linear(16, 10)
        #self.fc3 = nn.Linear(16, 10)
        self.pool = nn.MaxPool2d(2, 2)
    def forward(self,x):
        x= self.pool(F.leaky_relu(self.normalization1(self.conv1(x))))
        x= self.pool(F.leaky_relu(self.normalization2(self.conv2(x))))
        x= F.leaky_relu(self.normalization3(self.conv3(x)))
        #print(f"Shape after conv3 and pool: {x.shape}")
        x= x.view(-1, 4*4*32)# understand the math of this line
        x= F.leaky_relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        #x = self.fc3(x)
        return x
    
net = Neural_network()
net.to(device)

Neural_network(
  (conv1): Conv2d(1, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1))
  (dropout): Dropout(p=0.1, inplace=False)
  (normalization1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (normalization2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (normalization3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(24, 32, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=10, bias=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [7]:
total_params = sum(p.numel() for p in net.parameters())
print(f"Total number of parameters in the model: {total_params}")

Total number of parameters in the model: 18194


In [8]:
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)#does net.parameters just make a vector of all the parameters in the model?

In [9]:
print(f"{trainloader}")

<torch.utils.data.dataloader.DataLoader object at 0x0000023EF08B60D0>


In [10]:
#have to decide how to train the model in this cell ie epochs, batch size, backpropagation etc.
'''
epochs = 20
for epoch in range(epochs):
    dynamic_loss = 0.0
    for i,data in enumerate(trainloader):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_function(outputs, F.one_hot(labels, num_classes=10).float())
        loss.backward()
        optimizer.step()
        dynamic_loss += loss
        if i%200 ==99:
            print(f"epoch {epoch+1}, batch {i+1}, loss: {dynamic_loss/400}")
            dynamic_loss = 0.0 
'''

'\nepochs = 20\nfor epoch in range(epochs):\n    dynamic_loss = 0.0\n    for i,data in enumerate(trainloader):\n        inputs, labels = data[0].to(device), data[1].to(device)\n        optimizer.zero_grad()\n        outputs = net(inputs)\n        loss = loss_function(outputs, F.one_hot(labels, num_classes=10).float())\n        loss.backward()\n        optimizer.step()\n        dynamic_loss += loss\n        if i%200 ==99:\n            print(f"epoch {epoch+1}, batch {i+1}, loss: {dynamic_loss/400}")\n            dynamic_loss = 0.0 \n'

In [11]:
'''
from tqdm import tqdm
pbar = tqdm(trainloader, desc=f"Epoch {epoch+1}")
for batch_idx, (data,target) in enumerate(pbar):
    inputs, labels = data[0].to(device), data[1].to(device)
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = loss_function(outputs, F.one_hot(labels, num_classes=10).float())
    loss.backward()
    optimizer.step()
    pbar.set_postfix({'loss': loss.item()})
    wandb.log({"train_loss": loss.item()})
    dynamic_loss += loss
    if i%200 ==99:
        print(f"epoch {epoch+1}, batch {i+1}, loss: {dynamic_loss/400}")
        dynamic_loss = 0.0 
'''

'\nfrom tqdm import tqdm\npbar = tqdm(trainloader, desc=f"Epoch {epoch+1}")\nfor batch_idx, (data,target) in enumerate(pbar):\n    inputs, labels = data[0].to(device), data[1].to(device)\n    optimizer.zero_grad()\n    outputs = net(inputs)\n    loss = loss_function(outputs, F.one_hot(labels, num_classes=10).float())\n    loss.backward()\n    optimizer.step()\n    pbar.set_postfix({\'loss\': loss.item()})\n    wandb.log({"train_loss": loss.item()})\n    dynamic_loss += loss\n    if i%200 ==99:\n        print(f"epoch {epoch+1}, batch {i+1}, loss: {dynamic_loss/400}")\n        dynamic_loss = 0.0 \n'

In [12]:
def train(net, device, train_loader, optimizer, epoch):
    net.train()
    # tqdm creates the progress bar
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}")
    train_correct =0
    train_total =0
    for i, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        outputs = net(data)
        loss = loss_function(outputs, target)
        loss.backward()
        optimizer.step()
        _, train_predicted = torch.max(outputs, 1)
        train_total += len(target)
        train_correct += (train_predicted == target).sum().item()
        # Update the bar with the latest loss
        pbar.set_description(desc=f'Epoch {epoch} Loss={loss.item():.4f}')
        
        # Log training loss to wandb
        wandb.log({"train_loss": loss.item(), "train_accuracy": 100.0 * train_correct / train_total})
        pbar.set_postfix({'loss': loss.item(), 'train_accuracy': 100.0 * train_correct / train_total})

def test(net, device, testloader):
    net.eval()
    correct_labels = 0
    test_loss = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            test_loss += loss_function(outputs, labels).item()
            #print(f"outputs.data = {outputs.data}")
            _, predicted = torch.max(outputs, 1)
            #if (predicted == labels).sum().item():
            correct_labels += (predicted == labels).sum().item()
    test_loss /= len(testloader.dataset)
    accuracy = 100.0 * correct_labels / len(testloader.dataset)
    wandb.log({"test_accuracy": accuracy, "test_loss": test_loss})
    print(f"\n Test Set: Accuracy: {accuracy:.4f}%, test_loss: {test_loss} \n")
# Log test results to wandb
    return accuracy

In [13]:
'''correct_labels = 0
total_labels = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        probs = F.softmax(outputs, dim =1)
        #print(f"outputs.data = {outputs.data}")
        _, predicted = torch.max(probs, 1)
        total_labels += labels.size(0)
        #if (predicted == labels).sum().item():
        correct_labels += (predicted == labels).sum().item()
        wandb.log({"test_accuracy": 100*correct_labels/total_labels})
print(f"accuracy of the network on the 10000 test images: {100*correct_labels/total_labels} %")
'''

'correct_labels = 0\ntotal_labels = 0\nwith torch.no_grad():\n    for data in testloader:\n        images, labels = data[0].to(device), data[1].to(device)\n        outputs = net(images)\n        probs = F.softmax(outputs, dim =1)\n        #print(f"outputs.data = {outputs.data}")\n        _, predicted = torch.max(probs, 1)\n        total_labels += labels.size(0)\n        #if (predicted == labels).sum().item():\n        correct_labels += (predicted == labels).sum().item()\n        wandb.log({"test_accuracy": 100*correct_labels/total_labels})\nprint(f"accuracy of the network on the 10000 test images: {100*correct_labels/total_labels} %")\n'

In [14]:
for epoch in range(1, config.epochs + 1):
    train(net, device, trainloader, optimizer, epoch)
    test_accuracy = test(net, device, testloader)
wandb.finish()

Epoch 1 Loss=0.4172: 100%|██████████| 469/469 [02:00<00:00,  3.90it/s, loss=0.417, train_accuracy=72.2]



 Test Set: Accuracy: 97.1100%, test_loss: 0.0008295545760542154 



Epoch 2 Loss=0.2389: 100%|██████████| 469/469 [14:37<00:00,  1.87s/it, loss=0.239, train_accuracy=90.4]    



 Test Set: Accuracy: 97.0600%, test_loss: 0.0007285563215147704 



Epoch 3 Loss=0.1658: 100%|██████████| 469/469 [00:33<00:00, 14.07it/s, loss=0.166, train_accuracy=92.9] 



 Test Set: Accuracy: 97.2800%, test_loss: 0.0006223651727195829 



Epoch 4 Loss=0.1888: 100%|██████████| 469/469 [00:53<00:00,  8.77it/s, loss=0.189, train_accuracy=94]   



 Test Set: Accuracy: 98.0600%, test_loss: 0.0004596131012949627 



Epoch 5 Loss=0.1932: 100%|██████████| 469/469 [01:17<00:00,  6.05it/s, loss=0.193, train_accuracy=94.6] 



 Test Set: Accuracy: 98.4600%, test_loss: 0.0003786530995887006 



Epoch 6 Loss=0.0669: 100%|██████████| 469/469 [01:18<00:00,  5.96it/s, loss=0.0669, train_accuracy=95]  



 Test Set: Accuracy: 98.5700%, test_loss: 0.00032161601687257646 



Epoch 7 Loss=0.1185: 100%|██████████| 469/469 [02:06<00:00,  3.70it/s, loss=0.119, train_accuracy=95.4] 



 Test Set: Accuracy: 98.6700%, test_loss: 0.0003000000886851922 



Epoch 8 Loss=0.2862: 100%|██████████| 469/469 [01:32<00:00,  5.08it/s, loss=0.286, train_accuracy=95.5] 



 Test Set: Accuracy: 98.8800%, test_loss: 0.00027982439685147255 



Epoch 9 Loss=0.1249: 100%|██████████| 469/469 [02:04<00:00,  3.77it/s, loss=0.125, train_accuracy=95.6] 



 Test Set: Accuracy: 98.6200%, test_loss: 0.00032700320551521143 



Epoch 10 Loss=0.0841: 100%|██████████| 469/469 [01:59<00:00,  3.93it/s, loss=0.0841, train_accuracy=95.8]



 Test Set: Accuracy: 98.7300%, test_loss: 0.00029795467034564354 



Epoch 11 Loss=0.1264: 100%|██████████| 469/469 [01:58<00:00,  3.95it/s, loss=0.126, train_accuracy=95.9] 



 Test Set: Accuracy: 98.8800%, test_loss: 0.0002849913059733808 



Epoch 12 Loss=0.1277: 100%|██████████| 469/469 [01:58<00:00,  3.97it/s, loss=0.128, train_accuracy=96.1] 



 Test Set: Accuracy: 98.5600%, test_loss: 0.00032570452569634655 



Epoch 13 Loss=0.1768: 100%|██████████| 469/469 [01:51<00:00,  4.22it/s, loss=0.177, train_accuracy=96.5] 



 Test Set: Accuracy: 98.9500%, test_loss: 0.0002497715447192604 



Epoch 14 Loss=0.2030: 100%|██████████| 469/469 [01:48<00:00,  4.33it/s, loss=0.203, train_accuracy=96.3] 



 Test Set: Accuracy: 98.9100%, test_loss: 0.00023391506910702448 



Epoch 15 Loss=0.2424: 100%|██████████| 469/469 [02:03<00:00,  3.78it/s, loss=0.242, train_accuracy=96.4] 



 Test Set: Accuracy: 98.8900%, test_loss: 0.00027147889290499736 



Epoch 16 Loss=0.1461: 100%|██████████| 469/469 [01:12<00:00,  6.46it/s, loss=0.146, train_accuracy=96.4] 



 Test Set: Accuracy: 99.1600%, test_loss: 0.00022495543458207976 



Epoch 17 Loss=0.0613: 100%|██████████| 469/469 [01:07<00:00,  6.91it/s, loss=0.0613, train_accuracy=96.5]



 Test Set: Accuracy: 98.8700%, test_loss: 0.0002667698753393779 



Epoch 18 Loss=0.0529: 100%|██████████| 469/469 [01:22<00:00,  5.69it/s, loss=0.0529, train_accuracy=96.6]



 Test Set: Accuracy: 98.9900%, test_loss: 0.00023505081380644698 



Epoch 19 Loss=0.0849: 100%|██████████| 469/469 [01:03<00:00,  7.40it/s, loss=0.0849, train_accuracy=96.6]



 Test Set: Accuracy: 98.7100%, test_loss: 0.00027698351168219235 



Epoch 20 Loss=0.1187: 100%|██████████| 469/469 [03:32<00:00,  2.21it/s, loss=0.119, train_accuracy=96.6]   



 Test Set: Accuracy: 98.8200%, test_loss: 0.00026225781759640085 





0,1
test_accuracy,▁▁▂▄▆▆▆▇▆▇▇▆▇▇▇█▇▇▆▇
test_loss,█▇▆▄▃▂▂▂▂▂▂▂▁▁▂▁▁▁▂▁
train_accuracy,▁▃▄█████████████████████████████████████
train_loss,█▆▄▃▃▃▂▂▂▂▁▂▃▂▂▂▃▂▁▂▂▂▁▁▂▂▃▂▂▁▁▂▂▁▁▂▁▁▁▂

0,1
test_accuracy,98.82
test_loss,0.00026
train_accuracy,96.61167
train_loss,0.11866
