In [1]:
!pip install torchviz

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchviz
  Downloading torchviz-0.0.2.tar.gz (4.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torchviz
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
  Created wheel for torchviz: filename=torchviz-0.0.2-py3-none-any.whl size=4147 sha256=fcf5b10069c6c223b7c585c5c8575efa5f3a8d5dc1df9399e0ff586703de10b2
  Stored in directory: /root/.cache/pip/wheels/29/65/6e/db2515eb1dc760fecd36b40d54df65c1e18534013f1c037e2e
Successfully built torchviz
Installing collected packages: torchviz
Successfully installed torchviz-0.0.2


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchviz
import torchsummary
from torchvision.models import resnet18
import torchvision.transforms as transforms
from torchviz import make_dot

from torchvision.models.resnet import ResNet18_Weights

import os
import argparse

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

In [4]:
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

==> Preparing data..


In [5]:
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=100, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 83030149.27it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
import torch
import torch.nn as nn
from torchsummary import summary

class DepthwiseSeparableConv2d(nn.Module):
    """Depthwise separable convolution 2d."""
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.depthwise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=in_channels, bias=False)
        self.pointwise_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        out = self.depthwise_conv(x)
        out = self.pointwise_conv(out)
        out = self.bn(out)
        out = nn.functional.relu(out)
        return out

class ResNetBlock(nn.Module):
    """Helper class to create a ResNet block."""
    def __init__(self, in_channels, out_channels, stride=1, use_depthwise=True):
        super().__init__()
        if use_depthwise:
            self.conv1 = DepthwiseSeparableConv2d(in_channels, out_channels, stride=stride)
            self.bn1 = nn.BatchNorm2d(out_channels)
            self.conv2 = DepthwiseSeparableConv2d(out_channels, out_channels, stride=1)
        else:
            self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(out_channels)
            self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        #self.bn= nn.BatchNorm2d(out_channels)
        #self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            if use_depthwise:
                self.shortcut = nn.Sequential(
                    DepthwiseSeparableConv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0),
                    nn.BatchNorm2d(out_channels)
                )
            else:
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm2d(out_channels)
                )

    def forward(self, x):
        out = self.conv1(x)
        #out=nn.functional.batch_norm(out)
        out = self.bn2(out)
        out = nn.functional.relu(out)
        out = self.conv2(out)
        #out=nn.functional.batch_norm(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = nn.functional.relu(out)
        return out


class ModifiedResNet18(nn.Module):
    """ResNet-18 model for CIFAR-10."""
    def __init__(self, num_classes=10, use_depthwise=True):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layer1 = nn.Sequential(
            ResNetBlock(32, 64, stride=1,use_depthwise=use_depthwise),
            ResNetBlock(64, 64, use_depthwise=use_depthwise),
            ResNetBlock(64, 64,use_depthwise=use_depthwise),
            ResNetBlock(64, 64, use_depthwise=use_depthwise),
            ResNetBlock(64, 64, use_depthwise=use_depthwise)
            
        )
        self.layer2 = nn.Sequential(
            ResNetBlock(64, 128, stride=2,use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128,use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise),
            ResNetBlock(128, 128, use_depthwise=use_depthwise)
        )
        self.layer3 = nn.Sequential(
            ResNetBlock(128, 256, stride=2,use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256,use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise),
            ResNetBlock(256, 256, use_depthwise=use_depthwise)
        )
        self.layer4 = nn.Sequential(
            ResNetBlock(256, 512, stride=1,use_depthwise=use_depthwise),
            ResNetBlock(512, 512, use_depthwise=use_depthwise),
            ResNetBlock(512, 512,use_depthwise=use_depthwise),
            ResNetBlock(512, 512,use_depthwise=use_depthwise),
            ResNetBlock(512, 512,use_depthwise=use_depthwise)
            
            
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.Dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(512, num_classes)
        
       # self.fc2 = nn.Linear(1024, num_classes)
        #self.fc = nn.Linear(512, num_classes)
        #self.fc = nn.Linear(512, num_classes)
        

    def forward(self, x):
        out = self.conv1(x)
       #out=nn.functional.batch_norm(out)
        out = self.bn1(out)
        out = nn.functional.relu(out)
        out = self.maxpool(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = self.Dropout(out)
        out = torch.flatten(out, 1)
        out = self.fc1(out)
        #out = self.functional.softmax(out)
        #out = self.fc2(out)
        return out


In [7]:
model = ModifiedResNet18()

In [8]:
num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(num_trainable_params)

4906794


In [9]:
from torchsummary import summary

summary(model.cuda(), (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]           4,704
       BatchNorm2d-2           [-1, 32, 32, 32]              64
         MaxPool2d-3           [-1, 32, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          18,432
       BatchNorm2d-5           [-1, 64, 32, 32]             128
            Conv2d-6           [-1, 64, 32, 32]          36,864
       BatchNorm2d-7           [-1, 64, 32, 32]             128
            Conv2d-8           [-1, 64, 32, 32]           2,048
       BatchNorm2d-9           [-1, 64, 32, 32]             128
      ResNetBlock-10           [-1, 64, 32, 32]               0
           Conv2d-11           [-1, 64, 32, 32]             576
           Conv2d-12           [-1, 64, 32, 32]           4,096
      BatchNorm2d-13           [-1, 64, 32, 32]             128
DepthwiseSeparableConv2d-14           [

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR


# Define your loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

#optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
#scheduler = StepLR(optimizer, step_size=50, gamma=0.1)

num_epochs = 100

# Train your model
for epoch in range(num_epochs):
    running_loss = 0.0
    train_acc = 0.0
    for i, data in enumerate(trainloader, 0):
        # Get the inputs and labels from the data loader
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass, backward pass, and optimization
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        train_acc += (outputs.argmax(dim=1) == labels).sum().item()
        if i % 100 == 99:
            print('[EPOCH: %d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
            train_acc = 0.0

    # Test your model
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            # Get the inputs and labels from the data loader
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass and prediction
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            # Compute accuracy
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy on test set: %d %%' % (100 * correct / total))

[EPOCH: 1,   100] loss: 2.526
[EPOCH: 1,   200] loss: 2.045
[EPOCH: 1,   300] loss: 1.882
[EPOCH: 1,   400] loss: 1.801
[EPOCH: 1,   500] loss: 1.686
Accuracy on test set: 44 %
[EPOCH: 2,   100] loss: 1.507
[EPOCH: 2,   200] loss: 1.483
[EPOCH: 2,   300] loss: 1.414
[EPOCH: 2,   400] loss: 1.281
[EPOCH: 2,   500] loss: 1.219
Accuracy on test set: 57 %
[EPOCH: 3,   100] loss: 1.176
[EPOCH: 3,   200] loss: 1.162
[EPOCH: 3,   300] loss: 1.053
[EPOCH: 3,   400] loss: 1.112
[EPOCH: 3,   500] loss: 1.060
Accuracy on test set: 64 %
[EPOCH: 4,   100] loss: 0.953
[EPOCH: 4,   200] loss: 0.955
[EPOCH: 4,   300] loss: 0.913
[EPOCH: 4,   400] loss: 0.881
[EPOCH: 4,   500] loss: 0.862
Accuracy on test set: 64 %
[EPOCH: 5,   100] loss: 0.825
[EPOCH: 5,   200] loss: 0.838
[EPOCH: 5,   300] loss: 0.809
[EPOCH: 5,   400] loss: 0.784
[EPOCH: 5,   500] loss: 0.769
Accuracy on test set: 69 %
[EPOCH: 6,   100] loss: 0.700
[EPOCH: 6,   200] loss: 0.689
[EPOCH: 6,   300] loss: 0.691
[EPOCH: 6,   400] loss: 0