<a href="https://colab.research.google.com/github/mahen2-cmd/neuralnetworkcompression/blob/main/Deep_Learning_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import copy


In [None]:
# Teacher network: ResNet-101 initialised from the torchvision pretrained
# checkpoint (downloaded on first use, see the log below).
resnet = models.resnet101(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:03<00:00, 45.1MB/s]


In [None]:

num_classes = 10 # number of output classes for the new head
# Replace the classifier with a freshly initialised layer of `num_classes`
# outputs, keeping the input width of the original layer.
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)


In [None]:
# Training-time pipeline: random 32x32 crop after 4-pixel padding, random
# horizontal flip, tensor conversion, then per-channel normalisation with
# mean 0.5 / std 0.5.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Test-time pipeline: no augmentation, same normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


In [None]:

# CIFAR10 training split (downloaded into ./data if missing), augmented with
# `transform_train` and served in shuffled batches of 128.
trainset = datasets.CIFAR10(root='./data', train=True,
                            download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)

# CIFAR10 test split, un-augmented and served in a fixed order.
testset = datasets.CIFAR10(root='./data', train=False,
                           download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 76786706.57it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Swap the stem convolution for a 3x3 / stride-1 layer. This is a brand-new
# nn.Conv2d, so the stem starts from a fresh initialisation rather than from
# the pretrained weights loaded above.
resnet.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)


In [None]:
# Re-create the classification head from the layer's own input width and the
# shared `num_classes` setting instead of hard-coded sizes.
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)


In [None]:
# Classification loss used by the training cell below.
criterion = nn.CrossEntropyLoss()
# NOTE: the training cell builds its own SGD optimizer with the same settings
# before the loop starts, so this instance is replaced before it is used.
optimizer = optim.SGD(resnet.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU, and move
# the teacher there. As the last expression of the cell, `resnet.to(device)`
# also displays the model structure.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Define the number of epochs
num_epochs = 20

# Define the learning rate schedule
# NOTE(review): with num_epochs = 20 the milestones at epochs 81, 122 and 164
# are never reached, so the learning rate stays at 0.1 for the whole run.
# Confirm whether the schedule or the epoch count is the intended one.
lr_schedule = {0: 0.1, 81: 0.01, 122: 0.001, 164: 0.0001}

# Define the optimizer
optimizer = optim.SGD(resnet.parameters(), lr=lr_schedule[0], momentum=0.9, weight_decay=5e-4)

# Training loop
for epoch in range(num_epochs):
    # Update the learning rate
    if epoch in lr_schedule:
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_schedule[epoch]

    # Train the model for one epoch
    resnet.train()
    train_loss = 0.0
    correct = 0
    total = 0
    for i, (inputs, targets) in enumerate(trainloader):
        # Move the inputs and targets to the device
        inputs, targets = inputs.to(device), targets.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = resnet(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Update statistics
        # `loss` is a batch mean, so weight it by the batch size to get a
        # per-sample average when dividing by the dataset size below.
        train_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    # Print training statistics
    print('Epoch %d: Train Loss: %.3f | Train Acc: %.3f%% (%d/%d)' %
          (epoch+1, train_loss/len(trainloader.dataset), 100.*correct/total, correct, total))

    # Evaluate the model on the test set
    # The model is left in eval mode at the end of every epoch.
    resnet.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(testloader):
            # Move the inputs and targets to the device
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = resnet(inputs)
            loss = criterion(outputs, targets)

            # Update statistics
            test_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Print test statistics
    print('Epoch %d: Test Loss: %.3f | Test Acc: %.3f%% (%d/%d)' %
          (epoch+1, test_loss/len(testloader.dataset), 100.*correct/total, correct, total))


Epoch 1: Train Loss: 2.171 | Train Acc: 23.376% (11688/50000)
Epoch 1: Test Loss: 1.779 | Test Acc: 34.300% (3430/10000)
Epoch 2: Train Loss: 1.688 | Train Acc: 36.822% (18411/50000)
Epoch 2: Test Loss: 1.499 | Test Acc: 44.720% (4472/10000)
Epoch 3: Train Loss: 1.532 | Train Acc: 43.964% (21982/50000)
Epoch 3: Test Loss: 1.462 | Test Acc: 46.760% (4676/10000)
Epoch 4: Train Loss: 1.391 | Train Acc: 49.188% (24594/50000)
Epoch 4: Test Loss: 1.283 | Test Acc: 52.550% (5255/10000)
Epoch 5: Train Loss: 1.261 | Train Acc: 54.512% (27256/50000)
Epoch 5: Test Loss: 1.316 | Test Acc: 53.270% (5327/10000)
Epoch 6: Train Loss: 1.138 | Train Acc: 59.408% (29704/50000)
Epoch 6: Test Loss: 1.118 | Test Acc: 60.140% (6014/10000)
Epoch 7: Train Loss: 1.051 | Train Acc: 62.604% (31302/50000)
Epoch 7: Test Loss: 1.159 | Test Acc: 58.980% (5898/10000)
Epoch 8: Train Loss: 0.993 | Train Acc: 64.864% (32432/50000)
Epoch 8: Test Loss: 1.211 | Test Acc: 59.360% (5936/10000)
Epoch 9: Train Loss: 0.947 | Tra

In [None]:
# Mount Google Drive at /content/drive; the checkpoint paths used by later
# cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# After training
# Persist the fine-tuned teacher's weights (state_dict only) to Drive; a later
# cell reloads this same path.
PATH = "/content/drive/MyDrive/Deep Learning Project/resnet101.pth"
torch.save(resnet.state_dict(), PATH)

In [None]:
import torch.nn.utils.prune as prune

PRUNED_RESNET_PATH = "/content/drive/MyDrive/Deep Learning Project/pruned_resnet101.pth"

# Prune the model: zero the 20% smallest-magnitude weights, ranked jointly
# across the two selected convolution layers.
parameters_to_prune = [(resnet.layer1[0].conv1, 'weight'), (resnet.layer2[0].conv1, 'weight')]
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2,
)

# Make the pruning permanent. Until `prune.remove` is called each pruned
# module holds `weight_orig` + `weight_mask` instead of a plain `weight`
# parameter, so the saved state_dict would not load into a regular ResNet-101
# and code that looks up `module._parameters['weight']` would skip these layers.
for pruned_module, param_name in parameters_to_prune:
    prune.remove(pruned_module, param_name)

# Save the pruned model
torch.save(resnet.state_dict(), PRUNED_RESNET_PATH)


## Using Knowledge Distillation to prune

In [None]:
import copy
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms
from torchsummary import summary
import torchvision

In [None]:
# Define student model (ResNet18), trained from scratch (no pretrained weights)
student_model = torchvision.models.resnet18(pretrained=False)

# Same stem and head changes as the teacher: 3x3 / stride-1 first convolution
# and a `num_classes`-way classifier sized from the layer it replaces.
student_model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
student_model.fc = nn.Linear(student_model.fc.in_features, num_classes)
# Follow the notebook-wide `device` instead of assuming CUDA is present.
student_model.to(device)




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Define loss function (knowledge distillation)
class DistillationLoss(nn.Module):
    """Cross-entropy on the labels plus a scaled KL term towards the teacher.

    Args:
        alpha: value both sets of logits are divided by before the softmax;
            the KL term is multiplied by ``alpha ** 2``.
    """

    def __init__(self, alpha):
        super().__init__()
        self.alpha = alpha

    def forward(self, y_student, y_teacher, labels):
        """Return ``cross_entropy(student, labels) + alpha**2 * KL(teacher || student)``.

        ``y_student`` and ``y_teacher`` are logits with classes along dim 1;
        the KL term uses the ``'batchmean'`` reduction.
        """
        scale = self.alpha
        student_log_probs = F.log_softmax(y_student / scale, dim=1)
        teacher_probs = F.softmax(y_teacher / scale, dim=1)
        soft_term = F.kl_div(student_log_probs, teacher_probs, reduction='batchmean')
        hard_term = F.cross_entropy(y_student, labels)
        return hard_term + soft_term * scale**2

# Define optimizer
optimizer = optim.SGD(student_model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)


# Train student model with knowledge distillation
# NOTE(review): DistillationLoss divides the logits by `alpha` before the
# softmax, so 0.1 sharpens both distributions instead of softening them.
# Confirm this value is intended.
alpha = 0.1
num_epochs = 10
distillation_criterion = DistillationLoss(alpha)  # stateless, so build it once
resnet.eval()  # the teacher only provides targets and must stay in inference mode
for epoch in range(num_epochs):
    student_model.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        # No autograd graph is needed for the teacher; skipping it saves the
        # memory of a full ResNet-101 backward graph on every step.
        with torch.no_grad():
            y_teacher = resnet(inputs)
        y_student = student_model(inputs)
        loss = distillation_criterion(y_student, y_teacher, labels)
        loss.backward()
        optimizer.step()

    # Evaluate student model on test set
    student_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = student_model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    acc = 100 * correct / total
    print('Epoch %d test accuracy: %.2f%%' % (epoch + 1, acc))

# Fine-tune student model with standard supervised learning
optimizer = optim.SGD(student_model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
num_epochs = 50
for epoch in range(num_epochs):
    student_model.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.cuda(), labels.cuda()

        optimizer.zero_grad()
        outputs = student_model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluate student model on test set
    student_model.eval()
    correct = 0


Epoch 1 test accuracy: 35.78%
Epoch 2 test accuracy: 48.76%
Epoch 3 test accuracy: 61.19%
Epoch 4 test accuracy: 65.82%
Epoch 5 test accuracy: 66.84%
Epoch 6 test accuracy: 67.48%
Epoch 7 test accuracy: 72.53%
Epoch 8 test accuracy: 73.68%
Epoch 9 test accuracy: 73.18%
Epoch 10 test accuracy: 78.49%


In [None]:

# After training
# Persist the distilled student's weights to Drive. Note that this rebinds the
# notebook-level `PATH` name used earlier for the teacher checkpoint.
PATH = "/content/drive/MyDrive/Deep Learning Project/knowledge_distillation_resnet18.pth"
torch.save(student_model.state_dict(), PATH)


## Quantization

In [None]:
import torch
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# NOTE(review): `model` is created and put in eval mode here, but every later
# statement in this cell operates on `resnet`; `model` is otherwise unused.
model = models.resnet101(pretrained=True)
model.eval()
resnet.qconfig = torch.quantization.get_default_qconfig()

# Calibrate the resnet
# NOTE(review): `calib_loader` is not defined anywhere in the visible notebook,
# so this cell depends on state from elsewhere. TODO: define the calibration
# loader explicitly.
# Both `prepare` and `convert` are called with inplace=True, so they modify
# `resnet` itself; later cells that reuse `resnet` see the modified object.
torch.quantization.prepare(resnet, inplace=True)
with torch.no_grad():
    for images, _ in calib_loader:
        resnet(images)

# Convert the resnet to a quantized version
torch.quantization.convert(resnet, inplace=True)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Save the quantized resnet
# The destination variable was previously defined but never used (the save went
# to a relative local file), and its file name said resnet18 although the model
# being saved is the ResNet-101.
PATH_QUANTIZED_RESNET = "/content/drive/MyDrive/Deep Learning Project/quantized_resnet101.pth"
torch.save(resnet.state_dict(), PATH_QUANTIZED_RESNET)


## Compression with Reinforcement Learning

In [None]:
# Repeats the Drive mount from the earlier cell so this section can be run on
# its own; when Drive is already mounted the call only reports that (see output).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torch
import torchvision.models as models

# Build the architecture without downloading pretrained weights: every
# parameter and buffer is overwritten by the checkpoint loaded below.
model = models.resnet101(pretrained=False)
model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
model.fc = nn.Linear(2048, 10)
# Load the fine-tuned weights saved earlier in the notebook. `map_location`
# lets a checkpoint written from the GPU load on a CPU-only runtime too.
PATH = "/content/drive/MyDrive/Deep Learning Project/resnet101.pth"
model.load_state_dict(torch.load(PATH, map_location="cpu"))
# NOTE(review): the modules below are only registered on the model (so they
# show up in `model.parameters()` and in the printed structure); nothing in
# this cell wires them into the forward pass. Confirm they are meant to be
# counted by the parameter budget used in later cells.
model.add_module("conv2", nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False))
model.add_module("relu2", nn.ReLU(inplace=True))
model.add_module("fc2", nn.Linear(128*7*7, 1024))
model.add_module("relu3", nn.ReLU(inplace=True))
model.add_module("fc3", nn.Linear(1024, 5))
print(model)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
target_params = 44545160
accuracy_threshold = 0.7


def reward_function(params, accuracy, target_params, accuracy_threshold):
    """Score a pruned model against a size budget and an accuracy floor.

    Returns:
        -1.0 when ``params`` is not within ``target_params``;
        1.0 when it is within budget and ``accuracy`` reaches
        ``accuracy_threshold``; 0.5 when it is within budget only.
    """
    within_budget = params <= target_params
    if not within_budget:
        return -1.0
    return 1.0 if accuracy >= accuracy_threshold else 0.5

In [None]:
import numpy as np
import random
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Define the reinforcement learning agent
class PruningAgent:
    """Epsilon-greedy agent whose policy is a small softmax network over actions.

    Attributes set in ``__init__``:
        state_size / action_size: input and output widths of the policy network.
        epsilon, epsilon_min, epsilon_decay: exploration probability, its lower
            bound and the multiplicative decay applied after every update.
        learning_rate: step size of the Adam optimizer.
        model: the ``(network, optimizer)`` pair returned by ``_build_model``.
        memory, gamma: initialised here but not used by any method of the class.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Create the policy network and its optimizer; returns ``(network, optimizer)``."""
        model = nn.Sequential(
            nn.Linear(self.state_size, 128),
            nn.ReLU(),
            nn.Linear(128, self.action_size),
            nn.Softmax(dim=-1)
        )
        optimizer = optim.Adam(model.parameters(), lr=self.learning_rate)
        return model, optimizer

    def act(self, state):
        """Pick an action index in ``[0, action_size)`` for ``state``.

        With probability ``epsilon`` the action is uniform random; otherwise it
        is sampled from the policy network's output distribution.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        # `self.model` is the (network, optimizer) pair, so the network has to
        # be unpacked before it can be called.
        policy_net, _ = self.model
        state = torch.as_tensor(state, dtype=torch.float32)
        with torch.no_grad():
            act_probs = policy_net(state)
        # Renormalise in float64 so rounding in the float32 softmax cannot trip
        # np.random.choice's "probabilities do not sum to 1" check.
        probs = act_probs.double().numpy()
        probs = probs / probs.sum()
        return int(np.random.choice(self.action_size, p=probs))

    def update_policy(self, state, action, reward, learning_rate=0.1):
        """Take one policy-gradient step on ``-log pi(action | state) * reward``.

        Args:
            state: 1-D sequence/array of length ``state_size``.
            action: index of the action that was taken.
            reward: scalar reward for that action.
            learning_rate: accepted for backward compatibility but unused; the
                optimizer runs with ``self.learning_rate``.

        Also decays ``epsilon`` by ``epsilon_decay``, bounded below by
        ``epsilon_min``.
        """
        policy_net, optimizer = self.model
        state = torch.as_tensor(state, dtype=torch.float32)

        # Callers may invoke this from inside `torch.no_grad()` (for example
        # right after an evaluation loop). Re-enable autograd locally so the
        # loss is connected to the network parameters and the step is not a
        # silent no-op.
        with torch.enable_grad():
            probs = policy_net(state)
            # Clamp before the log so a zero probability cannot produce -inf.
            log_prob = torch.log(probs[int(action)].clamp_min(1e-12))
            loss = -log_prob * float(reward)

            # Optimize the model
            optimizer.zero_grad()
            loss.backward()
        optimizer.step()

        # Update the exploration probability
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)



In [None]:
# Dimensions of the agent's policy network: input width and number of actions.
# NOTE(review): `prune` uses the chosen action directly as an index into the
# flat parameter mask, so with action_size = 100000 only the first 100000
# mask positions can ever be selected. Confirm this is intended.
state_size = 3072
action_size = 100000
agent = PruningAgent(state_size, action_size)
# PruningAgent defines no __repr__, so this prints the default object repr.
print(agent)

<__main__.PruningAgent object at 0x7fb241b5c280>


In [None]:
def prune(resnet, agent, param_budget=None, max_steps=10000):
    """Zero out agent-selected weights of ``resnet`` in place and return it.

    The agent is queried repeatedly; each returned action is treated as an
    index into one flat mask laid over all trainable parameters (in
    ``resnet.parameters()`` order). Selection stops once enough positions are
    masked to bring the model down to ``param_budget`` parameters, or after
    ``max_steps`` queries, whichever comes first.

    Args:
        resnet: module whose trainable parameters are masked in place.
        agent: object exposing ``act(state) -> int``.
        param_budget: number of parameters to keep; defaults to the
            notebook-level ``target_params``.
        max_steps: upper bound on the number of agent queries.

    Returns:
        The same ``resnet`` object, with the selected entries set to zero.

    NOTE: this function shares its name with the ``torch.nn.utils.prune``
    module alias imported in an earlier cell and shadows it once defined.
    """
    if param_budget is None:
        param_budget = target_params

    flattened_resnet = nn.Sequential(
        nn.Flatten(),
        nn.Linear(150528, 10)
    )
    trainable = [p for p in resnet.parameters() if p.requires_grad]
    current_params = sum(p.numel() for p in trainable)
    print(f"Current number of parameters: {current_params}")
    mask = np.zeros(current_params)
    # The agent's state is the output of a randomly initialised linear probe on
    # random input; entries are zeroed as actions are taken.
    state = flattened_resnet(torch.randn(1, 3, 224, 224))
    state = state.detach().numpy()[0]

    # Track the number of masked positions incrementally instead of re-counting
    # the whole mask on every step.
    to_mask = current_params - param_budget
    masked = 0
    count = 0
    while masked < to_mask and count < max_steps:
        # Fold out-of-range actions back into the mask instead of crashing.
        action = int(agent.act(state)) % current_params
        if mask[action] == 0:
            mask[action] = 1
            masked += 1
        state[action % 10] = 0
        count += 1

    # Apply the mask to the network. `offset` walks the flat mask by each
    # tensor's full size so every parameter reads its own slice; the number of
    # zeroed entries is tracked separately.
    offset = 0
    connection_count = 0
    with torch.no_grad():
        for param in trainable:
            size = param.numel()
            m = mask[offset:offset + size]
            offset += size
            hits = int(np.count_nonzero(m))
            if hits:
                keep = torch.from_numpy(1.0 - m).to(device=param.device, dtype=param.dtype)
                param.mul_(keep.view_as(param))
                connection_count += hits

    # Get the pruned number of parameters
    print(f"Pruned number of parameters: {connection_count}")
    print(f"Remaining number of parameters: {current_params - connection_count}")
    return resnet


# `prune` edits the network in place and returns the object it was given, so
# `pruned_resnet` and `model` refer to the same module afterwards.
pruned_resnet = prune(model, agent)

Current number of parameters: 49015375
Pruned number of parameters: 99998
Remaining number of parameters: 48915377


In [None]:
# Retrain the pruned ResNet101 architecture
# We use transfer learning to fine-tune the pruned ResNet101 architecture on our specific task.
def fine_tune(pruned_resnet, train_loader, val_loader, num_epochs, learning_rate, agent):
    """Train a new classifier head on a frozen pruned network and reward the agent.

    All existing parameters are frozen, ``fc`` is replaced by a fresh 10-way
    linear layer, and only that layer is optimised with SGD. After every epoch
    the model is evaluated on ``val_loader``; the resulting accuracy and the
    model's non-zero parameter count are turned into a reward (via the
    notebook-level ``reward_function``, ``target_params`` and
    ``accuracy_threshold``) that is passed to ``agent.update_policy``.

    Args:
        pruned_resnet: model with an ``fc`` linear head; modified in place.
        train_loader / val_loader: iterables of ``(inputs, labels)`` batches.
        num_epochs: number of passes over ``train_loader``.
        learning_rate: SGD learning rate for the new head.
        agent: object exposing ``act(state)`` and
            ``update_policy(state, action, reward)``.

    Returns:
        The same ``pruned_resnet`` object.

    NOTE: a later cell defines another ``fine_tune`` with a different
    signature; once that cell runs, this definition is shadowed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    pruned_resnet.to(device)
    # Freeze the weights of the pruned ResNet101 architecture
    for param in pruned_resnet.parameters():
        param.requires_grad = False

    # Replace the last fully connected layer with a new one for our specific task
    num_classes = 10
    in_features = pruned_resnet.fc.in_features
    pruned_resnet.fc = nn.Linear(in_features, num_classes)
    pruned_resnet.fc.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(pruned_resnet.fc.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(num_epochs):
        pruned_resnet.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = pruned_resnet(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1} training loss: {running_loss/len(train_loader)}")

        # Evaluate the pruned ResNet101 architecture on the validation set
        pruned_resnet.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = pruned_resnet(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            accuracy = correct / total
            print(f"Epoch {epoch+1} validation accuracy: {accuracy}")

            # Model size for the reward = number of non-zero weights in the
            # whole network. Filtering on `requires_grad` here would only ever
            # count the new head (everything else was frozen above), so the
            # size budget could never be exceeded.
            params = sum(int((p != 0).sum()) for p in pruned_resnet.parameters())

            reward = reward_function(params, accuracy, target_params, accuracy_threshold)
            state = np.random.randn(state_size)
            action = agent.act(state)
            # NOTE(review): this call runs inside `torch.no_grad()`; the policy
            # only receives gradients if `update_policy` re-enables autograd
            # itself.
            agent.update_policy(state, action, reward)
    return pruned_resnet


In [None]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# NOTE: this cell repeats the transform and loader definitions from the top of
# the notebook with identical settings, so this section can be run on its own.

# Training-time pipeline: random crop with padding, random flip, tensor
# conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Test-time pipeline: no augmentation, same normalisation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Shuffled training batches of 128.
trainset = datasets.CIFAR10(root='./data', train=True,
                            download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)

# Fixed-order test batches of 128.
testset = datasets.CIFAR10(root='./data', train=False,
                           download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Train the new head for 10 epochs at learning rate 0.01, using the test
# loader as the validation set. The return value (the model) is not assigned,
# so the notebook displays its structure as the cell output.
fine_tune(pruned_resnet, trainloader, testloader, 10, 0.01, agent)

Epoch 1 training loss: 0.9787377791331552
Epoch 1 validation accuracy: 0.7383
Epoch 2 training loss: 0.7851004425217124
Epoch 2 validation accuracy: 0.7415
Epoch 3 training loss: 0.7647568668855731
Epoch 3 validation accuracy: 0.7436
Epoch 4 training loss: 0.7632532397194591
Epoch 4 validation accuracy: 0.7451
Epoch 5 training loss: 0.7622494689948723
Epoch 5 validation accuracy: 0.7458
Epoch 6 training loss: 0.7565470947633923
Epoch 6 validation accuracy: 0.7443
Epoch 7 training loss: 0.754063592161364
Epoch 7 validation accuracy: 0.7475
Epoch 8 training loss: 0.7582376152658097
Epoch 8 validation accuracy: 0.7486
Epoch 9 training loss: 0.7546914966819841
Epoch 9 validation accuracy: 0.7478
Epoch 10 training loss: 0.7487817040032438
Epoch 10 validation accuracy: 0.7468


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# NOTE(review): this cell is a sketch. `utils`, `gym`, `ray` and `ppo` are not
# imported anywhere in the visible notebook, so it cannot run as written.

# Define compression operations
pruning = utils.prune.L1Unstructured(amount=0.2)
quantization = torch.quantization.QuantStub()
dequantization = torch.quantization.DeQuantStub()
# Define reward function
# NOTE(review): this rebinds `reward_function`, replacing the four-argument
# version defined earlier. `accuracy` and `inference_time` are `...`
# placeholders, so calling it would fail on the subtraction.
def reward_function(model):
    accuracy = ...
    inference_time = ...
    return accuracy - inference_time

# Define reinforcement learning environment
# The environment wraps the notebook-level `model`; its state is the model's
# state_dict. `step` is an unimplemented stub.
class CompressionEnv(gym.Env):
    def __init__(self):
        self.model = model
        # self.compression_ops = [pruning, quantization, ...]
        # self.current_state = self.model.state_dict()

    def reset(self):
        self.current_state = self.model.state_dict()
        return self.current_state

    def step(self, action):
       pass

# Train reinforcement learning agent
ray.init(ignore_reinit_error=True)
# NOTE(review): `configval` is built but never passed to the trainer below,
# which receives its own inline config instead.
configval = {
    "env": CompressionEnv,
    "framework": "torch",
    "num_workers": 4,
    "num_envs_per_worker": 1,
    "train_batch_size": 1000,
    "lr": 1e-4,
}
trainer = ppo.PPOTrainer(
        config={"framework": "torch", "num_workers": 0},
        env=CompressionEnv,
    )


<font color = 'yellow'>

<font color = 'yellow'>Lottery ticket hypothesis with single-shot network pruning</font>

In [None]:
# Settings for this section. Plan: prune the pretrained ResNet-101 using the
# lottery ticket hypothesis with single-shot network pruning, fine-tune the
# pruned model on the CIFAR10 dataset, compress it further with reinforcement
# learning, and evaluate the compressed model's performance.
# NOTE: `model = resnet` binds a second name to the same object; it is not a copy.
model = resnet
pruning_rate = 0.5
num_itrations = 100
compression_rate = 0.5
batch_size = 128
learning_rate = 0.01
weight_decay = 0.0005
num_episdes = 100
exploration_prob = 0.2
replay_memory_size = 1000
replay_batch_size = 128 # batch size for Reinforcement Learning
discount_factor = 0.9
target_update_frequency = 10
initial_exploration_prob = 1.0
final_exploration_prob = 0.01
exploration_prob_decay_rate = 0.001

In [None]:
def get_sparsity(model):
    """Return the fraction of exactly-zero entries over all ``weight`` parameters.

    Every sub-module that owns a parameter named ``weight`` contributes.
    Modules that register ``weight`` as ``None`` (for example normalisation
    layers built with ``affine=False``) are skipped.

    Returns:
        A 0-dim float tensor in ``[0, 1]``; ``0.0`` when the model has no
        weights at all.
    """
    total = 0
    pruned = 0
    for _, module in model.named_modules():
        weight = module._parameters.get('weight')
        if weight is None:
            continue
        total += weight.nelement()
        pruned += int(weight.data.eq(0).sum())
    if total == 0:
        # Nothing to measure; avoid dividing by zero.
        return torch.tensor(0.0)
    return torch.tensor(pruned / total)

def get_reward(model, accuracy, target_accuracy, target_sparsity):
    """Return ``(accuracy - target_accuracy) + (target_sparsity - sparsity)``.

    ``sparsity`` is measured on ``model`` with ``get_sparsity``.
    """
    measured_sparsity = get_sparsity(model)
    accuracy_gap = accuracy - target_accuracy
    sparsity_gap = target_sparsity - measured_sparsity
    return accuracy_gap + sparsity_gap

def prune_model(model, pruning_rate):
    """Return a deep copy of ``model`` with low-magnitude weights zeroed per layer.

    For every sub-module that owns a ``weight`` parameter, the
    ``int(pruning_rate * numel)`` smallest-magnitude entries define a
    threshold; entries whose magnitude is not strictly greater than that
    threshold are set to zero (so ties at the threshold are pruned as well).
    ``model`` itself is left untouched.

    Args:
        model: module to copy and prune.
        pruning_rate: fraction of each weight tensor to prune, in ``[0, 1]``.

    Returns:
        The pruned copy.
    """
    pruned_model = copy.deepcopy(model)
    # Score and mask each layer inside a single pass over the modules, so the
    # mask always belongs to the tensor it is applied to, however deeply the
    # module is nested.
    for _, module in pruned_model.named_modules():
        weight = module._parameters.get('weight')
        if weight is None:
            continue
        importance_score = weight.data.abs()
        num_to_prune = min(int(pruning_rate * importance_score.nelement()),
                           importance_score.nelement())
        if num_to_prune <= 0:
            # Nothing to remove from this layer (tiny tensor or rate of 0).
            continue
        # k-th smallest magnitude == largest of the k smallest values.
        threshold = torch.kthvalue(importance_score.reshape(-1), num_to_prune).values
        mask = importance_score.gt(threshold).to(weight.dtype)
        weight.data.mul_(mask)
    return pruned_model

In [None]:
def fine_tune(model, trainloader, testloader, num_epochs, learning_rate, weight_decay):
    """Train ``model`` with SGD and report test accuracy after every epoch.

    Batches are moved to the device the model's parameters live on. The model
    is switched to training mode for the optimisation pass and to evaluation
    mode for the accuracy pass, and is therefore returned in evaluation mode.

    Args:
        model: network to train, updated in place.
        trainloader / testloader: iterables of ``(inputs, labels)`` batches.
        num_epochs: number of passes over ``trainloader``.
        learning_rate, weight_decay: SGD settings (momentum is fixed at 0.9).

    Returns:
        The same ``model`` object.

    NOTE: this definition reuses the name of the earlier agent-driven
    ``fine_tune`` and replaces it once this cell has run.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Average over the number of batches seen, not the last batch index.
        print('Epoch: %d, Loss: %.3f' % (epoch + 1, running_loss / max(num_batches, 1)))

        # Evaluation must not use batch statistics or dropout.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in testloader:
                images, labels = images.to(run_device), labels.to(run_device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / max(total, 1)))
    return model

In [None]:
def evaluate_model(model, testloader):
    """Return the top-1 accuracy of ``model`` on ``testloader`` as a percentage.

    The model is evaluated in eval mode on the device its parameters live on,
    and its previous train/eval mode is restored afterwards.

    Args:
        model: network to evaluate.
        testloader: iterable of ``(images, labels)`` batches.

    Returns:
        Accuracy in ``[0, 100]``; ``0.0`` when ``testloader`` yields no samples.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in testloader:
                images, labels = images.to(run_device), labels.to(run_device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)
    if total == 0:
        return 0.0
    return 100 * correct / total

In [None]:
def get_compressed_model(model, compression_rate):
    """Return an independent deep copy of ``model``.

    ``compression_rate`` is accepted but not used; no compression is applied.
    """
    return copy.deepcopy(model)

def get_compression_reward(model, accuracy, target_accuracy, target_sparsity):
    """Return the accuracy surplus plus the sparsity gap for ``model``.

    Computed as ``(accuracy - target_accuracy) + (target_sparsity - sparsity)``
    where ``sparsity`` comes from ``get_sparsity(model)``.
    """
    measured_sparsity = get_sparsity(model)
    accuracy_gap = accuracy - target_accuracy
    sparsity_gap = target_sparsity - measured_sparsity
    return accuracy_gap + sparsity_gap

def get_compression_action(model, compression_rate, exploration_prob):
    """Pick a compression action: random 0/1 when exploring, otherwise 0.

    Exploration happens with probability ``exploration_prob``. ``model`` and
    ``compression_rate`` are accepted but not used.
    """
    explore = np.random.rand() < exploration_prob
    return np.random.randint(0, 2) if explore else 0

def get_compression_state(model):
    """Snapshot the weights of ``model`` as ``{module name: cloned weight tensor}``.

    Only sub-modules that own a parameter named ``weight`` are included.
    """
    return {
        name: module.weight.data.clone()
        for name, module in model.named_modules()
        if 'weight' in module._parameters
    }

def get_compression_next_state(model, action):
    """Snapshot the current weights of ``model``, keyed by module name.

    ``action`` is accepted but not used; the result reflects whatever weights
    ``model`` holds when the function is called.
    """
    weighted_modules = (
        (name, module)
        for name, module in model.named_modules()
        if 'weight' in module._parameters
    )
    return {name: module.weight.data.clone() for name, module in weighted_modules}

In [None]:
# Fine-tune for 10 epochs and measure the final test accuracy.
# NOTE(review): `model` is whatever the configuration cell bound it to;
# `prune_model` is not called in this cell, so the network trained here has
# not been pruned by it. Confirm that is intended.
num_epochs =10
train_loader = trainloader
test_loader = testloader
model = fine_tune(model, train_loader, test_loader, num_epochs, learning_rate, weight_decay)
accuracy = evaluate_model(model, test_loader)

Epoch: 1, Loss: 2.338
Accuracy of the network on the 10000 test images: 13 %
Epoch: 2, Loss: 2.159
Accuracy of the network on the 10000 test images: 32 %
Epoch: 3, Loss: 1.892
Accuracy of the network on the 10000 test images: 36 %
Epoch: 4, Loss: 1.688
Accuracy of the network on the 10000 test images: 42 %
Epoch: 5, Loss: 1.569
Accuracy of the network on the 10000 test images: 48 %
Epoch: 6, Loss: 1.467
Accuracy of the network on the 10000 test images: 51 %
Epoch: 7, Loss: 1.368
Accuracy of the network on the 10000 test images: 53 %
Epoch: 8, Loss: 1.286
Accuracy of the network on the 10000 test images: 55 %
Epoch: 9, Loss: 1.202
Accuracy of the network on the 10000 test images: 60 %
