## Importing Packages

In [1]:
import torch
import torch.nn as nn
from torch.nn import Linear, Conv2d, CrossEntropyLoss
from torch.optim import AdamW, Adam, SGD, RMSprop
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from torchvision.datasets import MNIST
import pytorch_lightning as pl
import numpy as np
from tqdm.autonotebook import tqdm
from sklearn.metrics import classification_report
import warnings
from IPython.display import Image
warnings.filterwarnings(action='once')
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from math import ceil
from torchsummary import summary

  warn(f"Failed to load image Python extension: {e}")


# Random Intro Stuff

In [2]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')

Using cpu for inference


In [4]:
# Helper function to subset data
def subset_data(dataset,k): 
    subset = torch.utils.data.Subset(dataset, range(0, int(2**(-k)*(len(dataset)))))
    return subset

# Helper function to see didstribution of data subset
def analyze_subset(dataset):
    labels = []
    counts = {}
    for i in range(0,len(dataset)):
        labels.append(dataset[i][1])

    for i in range(0,10):
        counts[i] = labels.count(i)
    plt.bar(counts.keys(), counts.values(), tick_label=range(0,10))
    plt.xlabel('Integers')
    plt.ylabel('Frequency')
    plt.title(f'Total # of Digits: {len(dataset)}')
    
# return number of total trainable parameters for model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())


def combine_dataset(train_ds, test_ds):
    full_train_ds = ConcatDataset([train_ds, test_ds])
    return full_train_ds

## Downloading MNIST Data

In [5]:
train_ds = MNIST("mnist", train=True, download=True, transform=transform) # size = 60,000
test_ds = MNIST("mnist", train=False, download=True, transform=transform) # size = 10,000
full_train_ds = combine_dataset(train_ds, test_ds)
subset_ds = subset_data(full_train_ds, 0)


train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=10) # size = 938
test_dl = DataLoader(test_ds, batch_size=64, num_workers=10) # size = 157
full_train_dl = DataLoader(full_train_ds, batch_size=16, num_workers=0) # size = 157
subset_dl = DataLoader(subset_ds, batch_size=16, shuffle=True, num_workers=0)

## Defining ResNets + Training Loops

In [6]:
class ResNet50(pl.LightningModule):
  def __init__(self):
    super().__init__()
    self.model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=True)
#     self.model.fc = Linear(in_features=10, out_features=10)
#     self.model.conv1 = Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    self.loss = CrossEntropyLoss()
    self.epoch = self.current_epoch

  def forward(self, x):
    return self.model(x)

  def training_step(self, batch, batch_no):
    x, y = batch
    logits = self(x)
    loss = self.loss(logits, y)
    logs={"train_loss:", loss}
    batch_dictionary={"loss": loss, "log": logs}
    return batch_dictionary

  def training_epoch_end(self,outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        self.logger.experiment.add_scalar("Train_Loss/Epoch", avg_loss, self.current_epoch)
        epoch_dictionary={'loss': avg_loss}
#         return epoch_dictionary

  def configure_optimizers(self):
    optimizer = SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)
    return [optimizer], [scheduler]

In [7]:
mdl = ResNet50()
mdl.eval

Using cache found in /Users/snehpandya/.cache/torch/hub/pytorch_vision_v0.10.0


<bound method Module.eval of ResNet50(
  (model): DenseNet(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace=True)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (denselayer1): _DenseLayer(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (denselayer2): _DenseLayer(
          (norm1): BatchNorm2d

In [31]:
class VGG16_old(pl.LightningModule):
  def __init__(self):
    super().__init__()
    self.model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=False)
#     self.model.aux_logits = False
#     self.model.model.Conv2d
    self.model.classifier[0] = nn.Linear(in_features=100000, out_features=10)
    self.loss = CrossEntropyLoss()
    self.epoch = self.current_epoch

  def forward(self, x):
    return self.model(x)

  def training_step(self, batch, batch_no):
    x, y = batch
    logits = self(x)
    loss = self.loss(logits, y)
    logs={"train_loss:", loss}
    batch_dictionary={"loss": loss, "log": logs}
    return batch_dictionary

  def training_epoch_end(self,outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        self.logger.experiment.add_scalar("Train_Loss/Epoch", avg_loss, self.current_epoch)
        epoch_dictionary={'loss': avg_loss}
#         return epoch_dictionary

  def configure_optimizers(self):
    optimizer = SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)
    return [optimizer], [scheduler]

In [33]:
model1 = VGG16_old()
count_parameters(model1)
model1.eval

Using cache found in /Users/snehpandya/.cache/torch/hub/pytorch_vision_v0.10.0


<bound method Module.eval of VGG16_old(
  (model): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): 

In [66]:
class VGG16(pl.LightningModule):
    def __init__(self):
        super(VGG16, self).__init__()
        self.scale = 1
        self.in_channels = 3
        self.num_classes = 10
        self.loss = CrossEntropyLoss()
        self.epoch = self.current_epoch
    
        self.features = nn.Sequential(
            nn.Conv2d(self.in_channels, int(ceil(64*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(64*self.scale)), int(ceil(64*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(int(ceil(64*self.scale)), int(ceil(128*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(128*self.scale)), int(ceil(128*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(int(ceil(128*self.scale)), int(ceil(256*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(256*self.scale)), int(ceil(256*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(256*self.scale)), int(ceil(256*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(int(ceil(256*self.scale)), int(ceil(512*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(512*self.scale)), int(ceil(512*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(512*self.scale)), int(ceil(512*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(int(ceil(512*self.scale)), int(ceil(512*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(512*self.scale)), int(ceil(512*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(int(ceil(512*self.scale)), int(ceil(512*self.scale)), kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
            )
        
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        
        self.classifier = nn.Sequential(
            nn.Linear(in_features=int(ceil(25088*self.scale)), out_features=int(ceil(4096*self.scale))),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=int(ceil(4096*self.scale)), out_features=int(ceil(4096*self.scale))),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=int(ceil(4096*self.scale)), out_features=self.num_classes)
            )
        
        self.model = nn.Sequential(self.features, self.avgpool, self.classifier)

    def forward(self, x):
        x = self.model(x)
        x = self.features(x)
        x = self.avgpool(x)
        x =  x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def training_step(self, batch, batch_no):
        x, y = batch
        logits = self(x)
        loss = self.loss(logits, y)
        logs={"train_loss:", loss}
        batch_dictionary={"loss": loss, "log": logs}
        return batch_dictionary

    def training_epoch_end(self, outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        self.logger.experiment.add_scalar("Train Loss/Epoch", avg_loss, self.current_epoch)
        epoch_dictionary={'loss': avg_loss}
            # return epoch_dictionary

    def configure_optimizers(self):
        optimizer = SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)
        return [optimizer], [scheduler]

In [68]:
model = VGG16()
model2= VGG16_old()
print(model.eval)
# print(model2.eval)
# print(count_parameters(model))
# summary(model, (3, 256, 224))
# print([len(x) for x in model.model.parameters()])
# model = VGG(k=3)
# print([len(x) for x in model.model.parameters()])

Using cache found in /Users/snehpandya/.cache/torch/hub/pytorch_vision_v0.10.0


<bound method Module.eval of VGG16(
  (loss): CrossEntropyLoss()
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (1

In [31]:
# [x.shape for x in model.model.parameters()]

In [32]:
list(list(model.model.named_children())[0][1][0].parameters())

AttributeError: 'VGG16' object has no attribute 'model'

In [33]:
# model.model.features[0]

In [34]:
# model = VGG()
# print(f"There are {count_parameters(model):.2e} model paramaters.")
# # model.eval
# summary(model, (3, 256, 224))

In [35]:
trainer = pl.Trainer(max_epochs=1)
trainer.fit(model, subset_dl)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name        | Type              | Params
--------------------------------------------------
0 | loss        | CrossEntropyLoss  | 0     
1 | conv_layers | Sequential        | 14.7 M
2 | avgpool     | AdaptiveAvgPool2d | 0     
3 | classifier  | Sequential        | 119 M 
--------------------------------------------------
134 M     Trainable params
0         Non-trainable params
134 M     Total params
537.206   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], grad_fn=<ReshapeAliasBackward0>)
tensor([[-1.3457e-02,  3.3513e-03,  1.4712e-02,  8.3505e-03, -5.1217e-03,
         -1.8041e-03, -3.4484e-03,  6.0992e-03, -1.4257e-02,  9.3481e-03],
        [-2.0773e-02,  1.3340e-03,  1.6078e-02,  1.0465e-02, -8.1106e-04,
         -8.4145e-03, -4.0394e-03,  7.9348e-03, -1.2325e-02,  1.2095e-02],
        [-1.4212e-02,  8.5107e-04,  1.0457e-02,  2.6875e-03, -5.5397e-03,
         -2.4614e-04,  3.3724e-03,  9.6113e-03, -1.2815e-02,  1.1921e-02],
        [-1.3818e-02, -3.3008e-03,  1.4450e-02,  7.0167e-03,  4.0302e-03,
          1.8420e-03,  5.0243e-03,  4.5468e-03, -1.2527e-02,  9.9529e-03],
        [-1.5432e-02,  2.4791e-03,  9.9781e-03,  2.1912e-03, -1.8043e-03,
          6.0457e-04, -5.0882e-03,  6.7034e

In [None]:
trainer.save_checkpoint("resnet50_mnist.pt")

## Testing Model

In [None]:
def get_prediction(x, model: pl.LightningModule):
  model.freeze() # prepares model for predicting
  probabilities = torch.softmax(model(x), dim=1)
  predicted_class = torch.argmax(probabilities, dim=1)
  return predicted_class, probabilities

inference_model = ResNet50.load_from_checkpoint("resnet50_mnist.pt", map_location=torch.device('cpu'))

In [None]:
true_y, pred_y = [], []
for batch in tqdm(iter(test_dl), total=len(test_dl)):
  x, y = batch
  true_y.extend(y)
  preds, probs = get_prediction(x, inference_model)
  pred_y.extend(preds.cpu())

In [None]:
print(classification_report(true_y, pred_y, digits=3))

## Unused Code

In [None]:
# n_train = 2**(-1)*len(train_ds)
# n_test = 2**(-1)*len(test_ds)
    

# X_train_1, X_test_1, Y_train_1, Y_test_1 = train_test_split(train_ds.data, train_ds.targets, stratify=train_ds.targets, train_size=int(n_train),random_state=1)
# X_train_2, X_test_2, Y_train_2, Y_test_2 = train_test_split(test_ds.data, test_ds.targets, stratify=test_ds.targets, train_size=int(n_test),random_state=1)


# subset_X = torch.cat((X_train_1, X_train_2),dim=0)
# subset_Y = torch.cat((Y_train_1, Y_train_2),dim=0)
# # final_subset = torch.utils.data.ConcatDataset([subset_X, subset_Y], dim=1)


# print(subset_X.shape)
# print(subset_Y.shape)