## Importing Packages

In [10]:
import torch
import torch.nn as nn
from torch.nn import Linear, Conv2d, CrossEntropyLoss, BatchNorm2d
from torch.optim import AdamW, Adam, SGD, RMSprop
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from torchvision.datasets import MNIST
import pytorch_lightning as pl
import numpy as np
from tqdm.autonotebook import tqdm
from sklearn.metrics import classification_report
import warnings
from IPython.display import Image
warnings.filterwarnings(action='once')
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from math import ceil
from torchsummary import summary

# Random Intro Stuff

In [2]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
#     transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')

Using cpu for inference


In [4]:
# Helper function to subset data
def subset_data(dataset,k): 
    subset = torch.utils.data.Subset(dataset, range(0, int(2**(-k)*(len(dataset)))))
    return subset

# Helper function to see didstribution of data subset
def analyze_subset(dataset):
    labels = []
    counts = {}
    for i in range(0,len(dataset)):
        labels.append(dataset[i][1])

    for i in range(0,10):
        counts[i] = labels.count(i)
    plt.bar(counts.keys(), counts.values(), tick_label=range(0,10))
    plt.xlabel('Integers')
    plt.ylabel('Frequency')
    plt.title(f'Total # of Digits: {len(dataset)}')
    
# return number of total trainable parameters for model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())


def combine_dataset(train_ds, test_ds):
    full_train_ds = ConcatDataset([train_ds, test_ds])
    return full_train_ds

## Downloading MNIST Data

In [5]:
train_ds = MNIST("mnist", train=True, download=True, transform=transform) # size = 60,000
test_ds = MNIST("mnist", train=False, download=True, transform=transform) # size = 10,000
full_train_ds = combine_dataset(train_ds, test_ds)
subset_ds = subset_data(full_train_ds, 6)


train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=10) # size = 938
test_dl = DataLoader(test_ds, batch_size=64, num_workers=10) # size = 157
full_train_dl = DataLoader(full_train_ds, batch_size=16, num_workers=0) # size = 157
subset_dl = DataLoader(subset_ds, batch_size=16, shuffle=True, num_workers=0)

## Defining ResNets + Training Loops

In [27]:
class ResNet50(pl.LightningModule):
  def __init__(self, k=2):
    super().__init__()
    self.k = k
    self.scale =2**(-self.k) 
    self.model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
    
    self.model.conv1 = Conv2d(3, int(64*self.scale), kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=True)
    self.model.bn1 = BatchNorm2d(int(self.scale*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[0].conv1 = Conv2d(int(self.scale*64), int(self.scale*64), kernel_size=(1, 1), stride=(1, 1), bias=False)
    self.model.layer1[0].bn1 = BatchNorm2d(int(self.scale*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[0].conv2 = Conv2d(int(self.scale*64), int(self.scale*64), kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    self.model.layer1[0].bn2 = BatchNorm2d(int(self.scale*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[0].conv3 = Conv2d(int(self.scale*64), int(self.scale*256), kernel_size=(1, 1), stride=(1, 1), bias=False)
    self.model.layer1[0].bn3 = BatchNorm2d(int(self.scale*256), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[0].downsample[0] = Conv2d(int(self.scale*64), int(self.scale*256), kernel_size=(1, 1), stride=(1, 1), bias=False)
    self.model.layer1[0].downsample[1] = BatchNorm2d(int(self.scale*256), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    
    self.model.layer1[1].conv1 = Conv2d(int(self.scale*256), int(self.scale*64), kernel_size=(1, 1), stride=(1, 1), bias=False)
    self.model.layer1[1].bn1 = BatchNorm2d(int(self.scale*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[1].conv2 = Conv2d(int(self.scale*64), int(self.scale*64), kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    self.model.layer1[1].bn2 = BatchNorm2d(int(self.scale*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[1].conv3 = Conv2d(int(self.scale*64), int(self.scale*256), kernel_size=(1, 1), stride=(1, 1), bias=False)
    self.model.layer1[1].bn3 = BatchNorm2d(int(self.scale*256), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                       
    self.model.layer1[2].conv1 = Conv2d(int(self.scale*256), int(self.scale*64), kernel_size=(1, 1), stride=(1, 1), bias=False)
    self.model.layer1[2].bn1 = BatchNorm2d(int(self.scale*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[2].conv2 = Conv2d(int(self.scale*64), int(self.scale*64), kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    self.model.layer1[2].bn2 = BatchNorm2d(int(self.scale*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.model.layer1[2].conv3 = Conv2d(int(self.scale*64), int(self.scale*256), kernel_size=(1, 1), stride=(1, 1), bias=False)
    self.model.layer1[2].bn3 = BatchNorm2d(int(self.scale*256), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    #   int(self.scale*
    
    self.loss = CrossEntropyLoss()
    self.epoch = self.current_epoch

  def forward(self, x):
    return self.model(x)

  def training_step(self, batch, batch_no):
    x, y = batch
    logits = self(x)
    loss = self.loss(logits, y)
    logs={"train_loss:", loss}
    batch_dictionary={"loss": loss, "log": logs}
    return batch_dictionary

  def training_epoch_end(self,outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        self.logger.experiment.add_scalar("Train_Loss/Epoch", avg_loss, self.current_epoch)
        epoch_dictionary={'loss': avg_loss}
#         return epoch_dictionary

  def configure_optimizers(self):
    optimizer = SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)
    return [optimizer], [scheduler]

In [28]:
model = ResNet50()
model.eval

Using cache found in /Users/snehpandya/.cache/torch/hub/pytorch_vision_v0.10.0


<bound method Module.eval of ResNet50(
  (model): ResNet(
    (conv1): Conv2d(3, 16, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequenti

In [92]:
# model2= VGG16_old()
# print(model.eval)
# print(model2.eval)
# print(count_parameters(model))
# summary(model, (3, 256, 224))
# print([len(x) for x in model.model.parameters()])
# model = VGG(k=3)
# print([len(x) for x in model.model.parameters()])

In [93]:
[x.shape for x in model1.model.parameters()]

[torch.Size([64, 3, 3, 3]),
 torch.Size([64]),
 torch.Size([64, 64, 3, 3]),
 torch.Size([64]),
 torch.Size([128, 64, 3, 3]),
 torch.Size([128]),
 torch.Size([128, 128, 3, 3]),
 torch.Size([128]),
 torch.Size([256, 128, 3, 3]),
 torch.Size([256]),
 torch.Size([256, 256, 3, 3]),
 torch.Size([256]),
 torch.Size([256, 256, 3, 3]),
 torch.Size([256]),
 torch.Size([512, 256, 3, 3]),
 torch.Size([512]),
 torch.Size([512, 512, 3, 3]),
 torch.Size([512]),
 torch.Size([512, 512, 3, 3]),
 torch.Size([512]),
 torch.Size([512, 512, 3, 3]),
 torch.Size([512]),
 torch.Size([512, 512, 3, 3]),
 torch.Size([512]),
 torch.Size([512, 512, 3, 3]),
 torch.Size([512]),
 torch.Size([4096, 25088]),
 torch.Size([4096]),
 torch.Size([4096, 4096]),
 torch.Size([4096]),
 torch.Size([10, 4096]),
 torch.Size([10])]

In [95]:
# list(list(model.model.named_children())[0][1][0].parameters())

In [96]:
# model.model.features[0]

In [97]:
# model = VGG()
# print(f"There are {count_parameters(model):.2e} model paramaters.")
# # model.eval
# summary(model, (3, 256, 224))

In [111]:
trainer = pl.Trainer(max_epochs=1)
trainer.fit(model1, subset_dl)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name  | Type             | Params
-------------------------------------------
0 | model | VGG              | 134 M 
1 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
134 M     Trainable params
0         Non-trainable params
134 M     Total params
537.206   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

In [112]:
trainer.save_checkpoint("resnet50_mnist.pt")

## Testing Model

In [115]:
def get_prediction(x, model: pl.LightningModule):
  model.freeze() # prepares model for predicting
  probabilities = torch.softmax(model(x), dim=1)
  predicted_class = torch.argmax(probabilities, dim=1)
  return predicted_class, probabilities

inference_model = VGG16().load_from_checkpoint("resnet50_mnist.pt", map_location=torch.device('cpu'))

Using cache found in /Users/snehpandya/.cache/torch/hub/pytorch_vision_v0.10.0
Using cache found in /Users/snehpandya/.cache/torch/hub/pytorch_vision_v0.10.0


In [118]:
true_y, pred_y = [], []
for batch in tqdm(iter(test_dl), total=len(test_dl)):
  x, y = batch
  true_y.extend(y)
  preds, probs = get_prediction(x, inference_model)
  pred_y.extend(preds.cpu())

PicklingError: Can't pickle <function <lambda> at 0x16e011c10>: attribute lookup <lambda> on __main__ failed

In [None]:
print(classification_report(true_y, pred_y, digits=3))

## Unused Code

In [None]:
# n_train = 2**(-1)*len(train_ds)
# n_test = 2**(-1)*len(test_ds)
    

# X_train_1, X_test_1, Y_train_1, Y_test_1 = train_test_split(train_ds.data, train_ds.targets, stratify=train_ds.targets, train_size=int(n_train),random_state=1)
# X_train_2, X_test_2, Y_train_2, Y_test_2 = train_test_split(test_ds.data, test_ds.targets, stratify=test_ds.targets, train_size=int(n_test),random_state=1)


# subset_X = torch.cat((X_train_1, X_train_2),dim=0)
# subset_Y = torch.cat((Y_train_1, Y_train_2),dim=0)
# # final_subset = torch.utils.data.ConcatDataset([subset_X, subset_Y], dim=1)


# print(subset_X.shape)
# print(subset_Y.shape)