In [None]:
!pip install torchinfo

Collecting torchinfo
  Downloading https://files.pythonhosted.org/packages/e7/d3/11f9901d75f4d105b2b1700c81f83579fd33c4cf0ec88bb7a165d96c7bb4/torchinfo-0.1.5-py3-none-any.whl
Installing collected packages: torchinfo
Successfully installed torchinfo-0.1.5


In [None]:
import os
import gc
from copy import deepcopy

import numpy as np

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary

from tqdm import tqdm
from torchvision import datasets, transforms
import torchvision.models as models

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [None]:
torch.manual_seed(0)

In [None]:
!bash get_data.sh

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Utils

In [None]:
def save_model(path, model):
    example = torch.rand(1, 3, 224, 224)
    traced_script_module = torch.jit.trace(model.cpu(), example)
    traced_script_module.save(path+"_trace")
    torch.save(model, path+"_torch")

## Training
This cell uses starter code from University of Wrocław Neural Network Course - [Assignment 3](https://github.com/janchorowski/dl_uwr)

In [None]:
def compute_error_rate(model, data_loader, cuda=True, verbose = False, delta = 0.5, infer = False):
    model.eval()
    num_errs = 0.0
    num_examples = 0
    pred_fun = nn.Sigmoid()
    
    resmap = []
    for x, y in data_loader:
        if cuda:
            x = x.cuda()
            y = y.cuda()

        with torch.no_grad():
            outputs = model.forward(x).view(-1)
            predictions = pred_fun(outputs) > delta
            num_errs += (predictions != y).sum().item()
            num_examples += x.size(0)
            
            if infer:
                for out, dey in zip(outputs, y):
                    resmap.append((float(pred_fun(out)), int(dey)))
    
    if infer:
        sorted_resmap = list(sorted(resmap))
        all_zero_labels, all_one_labels = 0, 0
        for _, label in sorted_resmap:
            if label == 0:
                all_zero_labels += 1
            else:
                all_one_labels += 1
                
        all_labels = all_zero_labels + all_one_labels
        
        
        # na początku mówimy że delta = 0.0
        # mówimy że wszystko > 0.0 traktujemy jako 1.
        # wtedy, poprawnie klasyfikujemy wszystkie 1, ale wszystkie zera mylimy.
        
        missclasified_ones = 0
        missclasified_zeros = all_zero_labels
        
        best_err = 10.0
        delta = 0.0
        for prob, label in sorted_resmap:
            if label == 0:
                #przesunęliśmy się w prawo, i dodaliśmy zero w lewo. To znaczy że teraz je poprawnie klasyf.
                missclasified_zeros -= 1
            else:
                #przesunęliśmy się w prawo, i dodaliśmy jeden w lewo. To znaczy że teraz je źle klasyf.
                missclasified_ones += 1
            
            t_rate = (missclasified_zeros + missclasified_ones)/all_labels
            if t_rate < best_err:
                best_err = t_rate
                delta = prob
                
        return delta, best_err
        #print(f"all {all_labels} zero {all_zero_labels} ones {all_one_labels}")
                        
    return 100.0 * num_errs / num_examples


def train(
    model, 
    data_loaders, 
    optimizer, 
    criterion, 
    num_epochs=1, 
    log_every=100, 
    cuda=True,
    verbose=True):

    if cuda:
        model.cuda()

    iter_ = 0
    epoch = 0
    best_params = None
    best_val_err = np.inf
    history = {"train_losses": [], "train_errs": [], "val_errs": []}
    pred_fun = nn.Sigmoid()

    if verbose:
        print("Training the model!")
        print("You can interrupt it at any time.")
    try:
        while epoch < num_epochs:
            model.train()
            gc.collect()
            epoch += 1

            for x, y in data_loaders["train"]:

                if cuda:
                    x = x.cuda()
                    y = y.cuda()

                iter_ += 1
                optimizer.zero_grad()
                out = model(x).view(-1)

                loss = criterion(out, y.float())
                loss.backward()
                optimizer.step()
                predictions = pred_fun(out) > 0.5
                err_rate = 100.0 * (predictions != y).sum() / y.size(0)

                history["train_losses"].append(loss.item())
                history["train_errs"].append(err_rate.item())

                                
                if iter_ % log_every == 0 and verbose:
                    print(
                        "Minibatch {0: >6}  | loss {1: >5.2f} | err rate {2: >5.2f}%".format(
                            iter_, loss.item(), err_rate
                        )
                    )

            val_err_rate = compute_error_rate(model, data_loaders["val"], cuda)
            history["val_errs"].append((iter_, val_err_rate))

            if val_err_rate < best_val_err:
        
                best_epoch = epoch
                best_val_err = val_err_rate
                best_params = [p.detach().cpu() for p in model.parameters()]
                
            m = "After epoch {0: >2} | valid err rate: {1: >5.2f}% | doing {2: >3} epochs".format(
                epoch, val_err_rate, num_epochs
            )
            if verbose:
                print("{0}\n{1}\n{0}".format("-" * len(m), m))

    except KeyboardInterrupt:
        pass
    if best_params is not None and verbose:
        print("\nLoading best params on validation set (epoch %d)\n" % (best_epoch))
        with torch.no_grad():
            for param, best_param in zip(model.parameters(), best_params):
                param[...] = best_param
    if verbose:
       plot_history(history)

def plot_history(history):
    figsize(16, 4)
    subplot(1, 2, 1)
    train_loss = np.array(history["train_losses"])
    semilogy(np.arange(train_loss.shape[0]), train_loss, label="batch train loss")
    legend()

    subplot(1, 2, 2)
    train_errs = np.array(history["train_errs"])
    plot(np.arange(train_errs.shape[0]), train_errs, label="batch train error rate")
    val_errs = np.array(history["val_errs"])
    plot(val_errs[:, 0], val_errs[:, 1], label="validation error rate", color="r")
    ylim(0, 20)
    legend()

# Dataset

In [None]:
class Subset(Dataset):
    r"""
    Subset of a dataset at specified indices.

    Arguments:
        dataset (Dataset): The whole Dataset
        indices (sequence): Indices in the whole set selected for subset
    """
    def __init__(self, dataset, indices, transform):
        self.dataset = dataset
        self.indices = indices
        self.transform = transform

    def __getitem__(self, idx):
        im, labels = self.dataset[self.indices[idx]]
        return self.transform(im), labels

    def __len__(self):
        return len(self.indices)

### Cats from catsvsdogs kaggledataset for Cat images and CalltechDataset for NonCats images

In [None]:
path = 'Dataset'
caltech_and_cats = datasets.ImageFolder(path)
print(f"Dataset Size: {len(caltech_and_cats)}")
print(f"Dataset Classes: {caltech_and_cats.classes}")
print(f"Number of Cats: {sum(np.array(caltech_and_cats.targets)==0)}")
print(f"Number of NonCats: {sum(np.array(caltech_and_cats.targets)==1)}")
assert len(caltech_and_cats.classes) == 2, "if not restart notebook"

Dataset Size: 21643
Dataset Classes: ['Cats', 'NonCats']
Number of Cats: 12499
Number of NonCats: 9144


### Standard Imagenet Transforms ( We are using model pretrained on imagenet)

In [None]:
image_net_transforms = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ]
    )

### Dataset split 80-10-10

In [None]:
all_indexes = np.random.permutation(len(caltech_and_cats))
idxs = len(all_indexes)
train_indexes = all_indexes[:int(idxs * 0.8)]
val_indexes = all_indexes[int(idxs * 0.9):]


# Train/Test Loaders

In [None]:
train_dataset = Subset(
    caltech_and_cats,
    train_indexes,
    image_net_transforms
)


test_dataset = Subset(
    caltech_and_cats,
    val_indexes,
    image_net_transforms
)


datasets_ = {
    "train": train_dataset,
    "val": train_dataset,
}


batch_size = 64

train_data_loaders = {
    "train": torch.utils.data.DataLoader(
        datasets_["train"], batch_size=batch_size, num_workers=2, shuffle=True),
    "val": torch.utils.data.DataLoader(
        datasets_["val"], batch_size=batch_size, num_workers=2, shuffle=False)}

## Plain Pretrained Models

In [None]:
model = torch.load('/content/drive/MyDrive/Models/MobileNetV3_small_torch')
device = 'cuda'
model = model.to(device)

In [None]:
summary(model)

Layer (type:depth-idx)                        Param #
Sequential                                    --
├─Sequential: 1-1                             --
│    └─ConvBNActivation: 2-1                  --
│    │    └─Conv2d: 3-1                       (432)
│    │    └─BatchNorm2d: 3-2                  (32)
│    │    └─Hardswish: 3-3                    --
│    └─InvertedResidual: 2-2                  --
│    │    └─Sequential: 3-4                   (744)
│    └─InvertedResidual: 2-3                  --
│    │    └─Sequential: 3-5                   (3,864)
│    └─InvertedResidual: 2-4                  --
│    │    └─Sequential: 3-6                   (5,416)
│    └─InvertedResidual: 2-5                  --
│    │    └─Sequential: 3-7                   (13,736)
│    └─InvertedResidual: 2-6                  --
│    │    └─Sequential: 3-8                   (57,264)
│    └─InvertedResidual: 2-7                  --
│    │    └─Sequential: 3-9                   (57,264)
│    └─InvertedResidual: 2-8

In [None]:
def verify_prune(imodel):
    allzero = 0
    gallw = 0

    for k, v in dict(imodel.named_modules()).items():
        if ((len(list(v.children())) == 0)):
            if str(v) not in [
                "Hardswish()", 
                "ReLU(inplace=True)", 
                "Identity()", 
                "AdaptiveAvgPool2d(output_size=1)",
                "Flatten(start_dim=1, end_dim=-1)"]:

                zerow = float(torch.sum(v.weight == 0))
                allw = float(torch.sum(v.weight > -10000000.0))
                if v.bias != None:
                    zerow += float(torch.sum(v.bias == 0))
                    allw += float(torch.sum(v.bias > -10000000.0))

                #print(str(v).split("(")[0], 100.0 * zerow/allw)
                allzero += zerow
                gallw += allw

    return(allzero/gallw * 100.0)

print("prune factor", verify_prune(model))

prune factor 0.0


In [None]:
val_err_rate = compute_error_rate(model, train_data_loaders["val"], cuda = True, verbose=True)
print("model val err rate", val_err_rate)

model val err rate 2.500866350929883


In [None]:
import torch.nn.utils.prune as prune

def testprune(startmodel, prune_factor, cuda = True):
    testmodel = deepcopy(startmodel)
    parameter_to_prune = []
    for k, v in dict(testmodel.named_modules()).items():
        if ((len(list(v.children())) == 0)):
            if str(v) not in [
                "Hardswish()", 
                "ReLU(inplace=True)", 
                "Identity()", 
                "AdaptiveAvgPool2d(output_size=1)",
                "Flatten(start_dim=1, end_dim=-1)"]:

                parameter_to_prune.append((v, "weight"))

    prune.global_unstructured(
        parameter_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=prune_factor,
    )
    
    print(f"prune target {prune_factor}")
    print(f"actually pruned {verify_prune(testmodel)}")
    
    delta, best_err = compute_error_rate(testmodel, train_data_loaders["val"], cuda = cuda, verbose=True, infer=True)
    print("best test err", best_err, "at", delta)

    print("naive delta=0.5 for validation")
    val_err = compute_error_rate(testmodel, train_data_loaders["val"], cuda = cuda, verbose=True, delta=0.5)
    print(f"error {val_err}")
    print(f"infered delta={delta} for validation")
    val_err = compute_error_rate(testmodel, train_data_loaders["val"], cuda = cuda, verbose=True, delta=delta)
    print(f"error {val_err}")

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(testmodel.parameters(), lr=0.0001)
    
    print("retraining model")    
    train(testmodel, train_data_loaders, optimizer, criterion, num_epochs=3, log_every=50, cuda = True, verbose=False)
    print("verify prune")
    print(verify_prune(testmodel))

    val_err = compute_error_rate(testmodel, train_data_loaders["val"], cuda = cuda, verbose=True, delta=0.5)
    print("best test err", best_err, "at", delta)

    print("naive delta=0.5 for validation")
    val_err = compute_error_rate(testmodel, train_data_loaders["val"], cuda = cuda, verbose=True, delta=0.5)
    print(f"error {val_err}")
    print(f"infered delta={delta} for validation")
    val_err = compute_error_rate(testmodel, train_data_loaders["val"], cuda = cuda, verbose=True, delta=delta)
    print(f"error {val_err}")
    
    
    return testmodel
    
outmodel = testprune(model, 0.50)

prune target 0.5
actually pruned 49.5178339451371
best test err 0.16056370567171074 at 0.6114173531532288
naive delta=0.5 for validation
error 31.673789996534595
infered delta=0.6114173531532288 for validation
error 16.056370567171076
retraining model
verify prune
49.5178339451371
best test err 0.16056370567171074 at 0.6114173531532288
naive delta=0.5 for validation
error 4.9844056832621
infered delta=0.6114173531532288 for validation
error 7.716298948827538


In [None]:
def save_model(path, model):
    example = torch.rand(1, 3, 224, 224)
    traced_script_module = torch.jit.trace(model.cpu(), example)
    traced_script_module.save(path+"_trace")
    torch.save(model, path+"_torch")

In [None]:
save_model('mobilenet_v3_pruned', outmodel)