# MobileNetV2 Compression (Kaggle)
### Real parameter reduction + structured pruning + INT8 quantization (PTQ + QAT)
- Parameter decrease: `width_mult`
- Structured pruning: channel pruning with `torch-pruning`
- Quantization: INT8 PTQ + QAT (CPU / fbgemm)

> If you use your own dataset, replace **CELL 3** only.


## CELL 1 — Install (Kaggle)


In [3]:
!pip -q install torchinfo thop pandas torch-pruning


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.2/70.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h

## CELL 2 — Imports + Setup


In [4]:
import os, time, copy, random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
from torchvision import datasets, transforms
from torchvision.models import mobilenet_v2
from torchvision.models.quantization import mobilenet_v2 as qmobilenet_v2

from thop import profile
import torch_pruning as tp

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("DEVICE:", DEVICE)

def seed_all(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

seed_all(42)




DEVICE: cuda


## CELL 3 — Dataset (CIFAR-10 example)
Replace this cell if you have a custom Kaggle dataset.


In [5]:
BATCH = 128
NUM_WORKERS = 2
NUM_CLASSES = 10

# MobileNetV2 = ImageNet normalization + 224 resize
train_tf = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
])
test_tf = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
])

train_ds = datasets.CIFAR10("./data", train=True, download=True, transform=train_tf)
test_ds  = datasets.CIFAR10("./data", train=False, download=True, transform=test_tf)

train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)


100%|██████████| 170M/170M [00:02<00:00, 57.1MB/s] 


## CELL 4 — Helpers (Acc, Params, Size, Latency, FLOPs)


In [6]:
@torch.no_grad()
def evaluate(model, loader, device):
    model.eval()
    correct, total = 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        out = model(x)
        pred = out.argmax(1)
        correct += (pred == y).sum().item()
        total += y.numel()
    return correct / total

def count_params(model):
    return sum(p.numel() for p in model.parameters())

def size_mb_fp32(model):
    return count_params(model) * 4 / (1024**2)

@torch.no_grad()
def latency_ms(model, device, input_shape=(1,3,224,224), warmup=30, iters=100):
    model.eval()
    x = torch.randn(*input_shape).to(device)
    for _ in range(warmup):
        _ = model(x)
    if device == "cuda":
        torch.cuda.synchronize()
    t0 = time.time()
    for _ in range(iters):
        _ = model(x)
    if device == "cuda":
        torch.cuda.synchronize()
    t1 = time.time()
    return (t1-t0)*1000/iters

def flops_params(model, device, input_shape=(1,3,224,224)):
    model.eval()
    x = torch.randn(*input_shape).to(device)
    macs, params = profile(model, inputs=(x,), verbose=False)
    return 2*macs, params


## CELL 5 — Train / Fine-tune


In [7]:
def train_model(model, train_loader, test_loader, epochs=3, lr=1e-4, device=DEVICE):
    model = model.to(device)
    opt = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    best_acc = 0.0
    best = copy.deepcopy(model.state_dict())

    for ep in range(1, epochs+1):
        model.train()
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            opt.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            opt.step()

        acc = evaluate(model, test_loader, device)
        print(f"epoch {ep}/{epochs} | acc={acc:.4f}")
        if acc > best_acc:
            best_acc = acc
            best = copy.deepcopy(model.state_dict())

    model.load_state_dict(best)
    return model, best_acc


# PART A — Parameter Decrease (width_mult)


## CELL 6 — Build MobileNetV2 with width_mult


In [9]:
import torch
import torch.nn as nn
import torchvision
from torchvision.models import mobilenet_v2

# Utility to count parameters
def count_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Build MobileNetV2 with safe pretrained loading
def build_mobilenet(num_classes=10, width_mult=1.0, pretrained=True):
    weights = torchvision.models.MobileNet_V2_Weights.DEFAULT if pretrained else None
    m = mobilenet_v2(weights=None, width_mult=width_mult)  # start with no weights
    m.classifier[1] = nn.Linear(m.last_channel, num_classes)

    if pretrained:
        # load only compatible pretrained weights
        pretrained_model = mobilenet_v2(weights=weights)
        pretrained_dict = pretrained_model.state_dict()
        model_dict = m.state_dict()
        # keep only matching layers
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and v.size() == model_dict[k].size()}
        model_dict.update(pretrained_dict)
        m.load_state_dict(model_dict)

    return m

# Example usage
NUM_CLASSES = 10

mb_full  = build_mobilenet(NUM_CLASSES, width_mult=1.0, pretrained=True)
mb_small = build_mobilenet(NUM_CLASSES, width_mult=0.5, pretrained=True)

print("params full :", count_params(mb_full))
print("params small:", count_params(mb_small))


params full : 2236682
params small: 700490


## CELL 7 — Fine-tune baselines


In [11]:
mb_full,  acc_full  = train_model(mb_full,  train_loader, test_loader, epochs=3, lr=1e-4)
mb_small, acc_small = train_model(mb_small, train_loader, test_loader, epochs=3, lr=1e-4)

print("acc_full :", acc_full)
print("acc_small:", acc_small)


epoch 1/3 | acc=0.9076
epoch 2/3 | acc=0.9254
epoch 3/3 | acc=0.9338
epoch 1/3 | acc=0.3407
epoch 2/3 | acc=0.4325
epoch 3/3 | acc=0.4696
acc_full : 0.9338
acc_small: 0.4696


# PART B — Structured Pruning 
This removes channels (not just zeroing weights) so params actually go down.


## CELL 8 — Structured Channel Pruning with torch-pruning


In [12]:

def structured_prune_mobilenet(model, example_input, prune_ratio=0.30):
    """Real parameter reduction by removing channels."""
    model = copy.deepcopy(model).cpu().eval()

    imp = tp.importance.MagnitudeImportance(p=1)

    ignored_layers = []
    for m in model.modules():
        if isinstance(m, nn.Linear) and m.out_features == NUM_CLASSES:
            ignored_layers.append(m)

    pruner = tp.pruner.MagnitudePruner(
        model,
        example_inputs=example_input,
        importance=imp,
        pruning_ratio=prune_ratio,
        ignored_layers=ignored_layers,
    )

    pruner.step()
    return model

example_inp = torch.randn(1,3,224,224)
mb_struct_pruned = structured_prune_mobilenet(mb_full, example_inp, prune_ratio=0.30).to(DEVICE)

acc_struct = evaluate(mb_struct_pruned, test_loader, DEVICE)
print("Structured-pruned acc (before finetune):", acc_struct)
print("Params after structured prune:", count_params(mb_struct_pruned))


Structured-pruned acc (before finetune): 0.0976
Params after structured prune: 1116172


## CELL 9 — Fine-tune after structured pruning


In [13]:
mb_struct_pruned, acc_struct_ft = train_model(mb_struct_pruned, train_loader, test_loader, epochs=2, lr=5e-5)
print("Structured-pruned + finetune acc:", acc_struct_ft)


epoch 1/2 | acc=0.7309
epoch 2/2 | acc=0.7839
Structured-pruned + finetune acc: 0.7839


# PART C — Quantization (INT8) on CPU
- PTQ = faster, simpler
- QAT = best INT8 accuracy

Note: INT8 runs on CPU (fbgemm).


## CELL 10 — Build quant-ready MobileNetV2 and load weights


In [15]:

import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torch.ao.quantization as tq
import torchvision
from torchvision.models import mobilenet_v2, qmobilenet_v2

# ---- 1. Remove pruning masks BEFORE quantization ----
def remove_pruning(model):
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            if hasattr(m, "weight_orig"):
                prune.remove(m, "weight")
    return model

mb_struct_pruned = remove_pruning(mb_struct_pruned).cpu().eval()

# ---- 2. Build quant-ready model with SAME architecture ----
def build_quantready_mobilenet(num_classes=10, width_mult=1.0, from_float_model=None):
    qmodel = qmobilenet_v2(
        weights=None,
        quantize=False,
        width_mult=width_mult
    )
    qmodel.classifier[1] = nn.Linear(qmodel.last_channel, num_classes)

    if from_float_model is not None:
        # architectures MUST match
        qmodel.load_state_dict(from_float_model.state_dict(), strict=True)

    return qmodel

# ---- 3. Create quant-ready model ----
mb_qready = build_quantready_mobilenet(
    NUM_CLASSES,
    width_mult=1.0,            # MUST match pruned model
    from_float_model=mb_struct_pruned
)

# ---- 4. Prepare for quantization ----
mb_qready.qconfig = tq.get_default_qat_qconfig("fbgemm")
tq.prepare_qat(mb_qready, inplace=True)

print("✅ Quant-ready model built successfully")


ImportError: cannot import name 'qmobilenet_v2' from 'torchvision.models' (/usr/local/lib/python3.12/dist-packages/torchvision/models/__init__.py)

In [1]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torch.ao.quantization as tq
import torchvision
from torchvision.models import mobilenet_v2

# ---- 1. Remove pruning masks BEFORE quantization ----
def remove_pruning(model):
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            if hasattr(m, "weight_orig"):
                prune.remove(m, "weight")
    return model

mb_struct_pruned = remove_pruning(mb_struct_pruned).cpu().eval()

# ---- 2. Fuse Conv + BN + ReLU (REQUIRED for quantization) ----
def fuse_mobilenet(model):
    for m in model.modules():
        if isinstance(m, torchvision.models.mobilenetv2.InvertedResidual):
            if len(m.conv) == 3:
                tq.fuse_modules(m.conv, ["0", "1", "2"], inplace=True)
            elif len(m.conv) == 6:
                tq.fuse_modules(m.conv, ["0", "1", "2"], inplace=True)
                tq.fuse_modules(m.conv, ["3", "4", "5"], inplace=True)
    return model

mb_struct_pruned = fuse_mobilenet(mb_struct_pruned)

# ---- 3. Assign Quantization-Aware Training config ----
mb_struct_pruned.qconfig = tq.get_default_qat_qconfig("fbgemm")

# ---- 4. Prepare for QAT ----
tq.prepare_qat(mb_struct_pruned, inplace=True)

print("✅ Quantization-ready MobileNetV2 (qmobilenet_v2 NOT required)")


NameError: name 'mb_struct_pruned' is not defined

In [16]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torch.ao.quantization as tq
import torchvision
from torchvision.models import mobilenet_v2

# ---- 1. Remove pruning reparameterization ----
def remove_pruning(model):
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            if hasattr(m, "weight_orig"):
                prune.remove(m, "weight")
    return model

mb_pruned = remove_pruning(mb_struct_pruned).cpu().eval()

# ---- 2. Fuse MobileNetV2 modules (required for quantization) ----
def fuse_mobilenet(model):
    for m in model.modules():
        if isinstance(m, torchvision.models.mobilenetv2.InvertedResidual):
            if m.use_res_connect:
                tq.fuse_modules(m.conv, ["0", "1"], inplace=True)
            else:
                tq.fuse_modules(m.conv, ["0", "1"], inplace=True)
                tq.fuse_modules(m.conv, ["3", "4"], inplace=True)
    return model

mb_pruned = fuse_mobilenet(mb_pruned)

# ---- 3. Assign QAT configuration ----
mb_pruned.qconfig = tq.get_default_qat_qconfig("fbgemm")

# ---- 4. Prepare for Quantization-Aware Training ----
tq.prepare_qat(mb_pruned, inplace=True)

print("✅ Quantization-ready MobileNetV2 (no qmobilenet_v2 needed)")


AssertionError: did not find fuser method for: (<class 'torchvision.ops.misc.Conv2dNormActivation'>, <class 'torch.nn.modules.conv.Conv2d'>) 

In [14]:
import torch.ao.quantization as tq

def build_quantready_mobilenet(num_classes=10, width_mult=1.0, from_float_model=None):
    q = qmobilenet_v2(weights=None, quantize=False, width_mult=width_mult)  # float, quant-ready
    q.classifier[1] = nn.Linear(q.last_channel, num_classes)
    if from_float_model is not None:
        q.load_state_dict(from_float_model.state_dict(), strict=False)
    return q

# Best practice: quantize the pruned+finetuned model
mb_qready = build_quantready_mobilenet(NUM_CLASSES, width_mult=1.0, from_float_model=mb_struct_pruned.to("cpu"))


RuntimeError: Error(s) in loading state_dict for QuantizableMobileNetV2:
	size mismatch for features.0.0.weight: copying a param with shape torch.Size([22, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 3, 3, 3]).
	size mismatch for features.0.1.weight: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.0.1.bias: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.0.1.running_mean: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.0.1.running_var: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.1.conv.0.0.weight: copying a param with shape torch.Size([22, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 1, 3, 3]).
	size mismatch for features.1.conv.0.1.weight: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.1.conv.0.1.bias: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.1.conv.0.1.running_mean: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.1.conv.0.1.running_var: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.1.conv.1.weight: copying a param with shape torch.Size([11, 22, 1, 1]) from checkpoint, the shape in current model is torch.Size([16, 32, 1, 1]).
	size mismatch for features.1.conv.2.weight: copying a param with shape torch.Size([11]) from checkpoint, the shape in current model is torch.Size([16]).
	size mismatch for features.1.conv.2.bias: copying a param with shape torch.Size([11]) from checkpoint, the shape in current model is torch.Size([16]).
	size mismatch for features.1.conv.2.running_mean: copying a param with shape torch.Size([11]) from checkpoint, the shape in current model is torch.Size([16]).
	size mismatch for features.1.conv.2.running_var: copying a param with shape torch.Size([11]) from checkpoint, the shape in current model is torch.Size([16]).
	size mismatch for features.2.conv.0.0.weight: copying a param with shape torch.Size([67, 11, 1, 1]) from checkpoint, the shape in current model is torch.Size([96, 16, 1, 1]).
	size mismatch for features.2.conv.0.1.weight: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.0.1.bias: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.0.1.running_mean: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.0.1.running_var: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.1.0.weight: copying a param with shape torch.Size([67, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([96, 1, 3, 3]).
	size mismatch for features.2.conv.1.1.weight: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.1.1.bias: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.1.1.running_mean: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.1.1.running_var: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.2.conv.2.weight: copying a param with shape torch.Size([16, 67, 1, 1]) from checkpoint, the shape in current model is torch.Size([24, 96, 1, 1]).
	size mismatch for features.2.conv.3.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.2.conv.3.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.2.conv.3.running_mean: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.2.conv.3.running_var: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.3.conv.0.0.weight: copying a param with shape torch.Size([100, 16, 1, 1]) from checkpoint, the shape in current model is torch.Size([144, 24, 1, 1]).
	size mismatch for features.3.conv.0.1.weight: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.0.1.bias: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.0.1.running_mean: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.0.1.running_var: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.1.0.weight: copying a param with shape torch.Size([100, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([144, 1, 3, 3]).
	size mismatch for features.3.conv.1.1.weight: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.1.1.bias: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.1.1.running_mean: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.1.1.running_var: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.3.conv.2.weight: copying a param with shape torch.Size([16, 100, 1, 1]) from checkpoint, the shape in current model is torch.Size([24, 144, 1, 1]).
	size mismatch for features.3.conv.3.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.3.conv.3.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.3.conv.3.running_mean: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.3.conv.3.running_var: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([24]).
	size mismatch for features.4.conv.0.0.weight: copying a param with shape torch.Size([100, 16, 1, 1]) from checkpoint, the shape in current model is torch.Size([144, 24, 1, 1]).
	size mismatch for features.4.conv.0.1.weight: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.0.1.bias: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.0.1.running_mean: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.0.1.running_var: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.1.0.weight: copying a param with shape torch.Size([100, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([144, 1, 3, 3]).
	size mismatch for features.4.conv.1.1.weight: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.1.1.bias: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.1.1.running_mean: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.1.1.running_var: copying a param with shape torch.Size([100]) from checkpoint, the shape in current model is torch.Size([144]).
	size mismatch for features.4.conv.2.weight: copying a param with shape torch.Size([22, 100, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 144, 1, 1]).
	size mismatch for features.4.conv.3.weight: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.4.conv.3.bias: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.4.conv.3.running_mean: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.4.conv.3.running_var: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.5.conv.0.0.weight: copying a param with shape torch.Size([134, 22, 1, 1]) from checkpoint, the shape in current model is torch.Size([192, 32, 1, 1]).
	size mismatch for features.5.conv.0.1.weight: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.0.1.bias: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.0.1.running_mean: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.0.1.running_var: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.1.0.weight: copying a param with shape torch.Size([134, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([192, 1, 3, 3]).
	size mismatch for features.5.conv.1.1.weight: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.1.1.bias: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.1.1.running_mean: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.1.1.running_var: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.5.conv.2.weight: copying a param with shape torch.Size([22, 134, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 192, 1, 1]).
	size mismatch for features.5.conv.3.weight: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.5.conv.3.bias: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.5.conv.3.running_mean: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.5.conv.3.running_var: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.6.conv.0.0.weight: copying a param with shape torch.Size([134, 22, 1, 1]) from checkpoint, the shape in current model is torch.Size([192, 32, 1, 1]).
	size mismatch for features.6.conv.0.1.weight: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.0.1.bias: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.0.1.running_mean: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.0.1.running_var: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.1.0.weight: copying a param with shape torch.Size([134, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([192, 1, 3, 3]).
	size mismatch for features.6.conv.1.1.weight: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.1.1.bias: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.1.1.running_mean: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.1.1.running_var: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.6.conv.2.weight: copying a param with shape torch.Size([22, 134, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 192, 1, 1]).
	size mismatch for features.6.conv.3.weight: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.6.conv.3.bias: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.6.conv.3.running_mean: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.6.conv.3.running_var: copying a param with shape torch.Size([22]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for features.7.conv.0.0.weight: copying a param with shape torch.Size([134, 22, 1, 1]) from checkpoint, the shape in current model is torch.Size([192, 32, 1, 1]).
	size mismatch for features.7.conv.0.1.weight: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.0.1.bias: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.0.1.running_mean: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.0.1.running_var: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.1.0.weight: copying a param with shape torch.Size([134, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([192, 1, 3, 3]).
	size mismatch for features.7.conv.1.1.weight: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.1.1.bias: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.1.1.running_mean: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.1.1.running_var: copying a param with shape torch.Size([134]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for features.7.conv.2.weight: copying a param with shape torch.Size([44, 134, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 192, 1, 1]).
	size mismatch for features.7.conv.3.weight: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.7.conv.3.bias: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.7.conv.3.running_mean: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.7.conv.3.running_var: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.8.conv.0.0.weight: copying a param with shape torch.Size([268, 44, 1, 1]) from checkpoint, the shape in current model is torch.Size([384, 64, 1, 1]).
	size mismatch for features.8.conv.0.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.0.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.0.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.0.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.1.0.weight: copying a param with shape torch.Size([268, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([384, 1, 3, 3]).
	size mismatch for features.8.conv.1.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.1.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.1.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.1.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.8.conv.2.weight: copying a param with shape torch.Size([44, 268, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 384, 1, 1]).
	size mismatch for features.8.conv.3.weight: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.8.conv.3.bias: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.8.conv.3.running_mean: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.8.conv.3.running_var: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.9.conv.0.0.weight: copying a param with shape torch.Size([268, 44, 1, 1]) from checkpoint, the shape in current model is torch.Size([384, 64, 1, 1]).
	size mismatch for features.9.conv.0.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.0.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.0.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.0.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.1.0.weight: copying a param with shape torch.Size([268, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([384, 1, 3, 3]).
	size mismatch for features.9.conv.1.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.1.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.1.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.1.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.9.conv.2.weight: copying a param with shape torch.Size([44, 268, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 384, 1, 1]).
	size mismatch for features.9.conv.3.weight: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.9.conv.3.bias: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.9.conv.3.running_mean: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.9.conv.3.running_var: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.10.conv.0.0.weight: copying a param with shape torch.Size([268, 44, 1, 1]) from checkpoint, the shape in current model is torch.Size([384, 64, 1, 1]).
	size mismatch for features.10.conv.0.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.0.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.0.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.0.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.1.0.weight: copying a param with shape torch.Size([268, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([384, 1, 3, 3]).
	size mismatch for features.10.conv.1.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.1.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.1.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.1.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.10.conv.2.weight: copying a param with shape torch.Size([44, 268, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 384, 1, 1]).
	size mismatch for features.10.conv.3.weight: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.10.conv.3.bias: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.10.conv.3.running_mean: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.10.conv.3.running_var: copying a param with shape torch.Size([44]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for features.11.conv.0.0.weight: copying a param with shape torch.Size([268, 44, 1, 1]) from checkpoint, the shape in current model is torch.Size([384, 64, 1, 1]).
	size mismatch for features.11.conv.0.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.0.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.0.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.0.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.1.0.weight: copying a param with shape torch.Size([268, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([384, 1, 3, 3]).
	size mismatch for features.11.conv.1.1.weight: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.1.1.bias: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.1.1.running_mean: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.1.1.running_var: copying a param with shape torch.Size([268]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for features.11.conv.2.weight: copying a param with shape torch.Size([67, 268, 1, 1]) from checkpoint, the shape in current model is torch.Size([96, 384, 1, 1]).
	size mismatch for features.11.conv.3.weight: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.11.conv.3.bias: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.11.conv.3.running_mean: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.11.conv.3.running_var: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.12.conv.0.0.weight: copying a param with shape torch.Size([403, 67, 1, 1]) from checkpoint, the shape in current model is torch.Size([576, 96, 1, 1]).
	size mismatch for features.12.conv.0.1.weight: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.0.1.bias: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.0.1.running_mean: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.0.1.running_var: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.1.0.weight: copying a param with shape torch.Size([403, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([576, 1, 3, 3]).
	size mismatch for features.12.conv.1.1.weight: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.1.1.bias: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.1.1.running_mean: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.1.1.running_var: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.12.conv.2.weight: copying a param with shape torch.Size([67, 403, 1, 1]) from checkpoint, the shape in current model is torch.Size([96, 576, 1, 1]).
	size mismatch for features.12.conv.3.weight: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.12.conv.3.bias: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.12.conv.3.running_mean: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.12.conv.3.running_var: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.13.conv.0.0.weight: copying a param with shape torch.Size([403, 67, 1, 1]) from checkpoint, the shape in current model is torch.Size([576, 96, 1, 1]).
	size mismatch for features.13.conv.0.1.weight: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.0.1.bias: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.0.1.running_mean: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.0.1.running_var: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.1.0.weight: copying a param with shape torch.Size([403, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([576, 1, 3, 3]).
	size mismatch for features.13.conv.1.1.weight: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.1.1.bias: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.1.1.running_mean: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.1.1.running_var: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.13.conv.2.weight: copying a param with shape torch.Size([67, 403, 1, 1]) from checkpoint, the shape in current model is torch.Size([96, 576, 1, 1]).
	size mismatch for features.13.conv.3.weight: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.13.conv.3.bias: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.13.conv.3.running_mean: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.13.conv.3.running_var: copying a param with shape torch.Size([67]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for features.14.conv.0.0.weight: copying a param with shape torch.Size([403, 67, 1, 1]) from checkpoint, the shape in current model is torch.Size([576, 96, 1, 1]).
	size mismatch for features.14.conv.0.1.weight: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.0.1.bias: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.0.1.running_mean: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.0.1.running_var: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.1.0.weight: copying a param with shape torch.Size([403, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([576, 1, 3, 3]).
	size mismatch for features.14.conv.1.1.weight: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.1.1.bias: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.1.1.running_mean: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.1.1.running_var: copying a param with shape torch.Size([403]) from checkpoint, the shape in current model is torch.Size([576]).
	size mismatch for features.14.conv.2.weight: copying a param with shape torch.Size([112, 403, 1, 1]) from checkpoint, the shape in current model is torch.Size([160, 576, 1, 1]).
	size mismatch for features.14.conv.3.weight: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.14.conv.3.bias: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.14.conv.3.running_mean: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.14.conv.3.running_var: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.15.conv.0.0.weight: copying a param with shape torch.Size([672, 112, 1, 1]) from checkpoint, the shape in current model is torch.Size([960, 160, 1, 1]).
	size mismatch for features.15.conv.0.1.weight: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.0.1.bias: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.0.1.running_mean: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.0.1.running_var: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.1.0.weight: copying a param with shape torch.Size([672, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([960, 1, 3, 3]).
	size mismatch for features.15.conv.1.1.weight: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.1.1.bias: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.1.1.running_mean: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.1.1.running_var: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.15.conv.2.weight: copying a param with shape torch.Size([112, 672, 1, 1]) from checkpoint, the shape in current model is torch.Size([160, 960, 1, 1]).
	size mismatch for features.15.conv.3.weight: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.15.conv.3.bias: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.15.conv.3.running_mean: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.15.conv.3.running_var: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.16.conv.0.0.weight: copying a param with shape torch.Size([672, 112, 1, 1]) from checkpoint, the shape in current model is torch.Size([960, 160, 1, 1]).
	size mismatch for features.16.conv.0.1.weight: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.0.1.bias: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.0.1.running_mean: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.0.1.running_var: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.1.0.weight: copying a param with shape torch.Size([672, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([960, 1, 3, 3]).
	size mismatch for features.16.conv.1.1.weight: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.1.1.bias: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.1.1.running_mean: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.1.1.running_var: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.16.conv.2.weight: copying a param with shape torch.Size([112, 672, 1, 1]) from checkpoint, the shape in current model is torch.Size([160, 960, 1, 1]).
	size mismatch for features.16.conv.3.weight: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.16.conv.3.bias: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.16.conv.3.running_mean: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.16.conv.3.running_var: copying a param with shape torch.Size([112]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for features.17.conv.0.0.weight: copying a param with shape torch.Size([672, 112, 1, 1]) from checkpoint, the shape in current model is torch.Size([960, 160, 1, 1]).
	size mismatch for features.17.conv.0.1.weight: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.0.1.bias: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.0.1.running_mean: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.0.1.running_var: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.1.0.weight: copying a param with shape torch.Size([672, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([960, 1, 3, 3]).
	size mismatch for features.17.conv.1.1.weight: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.1.1.bias: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.1.1.running_mean: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.1.1.running_var: copying a param with shape torch.Size([672]) from checkpoint, the shape in current model is torch.Size([960]).
	size mismatch for features.17.conv.2.weight: copying a param with shape torch.Size([224, 672, 1, 1]) from checkpoint, the shape in current model is torch.Size([320, 960, 1, 1]).
	size mismatch for features.17.conv.3.weight: copying a param with shape torch.Size([224]) from checkpoint, the shape in current model is torch.Size([320]).
	size mismatch for features.17.conv.3.bias: copying a param with shape torch.Size([224]) from checkpoint, the shape in current model is torch.Size([320]).
	size mismatch for features.17.conv.3.running_mean: copying a param with shape torch.Size([224]) from checkpoint, the shape in current model is torch.Size([320]).
	size mismatch for features.17.conv.3.running_var: copying a param with shape torch.Size([224]) from checkpoint, the shape in current model is torch.Size([320]).
	size mismatch for features.18.0.weight: copying a param with shape torch.Size([896, 224, 1, 1]) from checkpoint, the shape in current model is torch.Size([1280, 320, 1, 1]).
	size mismatch for features.18.1.weight: copying a param with shape torch.Size([896]) from checkpoint, the shape in current model is torch.Size([1280]).
	size mismatch for features.18.1.bias: copying a param with shape torch.Size([896]) from checkpoint, the shape in current model is torch.Size([1280]).
	size mismatch for features.18.1.running_mean: copying a param with shape torch.Size([896]) from checkpoint, the shape in current model is torch.Size([1280]).
	size mismatch for features.18.1.running_var: copying a param with shape torch.Size([896]) from checkpoint, the shape in current model is torch.Size([1280]).
	size mismatch for classifier.1.weight: copying a param with shape torch.Size([10, 896]) from checkpoint, the shape in current model is torch.Size([10, 1280]).

## CELL 11 — PTQ Static INT8 (CPU)


In [None]:
def calibrate(model, loader, num_batches=30):
    model.eval()
    with torch.no_grad():
        for i, (x, _) in enumerate(loader):
            if i >= num_batches: break
            model(x)

def ptq_int8(model_fp32_quantready, train_loader):
    m = copy.deepcopy(model_fp32_quantready).cpu().eval()
    torch.backends.quantized.engine = "fbgemm"

    m.qconfig = tq.get_default_qconfig("fbgemm")
    m_prepared = tq.prepare(m, inplace=False)

    calibrate(m_prepared, train_loader, num_batches=30)
    m_int8 = tq.convert(m_prepared, inplace=False)
    return m_int8

mb_ptq_int8 = ptq_int8(mb_qready, train_loader)
acc_ptq = evaluate(mb_ptq_int8, test_loader, device="cpu")
print("PTQ INT8 acc (CPU):", acc_ptq)


## CELL 12 — QAT INT8 (CPU)


In [None]:
def qat_int8(model_fp32_quantready, train_loader, test_loader, epochs=2, lr=5e-5):
    m = copy.deepcopy(model_fp32_quantready).cpu()
    torch.backends.quantized.engine = "fbgemm"

    m.qconfig = tq.get_default_qat_qconfig("fbgemm")
    m_prepared = tq.prepare_qat(m, inplace=False)

    opt = optim.Adam(m_prepared.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    for ep in range(1, epochs+1):
        m_prepared.train()
        for x, y in train_loader:
            x, y = x.cpu(), y.cpu()
            opt.zero_grad()
            loss = loss_fn(m_prepared(x), y)
            loss.backward()
            opt.step()

        m_prepared.eval()
        acc = evaluate(m_prepared, test_loader, device="cpu")
        print(f"QAT epoch {ep}/{epochs} | acc={acc:.4f}")

    m_int8 = tq.convert(m_prepared.eval(), inplace=False)
    return m_int8

mb_qat_int8 = qat_int8(mb_qready, train_loader, test_loader, epochs=2, lr=5e-5)
acc_qat = evaluate(mb_qat_int8, test_loader, device="cpu")
print("QAT INT8 acc (CPU):", acc_qat)


# PART D — Final Comparison Table


## CELL 13 — Report (Accuracy, Params, Size, FLOPs, Latency)


In [None]:
def row(name, model, device, note=""):
    model = model.to(device)
    acc = evaluate(model, test_loader, device)
    params = count_params(model)
    sz = round(size_mb_fp32(model), 2) if device != "cpu_int8" else "INT8"
    flops, _ = flops_params(model, device=("cpu" if device=="cpu" else device))
    lat = latency_ms(model, device=("cpu" if device=="cpu" else device))
    return {
        "model": name,
        "acc": round(acc, 4),
        "params": params,
        "size_mb(fp32 approx)": sz,
        "FLOPs": int(flops),
        "lat_ms": round(lat, 2),
        "note": note
    }

rows = []
rows.append(row("MobileNetV2 fp32 (width=1.0)", mb_full, DEVICE, "baseline"))
rows.append(row("MobileNetV2 fp32 (width=0.5)", mb_small, DEVICE, "parameter decrease via width_mult"))
rows.append(row("MobileNetV2 structured pruned fp32", mb_struct_pruned, DEVICE, "real params reduced (channel prune)"))
rows.append(row("MobileNetV2 PTQ INT8 (CPU)", mb_ptq_int8, "cpu", "post-training int8"))
rows.append(row("MobileNetV2 QAT INT8 (CPU)", mb_qat_int8, "cpu", "quantization-aware training"))

df = pd.DataFrame(rows).sort_values("acc", ascending=False)
df
