In [1]:
#Imports 
import torch 
import torch.nn as nn 
from conf import global_settings 
from model_opt import  evaluate, model_size, create_profiler 
from copy import deepcopy
from utils import get_test_dataloader
#from models.vgg import vgg19_bn
from torch.nn.utils.fusion import fuse_conv_bn_eval
from torch.quantization import quantize_dynamic


In [6]:
def report(model, testloader, device, input = None): 
    if not input:
        input = torch.randn((1,1,64,64), device = device) 

    top1error, top5error, t= evaluate(device, testloader, model= model, onnx = False, ort_session=None)
    print(f"Top-1 error: {top1error}")
    print(f"Top-5 error: {top5error}")
    print(f"Time per image: {t} ms")
    size_model = model_size(model)
    print(f"Model size: {size_model/1e3} MB")
    create_profiler(model, input, device)


In [15]:
import torch
import torch.nn as nn

cfg = {
    'A' : [64,     'M', 128,      'M', 256, 256,           'M', 512, 512,           'M', 512, 512,           'M'],
    'B' : [64, 64, 'M', 128, 128, 'M', 256, 256,           'M', 512, 512,           'M', 512, 512,           'M'],
    'D' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256,      'M', 512, 512, 512,      'M', 512, 512, 512,      'M'],
    'E' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
}

class VGG(nn.Module):

    def __init__(self, features, num_class=952):
        super().__init__()
        self.features = features

        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_class)
        )

    def forward(self, x):
        output = self.features(x)
        output = output.reshape(output.size()[0], -1)  # Changed from .view to .reshape
        output = self.classifier(output)

        return output

def make_layers(cfg, batch_norm=False):
    layers = []
    input_channel = 1  
    for l in cfg:
        if l == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            continue

        layers += [nn.Conv2d(input_channel, l, kernel_size=3, padding=1)]

        if batch_norm:
            layers += [nn.BatchNorm2d(l)]

        layers += [nn.ReLU(inplace=True)]
        input_channel = l

    return nn.Sequential(*layers)

def vgg11_bn():
    return VGG(make_layers(cfg['A'], batch_norm=True))

def vgg13_bn():
    return VGG(make_layers(cfg['B'], batch_norm=True))

def vgg16_bn():
    return VGG(make_layers(cfg['D'], batch_norm=True))

def vgg19_bn():
    return VGG(make_layers(cfg['E'], batch_norm=True))


In [13]:
device = 'cpu'
test_loader = get_test_dataloader(
        root_dir='data/chinese_char/952_test',
        batch_size=16,
        num_workers=4,
        shuffle=False
    )

In [16]:
baseline_model = vgg19_bn()
baseline_model.load_state_dict(torch.load('checkpoint/vgg19/Thursday_11_July_2024_15h_22m_43s/vgg19-80-best.pth' , map_location='cpu'))

  baseline_model.load_state_dict(torch.load('checkpoint/vgg19/Thursday_11_July_2024_15h_22m_43s/vgg19-80-best.pth' , map_location='cpu'))


<All keys matched successfully>

In [17]:
report(baseline_model, test_loader, device)

Evaluating...: 100%|██████████| 1015/1015 [04:56<00:00,  3.42it/s]


Top-1 error: 0.001970946788787842
Top-5 error: 0.00030797719955444336
Time per image: 18.11855486620527 ms
Model size: 196.508568 MB
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.94%     314.211us         0.94%     314.211us       2.455us       9.87 Mb       9.87 Mb           128  
                     aten::conv2d         0.35%     117.108us        74.38%      24.753ms       1.547ms       4.62 Mb           0 b            16  
                aten::convolution         0.91%     303.076us        74.03%      24.636ms       1.540ms       4.62 Mb          

In [18]:
#Quantize: Post training weight only quantization 
model_quantized_dynamic_float16 = quantize_dynamic(
    model=baseline_model, qconfig_spec={torch.nn.Linear}, dtype=torch.float16,
)
report(model_quantized_dynamic_float16, test_loader, device)



Evaluating...: 100%|██████████| 1015/1015 [04:26<00:00,  3.81it/s]


Top-1 error: 0.001970946788787842
Top-5 error: 0.00030797719955444336
Time per image: 16.23766715669315 ms
Model size: 196.51065 MB
----------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                              Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
----------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                       aten::empty         0.98%     299.071us         0.98%     299.071us       2.283us       9.87 Mb       9.87 Mb           131  
                      aten::conv2d         0.28%      86.099us        80.35%      24.604ms       1.538ms       4.62 Mb           0 b            16  
                 aten::convolution         0.79%     241.881us        80.07%      24.518ms       1.532ms       4.62 Mb     

In [19]:
model_quantized_dynamic_int8 = quantize_dynamic(
    model=baseline_model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8,
)
report(model_quantized_dynamic_int8, test_loader, device)

Evaluating...: 100%|██████████| 1015/1015 [04:25<00:00,  3.82it/s]


Top-1 error: 0.001970946788787842
Top-5 error: 0.00030797719955444336
Time per image: 16.176997440499782 ms
Model size: 109.31513000000001 MB
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         1.17%     347.078us         1.17%     347.078us       2.590us       9.94 Mb       9.94 Mb           134  
                 aten::empty_like         0.23%      67.829us         0.43%     129.012us       6.790us       4.66 Mb           0 b            19  
                     aten::conv2d         0.30%      88.707us        83.39%      24.735ms       1.546ms       4.62 Mb 

In [20]:
from torch.nn.utils.fusion import fuse_conv_bn_eval


def fuse_all_conv_bn(model):
    stack = []
    for name, module in model.named_children(): # immediate children
        if list(module.named_children()): # is not empty (not a leaf)
            fuse_all_conv_bn(module)

        if isinstance(module, nn.BatchNorm2d):
            if isinstance(stack[-1][1], nn.Conv2d):
                setattr(model, stack[-1][0], fuse_conv_bn_eval(stack[-1][1], module))
                setattr(model, name, nn.Identity())
        else:
            stack.append((name, module))

def ptq(model, sample_loader, device='cpu', backend='fbgemm', fuse_bn=True):
    # running on a x86 CPU. Use backend="qnnpack" if running on ARM.
    m = deepcopy(model)
    m.eval()

    # Fuse
    if fuse_bn:
        fuse_all_conv_bn(m)

    # Insert stubs
    m = nn.Sequential(
        torch.quantization.QuantStub(),
        m,
        torch.quantization.DeQuantStub()
    )

    # Prepare
    m.qconfig = torch.quantization.get_default_qconfig(backend)
    torch.quantization.prepare(m, inplace=True)

    
    # Calibrate
    m.to(device)
    m.eval()
    with torch.no_grad():
        for data, target in sample_loader:
            data = data.to(device)
            m(data)

    # Convert
    torch.quantization.convert(m, inplace=True)

    return m

In [21]:
model_quantized_static_int8 = ptq(baseline_model, sample_loader=test_loader, device=device, backend='fbgemm', fuse_bn=False)
report(model_quantized_static_int8, test_loader, device)



Evaluating...: 100%|██████████| 1015/1015 [01:36<00:00, 10.57it/s]


Top-1 error: 0.0021557211875915527
Top-5 error: 0.00030797719955444336
Time per image: 5.769365510496491 ms
Model size: 49.523506000000005 MB
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
    aten::_empty_affine_quantized         1.37%     159.708us         1.37%     159.708us       3.895us       2.43 Mb       2.43 Mb            41  
                quantized::conv2d        67.04%       7.842ms        67.86%       7.939ms     496.164us       1.16 Mb           0 b            16  
          quantized::batch_norm2d         5.69%     665.653us         8.35%     976.859us      61.054us       1.16 Mb 

In [22]:
model_quantized_static_fuse_int8 = ptq(baseline_model, sample_loader=test_loader, device=device, backend='fbgemm', fuse_bn=True)
report(model_quantized_static_fuse_int8, test_loader, device)

Evaluating...: 100%|██████████| 1015/1015 [01:28<00:00, 11.41it/s]


Top-1 error: 0.0018477439880371094
Top-5 error: 0.00030797719955444336
Time per image: 5.336659710581709 ms
Model size: 49.406881999999996 MB
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
    aten::_empty_affine_quantized         1.62%     148.425us         1.62%     148.425us       5.937us       1.28 Mb       1.28 Mb            25  
                quantized::conv2d        67.29%       6.165ms        68.75%       6.299ms     393.688us       1.16 Mb           0 b            16  
                 aten::max_pool2d         0.22%      20.384us         1.73%     158.440us      31.688us     122.00 Kb 