In [1]:
# -*- coding: utf-8 -*-
'''Train CIFAR10 with PyTorch.'''

from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import os
import argparse

from models import *

from torch2trt import torch2trt

In [2]:
from torchvision.models import *

In [19]:
# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

bs = 256

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True, num_workers=8)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=8)

print(trainset[0][0].shape)

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified
torch.Size([3, 32, 32])


In [23]:
x = trainset[0][0].unsqueeze(0)
for i in range(100):
    x = torch.cat((x, trainset[i][0].unsqueeze(0)))
x.size()

torch.Size([101, 3, 32, 32])

# make model

In [4]:
class mymodel(nn.Module):
    def __init__(self, basemodeel):
        super(mymodel, self).__init__()
        self.features = basemodel
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False)
        if model_name == "res34" or model_name == "res18":
            num_ch = 512
        else:
            num_ch = 2048
        
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(num_ch, 10, 1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.features(x)
        x = self.avgpool(x)#.squeeze(2).squeeze(2)
        x = self.fc1(x)
        return x

In [5]:
model_name = "res18"
basemodel = resnet18(pretrained=True)
basemodel = nn.Sequential(*list(basemodel.children())[1:-2])
net = mymodel(basemodel) 

inp = torch.rand(1,3,32,32)
net(inp).size()

torch.Size([1, 10, 1, 1])

In [6]:
net = net.to("cuda")
device = "cuda"

In [7]:
# load checkpoint
checkpoint = torch.load('./checkpoint/res18-ckpt.t7')
net.load_state_dict(checkpoint['net'])
best_acc = checkpoint['acc']
start_epoch = checkpoint['epoch']

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

In [8]:
# test eval
def test(epoch, net):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs).squeeze(2).squeeze(2)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Save checkpoint.
    acc = 100.*correct/total
    print("acc:", acc)
    return acc

In [9]:
accbefore=test(0, net)

acc: 88.79


In [10]:
# make FP32 TensorRT model
# define input
input_size = [1, 3, 32, 32]
x = torch.zeros(input_size).cuda()

# convert to tensorrt models
net_trt = torch2trt(net, [x], max_batch_size=256)

fp32acc = test(0, net_trt)

acc: 88.79


In [11]:
# make FP16 TensorRT model
# define input
input_size = [1, 3, 32, 32]
x = torch.zeros(input_size).cuda()

# convert to tensorrt models
net_trt = torch2trt(net, [x], fp16_mode=True, max_batch_size=256)

In [12]:
fp16acc = test(0, net_trt)

acc: 88.77


In [14]:
# make INT8 TensorRT model
# define input
input_size = [1, 3, 32, 32]
x = torch.zeros(input_size).cuda()

# convert to tensorrt models
net_trt = torch2trt(net, [testset[0][0]], fp16_mode=True, int8_mode=True, max_batch_size=1), 

RuntimeError: Expected 4-dimensional input for 4-dimensional weight 64 3 3 3, but got 3-dimensional input of size [1, 32, 32] instead

In [None]:
int8acc = test(0, net_trt)