In [0]:
!pip install mxnet-cu100
!pip install torch torchvision

In [2]:
import mxnet as mx
import mxnet.gluon.nn as mxnn
from mxnet import nd
from mxnet import gluon, autograd
from mxnet.gluon.data.vision import transforms as mxT
import time, os, shutil

import torch
import torch.nn as pytorchnn
import math
import torch.nn.functional as F
import torchvision
import torchvision.transforms as pytorchT
from torch import optim as pytorchoptim
from torch.backends import cudnn

cudnn.benchmark = True
ctx = mx.gpu()

print(mx.__version__)
print(torch.__version__)

1.4.1
1.1.0


In [0]:
def mx_conv3x3(out_planes, stride=1):
    " 3x3 convolution with padding "
    return mxnn.Conv2D(out_planes, kernel_size=3, strides=stride, padding=1)


class mxBasicBlock(mxnn.HybridBlock):
    expansion = 1

    def __init__(self, planes, stride=1, downsample=None, **kwargs):
        super(mxBasicBlock, self).__init__(**kwargs)
        self.conv1 = mx_conv3x3(planes, stride)
        self.bn1 = mxnn.BatchNorm()
        self.conv2 = mx_conv3x3(planes)
        self.bn2 = mxnn.BatchNorm()
        self.downsample = downsample
        self.stride = stride

    def hybrid_forward(self, F, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out = F.relu(residual + out)

        return out


class mxResNet_Cifar(mxnn.HybridBlock):

    def __init__(self, block, layers, num_classes=10, **kwargs):
        super(mxResNet_Cifar, self).__init__(**kwargs)

        self.inplanes = 16
        self.conv1 = mxnn.Conv2D(
            16, kernel_size=3, strides=1, padding=1)
        self.bn1 = mxnn.BatchNorm()
        self.layer1 = self._make_layer(block, 16, layers[0])
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        self.avgpool = mxnn.AvgPool2D(8, strides=1)
        self.fc = mxnn.Dense(num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = mxnn.HybridSequential()
            downsample.add(
                mxnn.Conv2D(planes * block.expansion,
                          kernel_size=1, strides=stride)
            )

        layers = mxnn.HybridSequential()
        layers.add(block(planes, stride, downsample))
        for _ in range(1, blocks):
            layers.add(block(planes))

        return layers

    def hybrid_forward(self, F, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = self.fc(x)

        return x


def mx_resnet20_cifar(**kwargs):
    model = mxResNet_Cifar(mxBasicBlock, [3, 3, 3], **kwargs)
    return model

In [0]:
del mxnet

In [6]:
%%time
mxnet = mx_resnet20_cifar()
mxnet.initialize(ctx = ctx)

CPU times: user 24.6 ms, sys: 9.17 ms, total: 33.8 ms
Wall time: 26.6 ms


In [7]:
x = nd.ones((1000, 3, 32, 32))
x.shape, x.dtype

((1000, 3, 32, 32), numpy.float32)

In [0]:
%%time
#before hybridization
mxnet(x.as_in_context(mx.gpu()))

In [0]:
mxnet.hybridize()

In [0]:
%%time
#after hybridization
mxnet(x.as_in_context(mx.gpu()))

In [0]:
def torch_conv3x3(in_planes, out_planes, stride=1):
    " 3x3 convolution with padding "
    return pytorchnn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1)


class pytorchBasicBlock(pytorchnn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(pytorchBasicBlock, self).__init__()
        self.conv1 = torch_conv3x3(inplanes, planes, stride)
        self.bn1 = pytorchnn.BatchNorm2d(planes)
        self.relu = pytorchnn.ReLU(inplace=True)
        self.conv2 = torch_conv3x3(planes, planes)
        self.bn2 = pytorchnn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class pytorchResNet_Cifar(pytorchnn.Module):

    def __init__(self, block, layers, num_classes=10):
        super(pytorchResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = pytorchnn.Conv2d(3, 16, kernel_size=3,
                               stride=1, padding=1)
        self.bn1 = pytorchnn.BatchNorm2d(16)
        self.relu = pytorchnn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, layers[0])
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        self.avgpool = pytorchnn.AvgPool2d(8, stride=1)
        self.fc = pytorchnn.Linear(64 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, pytorchnn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, pytorchnn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = pytorchnn.Sequential(
                pytorchnn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride),
                pytorchnn.BatchNorm2d(planes * block.expansion)
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return pytorchnn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def pytorch_resnet20_cifar(**kwargs):
    model = pytorchResNet_Cifar(pytorchBasicBlock, [3, 3, 3], **kwargs)
    return model

In [36]:
%%time
pytorch = pytorch_resnet20_cifar()
pytorch.cuda()

CPU times: user 17.6 ms, sys: 1.71 ms, total: 19.3 ms
Wall time: 23.2 ms


In [0]:
x = torch.ones((1000, 3, 32, 32), dtype = torch.float32)

In [0]:
%%time
pytorch(x.cuda())

#Training MXNet model

##Getting Dataset and Data loader 

In [16]:
normalize = mxT.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])

train_transfrom = mxT.Compose([
    mxT.RandomFlipLeftRight(),
    mxT.ToTensor(),
    normalize
])

val_transform = mxT.Compose([
    mxT.ToTensor(),
    normalize
])

mxtrainset = gluon.data.vision.datasets.CIFAR10(
    './data', train=True).transform_first(train_transfrom)
mxtrainloader = gluon.data.DataLoader(
    mxtrainset, batch_size=128, shuffle=True, num_workers=2)

mxtestset = gluon.data.vision.datasets.CIFAR10(
    './data', train=False).transform_first(val_transform)
mxtestloader = gluon.data.DataLoader(
    mxtestset, batch_size=128, shuffle=False, num_workers=2)

Downloading ./data/cifar-10-binary.tar.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/cifar10/cifar-10-binary.tar.gz...


In [18]:
%%time
mxobjective = gluon.loss.SoftmaxCrossEntropyLoss()
mxoptimizer = gluon.Trainer(mxnet.collect_params(), 'adam', {'learning_rate': 0.001})

CPU times: user 2.69 ms, sys: 0 ns, total: 2.69 ms
Wall time: 2.7 ms


In [19]:
%%time
for epoch in range(10):
    for features, labels in mxtrainloader:
        with autograd.record():
            output = mxnet(features.as_in_context(ctx))
            loss = mxobjective(output, labels.as_in_context(ctx))
        loss.backward()
        mxoptimizer.step(features.shape[0])
    print('Epoch:', epoch)

Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
CPU times: user 1min 38s, sys: 29.2 s, total: 2min 7s
Wall time: 1min 44s


In [0]:
metric = mx.metric.Accuracy()
metric.reset()

In [22]:
%%time
for features, labels in mxtestloader:
    output = mxnet(features.as_in_context(ctx))
    metric.update(labels.as_in_context(ctx), output)

CPU times: user 657 ms, sys: 284 ms, total: 942 ms
Wall time: 1.22 s


In [23]:
metric.get()

('accuracy', 0.8023)

#Training Pytorchh Model

In [29]:
normalize = pytorchT.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])

train_transform = pytorchT.Compose([
    pytorchT.RandomHorizontalFlip(),
    pytorchT.ToTensor(),
    normalize
])

val_transform = pytorchT.Compose([
    pytorchT.ToTensor(),
    normalize
])

pytorch_trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform)
pytorch_trainloader = torch.utils.data.DataLoader(
    pytorch_trainset, batch_size=128, shuffle=True, num_workers=2)

pytorch_testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=val_transform)
pytorch_testloader = torch.utils.data.DataLoader(
    pytorch_testset, batch_size=128, shuffle=False, num_workers=2)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


170500096it [00:06, 27162209.60it/s]                               


Files already downloaded and verified


In [37]:
%%time
pytorch_criterion = pytorchnn.CrossEntropyLoss()
pytorch_optimizer = pytorchoptim.Adam(pytorch.parameters(), 0.001)

CPU times: user 1.2 ms, sys: 43 µs, total: 1.24 ms
Wall time: 1.25 ms


In [38]:
%%time
pytorch.train()
for epoch in range(10):
    for features, labels in pytorch_trainloader:
        output = pytorch(features.cuda())
        loss = pytorch_criterion(output, labels.cuda())
        pytorch_optimizer.zero_grad()
        loss.backward()
        pytorch_optimizer.step()
    print('Epoch:', epoch)

Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
CPU times: user 2min 4s, sys: 6.25 s, total: 2min 10s
Wall time: 2min 32s


In [39]:
%%time
pytorch.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in pytorch_testloader:
        outputs = pytorch(features.cuda())
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.cuda()).sum().item()

CPU times: user 579 ms, sys: 238 ms, total: 817 ms
Wall time: 1.79 s


In [40]:
print('Test Accuracy:', correct / total)

Test Accuracy: 0.7896
