# MobileNet

**Load a pretrained MobileNet model and validate it on the Tiny ImageNet dataset**



## Pretrain Model

### Get pretrain model

In [1]:
import os
import time
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from utils_pretrain import *

# Dataset root directory. Set the IMAGENET_PATH environment variable to point
# at your own copy; the fallback below is the original author's local path.
IMAGENET_PATH = os.environ.get(
    'IMAGENET_PATH', '/home/yang/dataset/imagenet/tiny-imagenet-200')

In [2]:
class Net(nn.Module):
    """MobileNet classifier built from depthwise-separable convolution blocks.

    The feature extractor is a stack of one standard 3x3 convolution followed
    by thirteen depthwise-separable blocks and a global average pool; a single
    linear layer maps the 1024 pooled features to class scores.

    Args:
        num_classes: Number of output scores produced by the final linear
            layer. Defaults to 1000, the value that was previously hard-coded,
            so existing checkpoints keep loading unchanged.
    """

    def __init__(self, num_classes=1000):
        super(Net, self).__init__()

        def conv_bn(inp, oup, stride):
            """Standard 3x3 convolution -> batch norm -> ReLU."""
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            """Depthwise 3x3 convolution followed by a pointwise 1x1 convolution."""
            return nn.Sequential(
                # Depthwise: groups == in-channels, one filter per channel.
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                # Pointwise: 1x1 convolution mixes channels and sets the width.
                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(  3,  32, 2),
            conv_dw( 32,  64, 1),
            conv_dw( 64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
            # Global average pool. Equivalent to the former AvgPool2d(7) for
            # 224x224 inputs (7x7 feature map) but also correct for any other
            # input resolution. The layer has no parameters and keeps the same
            # position in the Sequential, so state_dict keys are unchanged.
            nn.AdaptiveAvgPool2d(1),
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for image batch `x`."""
        x = self.model(x)
        # Flatten per sample; using the batch size (instead of -1 for the batch
        # axis) guarantees spatial positions can never leak into the batch axis.
        x = x.view(x.size(0), -1)
        return self.fc(x)

## Create MobileNet objects

In [3]:
# One independent, freshly initialised network per loading strategy shown below.
mobilenet_model1, mobilenet_model2, mobilenet_model3 = (Net() for _ in range(3))

## Load model by params

There are two ways to load the model from saved parameters.

### Method 1

First, transform the model type to nn.DataParallel and load params

In [4]:
# Read the checkpoint and keep only the saved parameter dictionary.
params = torch.load('mobilenet_params.pth.tar')['state_dict']

# Wrap the network in DataParallel (on the GPU) before loading, so its
# parameter names line up with the names stored in the checkpoint.
mobilenet_model1 = torch.nn.DataParallel(mobilenet_model1).cuda()

# Copy the saved parameters into the wrapped model.
mobilenet_model1.load_state_dict(params)

### Method 2

Remove the `module.` prefix from the parameter names, then load the params

Reference: 

https://discuss.pytorch.org/t/solved-keyerror-unexpected-key-module-encoder-embedding-weight-in-state-dict/1686/4?u=qiaoyang_luo

In [None]:
# Checkpoint that was saved from a DataParallel-wrapped model.
# map_location='cpu' lets this cell run without a GPU; mobilenet_model2 itself
# lives on the CPU, so the tensors end up there either way.
from collections import OrderedDict

checkpoint = torch.load('mobilenet_params.pth.tar', map_location='cpu')

# The weights are stored under the 'state_dict' key of the checkpoint (the same
# key Method 1 reads). Fall back to the object itself in case a bare state dict
# was saved instead.
state_dict = checkpoint['state_dict'] if 'state_dict' in checkpoint else checkpoint

# Build a new OrderedDict whose keys no longer carry the `module.` prefix that
# DataParallel adds. Keys without the prefix are kept untouched rather than
# blindly losing their first seven characters.
prefix = 'module.'
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[len(prefix):] if k.startswith(prefix) else k
    new_state_dict[name] = v

# load params into the plain (non-DataParallel) model
mobilenet_model2.load_state_dict(new_state_dict)

## Load the whole model

In [None]:
# Restore the complete serialized model object instead of only its parameters;
# this replaces the freshly constructed mobilenet_model3.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code, so only load checkpoints from a trusted source.
# NOTE(review): presumably the file was saved from this notebook's `Net` class,
# in which case that class must be defined before this cell runs. Confirm.
mobilenet_model3 = torch.load('mobilenet_model.pth.tar')

## Validate

**Validate the model without training**


### Loss function

In [5]:
# Cross-entropy loss passed to validate() below; .cuda() moves the module to
# the GPU, so this cell requires a CUDA device.
criterion = nn.CrossEntropyLoss().cuda()

### Preprocess val dataset and set hyper params

In [6]:
# --- Evaluation settings ---------------------------------------------------
batch_size = 10     # images per validation batch
workers = 4         # DataLoader worker processes
epochs = 1          # number of passes made by the validation loop
print_freq = 100    # report progress every `print_freq` batches

# Validation images are read from the `val` sub-directory of the dataset root.
valdir = os.path.join(IMAGENET_PATH, 'val')

# Per-channel normalisation applied after conversion to a tensor.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Resize, take a 224x224 centre crop, convert to tensor, then normalise.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])
val_dataset = datasets.ImageFolder(valdir, val_transform)

# Fixed ordering (no shuffling); pinned memory for the host-to-GPU copies.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=workers,
    pin_memory=True,
)


### validate function

In [7]:
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader` and return the average top-1 precision.

    Runs one full pass over the loader with gradients disabled, accumulating
    the loss and the top-1 / top-5 precision, and prints a progress line every
    `print_freq` batches (`print_freq` is read from the notebook's globals).

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to evaluate; it is switched to eval mode.
        criterion: Loss callable invoked as ``criterion(output, target)``.

    Returns:
        The running average of the top-1 precision over all batches.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # torch.no_grad() replaces the removed `Variable(..., volatile=True)` way
    # of disabling autograd during inference.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # `non_blocking` is the current name of the former `async`
            # argument (`async` is a reserved word since Python 3.7).
            # Moving the images as well keeps the function usable with a plain
            # CUDA model, not only with a DataParallel wrapper.
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            # NOTE(review): assumes `accuracy` returns one indexable
            # single-element result per requested k, as the original
            # `prec1[0]` indexing implies. Confirm against utils_pretrain.
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = images.size(0)
            # .item() replaces `loss.data[0]`, which fails on 0-dim tensors.
            losses.update(loss.item(), n)
            top1.update(float(prec1[0]), n)
            top5.update(float(prec5[0]), n)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg

### Validate 

In [8]:
# Best top-1 precision observed so far across the validation passes.
best_prec1 = 0

for epoch in range(epochs):
    # Score the DataParallel model on the validation set.
    prec1 = validate(val_loader, mobilenet_model1, criterion)

    # Record whether this pass improved on the best score, then keep the
    # larger of the two values.
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)

Test: [0/1000]	Time 0.804 (0.804)	Loss 8.0602 (8.0602)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
Test: [100/1000]	Time 0.015 (0.023)	Loss 8.3371 (9.0941)	Prec@1 10.000 (1.782)	Prec@5 10.000 (5.545)
Test: [200/1000]	Time 0.015 (0.019)	Loss 11.1474 (9.1375)	Prec@1 0.000 (1.791)	Prec@5 0.000 (5.274)
Test: [300/1000]	Time 0.015 (0.018)	Loss 8.9534 (9.1365)	Prec@1 0.000 (1.761)	Prec@5 0.000 (5.482)
Test: [400/1000]	Time 0.015 (0.017)	Loss 9.0318 (9.1166)	Prec@1 0.000 (1.870)	Prec@5 10.000 (5.586)
Test: [500/1000]	Time 0.015 (0.017)	Loss 8.2050 (9.1135)	Prec@1 0.000 (1.856)	Prec@5 10.000 (5.569)
Test: [600/1000]	Time 0.015 (0.016)	Loss 8.2810 (9.0646)	Prec@1 0.000 (1.847)	Prec@5 20.000 (5.707)
Test: [700/1000]	Time 0.015 (0.016)	Loss 8.3021 (9.0373)	Prec@1 0.000 (2.026)	Prec@5 0.000 (5.863)
Test: [800/1000]	Time 0.018 (0.016)	Loss 9.6433 (9.0575)	Prec@1 0.000 (1.998)	Prec@5 0.000 (5.693)
Test: [900/1000]	Time 0.015 (0.016)	Loss 10.4045 (9.0789)	Prec@1 0.000 (1.964)	Prec@5 0.000 (5.694)
 * Pr