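- MobileNets (V1) paper: https://arxiv.org/pdf/1704.04861.pdf
- MobileNetV2 paper: https://arxiv.org/pdf/1801.04381.pdf
- VGG paper (the link originally given here; not the source of the quotes below): https://arxiv.org/pdf/1409.1556.pdf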


### Notes
- "MobileNet models were trained [...] using RMSprop with asynchronous gradient descent similar to Inception V3. However, contrary to training large models we use less regularization and data augmentation techniques because small models have less trouble with overfitting." 
- "Additionally, we found that it was important to put very little or no weight decay (l2 regularization) on the depthwise filters since there are so few parameters in them."
    

In [1]:
import os
import re
import sys
import glob
import numpy as np
import matplotlib.pyplot as plt
import unittest
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T

%matplotlib inline
%load_ext autoreload
%autoreload 2

# Notebook-wide settings shared by the cells below.
seed = 42  # handed to torch.manual_seed() before the loaders and models are built
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'  # the CUDA auto-detection line above is commented out, so everything runs on the CPU
plt.rcParams['font.size'] = 12  # default font size for matplotlib figures
test = unittest.TestCase()  # stand-alone TestCase instance, usable for its assert* helpers

In [2]:
# Directory (under the user's home) where torchvision stores the dataset files.
data_dir = os.path.expanduser('~/.pytorch-datasets')

# Images are resized to 224x224 only to match the input size used in the
# original paper; the only other step is conversion to a tensor.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
])

# ds_train = torchvision.datasets.ImageNet(root=data_dir, download=True, train=True, transform=T.ToTensor())
ds_train = torchvision.datasets.CIFAR10(root=data_dir, download=True, train=True, transform=transform)
ds_test = torchvision.datasets.CIFAR10(root=data_dir, download=True, train=False, transform=transform)

print(f'Train: {len(ds_train)} samples')
print(f'Test: {len(ds_test)} samples')

# Probe a single sample to learn the per-image tensor shape after the transform.
x0, _ = ds_train[0]
in_size = x0.shape
# Take the class count from the dataset itself instead of hard-coding 10, so
# it stays correct if the dataset above is swapped for another one.
num_classes = len(ds_train.classes)
print('input image size =', in_size)

Files already downloaded and verified
Files already downloaded and verified
Train: 50000 samples
Test: 10000 samples
input image size = torch.Size([3, 224, 224])


In [17]:
import utils.training as training
import mobilenet
# Seed torch's RNG before anything random (e.g. weight initialisation) happens.
torch.manual_seed(seed)

# Data loaders over CIFAR-10. With shuffle=False the batch order is fixed, so
# limiting an epoch to `max_batches` batches always sees the same small subset
# of the data (the goal here is to overfit that subset).
batch_size = 256
max_batches = 25
dl_train = torch.utils.data.DataLoader(ds_train, batch_size, shuffle=False)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size, shuffle=False)

# Create the model instances (loss and optimizer are created in the training
# cells). The input shape and class count are taken from the values computed
# in the dataset cell rather than repeated as literals; tuple() turns the
# torch.Size into the plain (C, H, W) tuple that was previously hard-coded.
model = mobilenet.MobileNetV1(in_size=tuple(in_size), out_classes=num_classes)
model2 = mobilenet.MobileNetV2(in_size=tuple(in_size), out_classes=num_classes)

In [18]:
# weights initialization
# model = model.apply(vgg.weights_init)
# conv_2_4_5 = [4,11,14]
# for i in conv_2_4_5:
#     model.feature_extractor[i].bias.data.fill_(1)

In [19]:
# Report how many trainable parameters each model has, one count per line
# (MobileNetV1 first, then MobileNetV2), formatted with thousands separators.
for net in (model, model2):
    pytorch_total_params = sum(
        param.numel() for param in net.parameters() if param.requires_grad
    )
    print(f"{pytorch_total_params:,}")

3,222,218
2,237,994


In [6]:
from utils.train_results import FitResult

# Loss and optimizer for MobileNetV1 (`model`).
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005)

# Fit the model with TorchTrainer: up to 30 epochs, early_stopping=3, no
# checkpoints, and each epoch limited to `max_batches` batches.
trainer = training.TorchTrainer(model, loss_fn, optimizer, device)
best_acc = 0
# Alternative: run only the training loop and track the best accuracy manually.
# for i in range(30):
#     res = trainer.train_epoch(dl_train, max_batches=max_batches, verbose=(i%2==0))
#     best_acc = res.accuracy if res.accuracy > best_acc else best_acc


# Use the `max_batches` setting defined alongside the data loaders instead of
# repeating the literal 25, so the subset size is controlled in one place.
fit_res = trainer.fit(dl_train, dl_test, num_epochs=30, checkpoints=None, early_stopping=3, max_batches=max_batches)
epo, loss_tr, acc_tr, loss_te, acc_te = fit_res
# Rebuild the result with each loss converted via .item() (presumably from
# 0-dim tensors to plain Python numbers -- confirm against TorchTrainer.fit).
fit_res = FitResult(epo, [loss.item() for loss in loss_tr], acc_tr, [loss.item() for loss in loss_te], acc_te)

--- EPOCH 1/30 ---
train_batch:   0%|                                                                                                                                                                                                    | 0/25 [01:42<?, ?it/s]


KeyboardInterrupt: 

In [20]:
# Loss and optimizer for MobileNetV2 (`model2`); same hyper-parameters as the
# MobileNetV1 run.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model2.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005)

# Fit the model with TorchTrainer: up to 30 epochs, early_stopping=3, no
# checkpoints, and each epoch limited to `max_batches` batches.
trainer = training.TorchTrainer(model2, loss_fn, optimizer, device)
best_acc = 0

# Use the `max_batches` setting defined alongside the data loaders instead of
# repeating the literal 25, so the subset size is controlled in one place.
fit_res = trainer.fit(dl_train, dl_test, num_epochs=30, checkpoints=None, early_stopping=3, max_batches=max_batches)
epo, loss_tr, acc_tr, loss_te, acc_te = fit_res
# Rebuild the result with each loss converted via .item() (presumably from
# 0-dim tensors to plain Python numbers -- confirm against TorchTrainer.fit).
fit_res = FitResult(epo, [loss.item() for loss in loss_tr], acc_tr, [loss.item() for loss in loss_te], acc_te)

--- EPOCH 1/30 ---
train_batch:   0%|                                                                                                                                                                                                    | 0/25 [00:00<?, ?it/s]

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Noam\Desktop\AI\noam-ai\deep learning\vision\playground\utils\training.py", line 180, in _foreach_batch
    batch_res = forward_fn(data)
  File "C:\Users\Noam\Desktop\AI\noam-ai\deep learning\vision\playground\utils\training.py", line 252, in train_batch
    y_out = self.model.forward(X)
  File "C:\Users\Noam\Desktop\AI\noam-ai\deep learning\vision\playground\mobilenet.py", line 218, in forward
    features = self.feature_extractor(x)
  File "C:\Users\Noam\Anaconda3\envs\noam-ai\lib\site-packages\torch\nn\modules\module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\Noam\Anaconda3\envs\noam-ai\lib\site-packages\torch\nn\modules\container.py", line 92, in forward
    input = module(input)
  File "C:\Users\Noam\Anaconda3\envs\noam-ai\lib\site-packages\torch\nn\modules\module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\Noam\Desktop\AI\noam-ai\deep learni

TypeError: object of type 'NoneType' has no len()