<a href="https://colab.research.google.com/github/palver7/deeplearning/blob/master/FashionMNIST_with_VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%matplotlib inline

In [0]:
from torchvision.datasets import FashionMNIST
import torch.optim as optim
from torch.optim import lr_scheduler
import torch
import torch.nn as nn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
from torch.utils.data import random_split,DataLoader
import os
import copy

plt.ion()

In [0]:
# Data augmentation and normalization for training.
# Validation and test images are only resized and normalized (no augmentation).
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(224),
        transforms.RandomHorizontalFlip(),
        transforms.Grayscale(3),  # replicate the single grey channel into 3 channels
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ]),
    'test': transforms.Compose([
        transforms.Resize(224),
        transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
}

# Two views of the same training images: one with augmentation (for training)
# and one without (for validation). Splitting a single augmented dataset with
# random_split would apply RandomHorizontalFlip to the validation images too.
train_full = FashionMNIST(root='./data', train=True,
                          download=True, transform=data_transforms['train'])
val_full = FashionMNIST(root='./data', train=True,
                        download=True, transform=data_transforms['test'])

# A single seeded permutation keeps the two subsets disjoint and makes the
# 50000/10000 split reproducible across kernel restarts.
split_generator = torch.Generator().manual_seed(0)
split_indices = torch.randperm(len(train_full),
                               generator=split_generator).tolist()

trainset = torch.utils.data.Subset(train_full, split_indices[:50000])
valset = torch.utils.data.Subset(val_full, split_indices[50000:])

trainloader = DataLoader(trainset, batch_size=64, shuffle=True,
                         num_workers=4)

testset = FashionMNIST(root='./data', train=False,
                       download=True, transform=data_transforms['test'])

testloader = DataLoader(testset, batch_size=64, shuffle=False,
                        num_workers=4)

valloader = DataLoader(valset, batch_size=64, shuffle=False,
                       num_workers=4)

class_names = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')


# Lookup tables read by train_model, keyed by phase name.
image_datasets = {'train': trainset, 'val': valset}
dataloaders = {'train': trainloader, 'val': valloader}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}


# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [0]:
# Per-epoch accuracy and loss histories (one independent list each),
# kept for plotting loss and accuracy curves.
train_acc, train_loss = [], []
val_acc, val_loss = [], []

In [0]:
def train_model(model, criterion, optimizer, scheduler, val_acc, 
                val_loss, train_acc, train_loss, 
                num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a training phase followed by a validation phase. Batches
    are read from the module-level ``dataloaders`` dict, moved to the
    module-level ``device``, and per-epoch averages are computed with the
    module-level ``dataset_sizes`` dict (all keyed by ``'train'``/``'val'``).

    Args:
        model: network to optimize; modified in place.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        val_acc, val_loss, train_acc, train_loss: lists that receive one
            float per epoch. They are appended to in place, so passing the
            same lists to repeated calls accumulates history.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, with the weights from the epoch that had
        the highest validation accuracy loaded (the initial weights if no
        epoch scored above 0).
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    history = {'train': {'acc': train_acc, 'loss': train_loss},
               'val': {'acc': val_acc, 'loss': val_loss}}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics: weight the batch loss by the batch size so the
                # epoch average is correct when the last batch is smaller
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            # Advance the LR schedule only after this epoch's optimizer
            # steps; stepping it first would skip the initial learning rate.
            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            history[phase]['loss'].append(epoch_loss)
            history[phase]['acc'].append(epoch_acc)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model

In [0]:
# Start from the pretrained VGG16 and freeze every existing weight so the
# network acts as a fixed feature extractor.
model = models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad_(False)

# Swap the last classifier layer for a fresh 10-way linear layer.
# Parameters of newly constructed modules have requires_grad=True by default.
num_ftrs = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_ftrs, 10)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the new final layer are handed to the optimizer;
# the frozen layers are never updated.
optimizer = optim.SGD(
    model.classifier[6].parameters(),
    lr=0.001,
    momentum=0.9,
)

# Decay LR by a factor of 0.1 every 7 epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [76]:
# Run a single epoch; the four history lists are appended to in place.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    val_acc=val_acc,
    val_loss=val_loss,
    train_acc=train_acc,
    train_loss=train_loss,
    num_epochs=1,
)

Epoch 0/0
----------
train Loss: 0.6885 Acc: 0.7598
val Loss: 0.4896 Acc: 0.8285

Training complete in 5m 11s
Best val Acc: 0.828500


This section compares training accuracy, validation accuracy, and training time between the original normalization (three different values, one per channel) and a normalization that uses the same values for all three channels.

### The first results are:

Epoch 0/4
----------
train Loss: 0.9629 Acc: 0.6750
val Loss: 0.5418 Acc: 0.8126

Epoch 1/4
----------
train Loss: 0.8339 Acc: 0.7141
val Loss: 0.5160 Acc: 0.8164

Epoch 2/4
----------
train Loss: 0.8155 Acc: 0.7226
val Loss: 0.5120 Acc: 0.8215

Epoch 3/4
----------
train Loss: 0.8101 Acc: 0.7248
val Loss: 0.4970 Acc: 0.8263

Epoch 4/4
----------
train Loss: 0.8097 Acc: 0.7244
val Loss: 0.5103 Acc: 0.8220

Training complete in 30m 45s
Best val Acc: 0.826300 

### The second results are:

Epoch 0/4
----------
train Loss: 0.9374 Acc: 0.6826
val Loss: 0.5086 Acc: 0.8197

Epoch 1/4
----------
train Loss: 0.8178 Acc: 0.7212
val Loss: 0.4702 Acc: 0.8323

Epoch 2/4
----------
train Loss: 0.7922 Acc: 0.7286
val Loss: 0.4886 Acc: 0.8323

Epoch 3/4
----------
train Loss: 0.7769 Acc: 0.7355
val Loss: 0.4691 Acc: 0.8311

Epoch 4/4
----------
train Loss: 0.7859 Acc: 0.7318
val Loss: 0.4946 Acc: 0.8280

Training complete in 21m 9s
Best val Acc: 0.832300

It turns out that the validation and training accuracy increase slightly, and training completes more than 9 minutes faster (21m 9s versus 30m 45s).

What happens if I increase the number of samples in a batch from 4 to 64, as in the original model?

Epoch 0/1
----------
train Loss: 1.1336 Acc: 0.7011
val Loss: 0.7553 Acc: 0.7829

Epoch 1/1
----------
train Loss: 0.7099 Acc: 0.7825
val Loss: 0.6215 Acc: 0.8052

Training complete in 4m 2s
Best val Acc: 0.805200

The loss increased and the accuracy decreased. However, training became faster: previously, with 2 epochs and a batch size of 4, training completed in 8 minutes; with the batch size increased to 64, it completed in 4 minutes.