# A Gentle Introduction to Transfer Learning

In [11]:
import sys
import os
import numpy as np
from collections import Counter
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torchvision import datasets, transforms
from utils import get_gpu_name, get_number_processors

# Record the software stack this notebook ran on, so results can be reproduced.
for label, value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("PyTorch: ", torch.__version__),
    ("Numpy: ", np.__version__),
):
    print(label, value)
# The GPU query is issued last, after the version lines have been printed.
print("GPU: ", get_gpu_name())

# Load IPython's autoreload extension and switch it to mode 2, which re-imports
# modules before each cell runs, so edits to local helper modules such as
# `utils` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
PyTorch:  0.2.0_3
Numpy:  1.13.3
GPU:  ['Tesla M60', 'Tesla M60', 'Tesla M60', 'Tesla M60']
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Enable cuDNN's auto-tuner, which benchmarks the available convolution
# algorithms and keeps the fastest one for the shapes it sees.
torch.backends.cudnn.benchmark = True

In [None]:
# Number of images per mini-batch.
BATCH_SIZE = 64
# Names of the dataset splits (one sub-folder per split).
SETS = ['train', 'val']

## Model and utilities

In [3]:
def finetune():
    """Placeholder for the fine-tuning routine.

    Not implemented yet: takes no arguments, does nothing and returns ``None``.
    """
    return None

In [None]:
def freeze_and_train():
    """Placeholder for the freeze-and-train routine.

    Not implemented yet: takes no arguments, does nothing and returns ``None``.
    """
    return None

## Datasets
We are going to use the [hymenoptera](https://download.pytorch.org/tutorial/hymenoptera_data.zip) dataset, a small image collection with two classes (ants and bees) split into `train` and `val` folders.

In [9]:
# Base folder (relative to the notebook) that holds every dataset.
DATA_ROOT = 'data'
# Folder of the hymenoptera dataset inside DATA_ROOT.
HYMENOPTERA_ROOT = os.path.join(DATA_ROOT, 'hymenoptera_data')

In [24]:
def create_dataset(data_dir, batch_size=32, sets=['train', 'val'], verbose=True):
    """Build one shuffled ``DataLoader`` per split of an image-folder dataset.

    Each split is read with ``datasets.ImageFolder`` from
    ``os.path.join(data_dir, split)``, so ``data_dir`` must contain one
    sub-folder per entry of ``sets``, each holding one sub-folder per class.

    Args:
        data_dir (str): Root folder of the dataset.
        batch_size (int): Number of samples per batch in every loader.
        sets (list of str): Split names to load. Every name must be a key of
            the transform table below (``'train'`` or ``'val'``).
        verbose (bool): If True, print the class names, the size of each
            split and the number of samples per class index in each split.

    Returns:
        dict: Maps each split name to its ``DataLoader``.
    """
    # NOTE(review): RandomSizedCrop and Scale are the names used by older
    # torchvision releases; newer ones call them RandomResizedCrop and Resize.
    # Kept as-is to match the environment this notebook was run on.
    # The mean/std triples are presumably the ImageNet channel statistics
    # expected by pretrained torchvision models -- TODO confirm.
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in sets}

    # get_number_processors has to be *called*: DataLoader needs an integer
    # worker count, not the function object itself.
    num_workers = get_number_processors()
    dataloaders = {x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=num_workers) for x in sets}

    if verbose:
        dataset_sizes = {x: len(image_datasets[x]) for x in sets}
        class_names = image_datasets[sets[0]].classes
        print("There are {} clases in the dataset: {}".format(len(class_names), class_names))
        print("Sets sizes: ", dataset_sizes)
        for x in sets:
            # Count labels from ImageFolder's (path, class_index) index rather
            # than iterating the dataset, which would load and transform every
            # image just to read its label.
            c = dict(Counter(label for _, label in image_datasets[x].imgs))
            print("Number of items in set {}: {}".format(x, c))
    return dataloaders

In [25]:
# Build the train/val loaders for the hymenoptera images.
# NOTE(review): BATCH_SIZE and SETS are defined earlier in the notebook but are
# not passed here, so create_dataset's defaults (batch_size=32,
# sets=['train', 'val']) apply -- confirm this is intended.
dataloaders = create_dataset(data_dir=HYMENOPTERA_ROOT)

There are 2 clases in the dataset: ['ants', 'bees']
Sets sizes:  {'train': 244, 'val': 153}
Number of items in set train: {0: 123, 1: 121}
Number of items in set val: {0: 70, 1: 83}
