# Classifying Caltech-101 with fixed pre-trained features

### 1. Preparing Caltech-101

* Download Caltech-101 data

In [None]:
%%bash
# Download the Caltech-101 archive into ./Caltech101 and unpack it in place.
target_dir=./Caltech101  # dataset dir
tarball="${target_dir}/101_ObjectCategories.tar.gz"
source_url=http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz

mkdir -p "${target_dir}"
# -n 5 is passed through to axel unchanged; -o names the output file.
axel -n 5 "${source_url}" -o "${tarball}"
tar xzf "${tarball}" -C "${target_dir}/"

* Scan the dataset and set up the training and testing splits

In [None]:
import os, random

# Fix the seed so that the same training and testing samples are drawn on every run.
random.seed(20)

# 30 training samples per class and at most 20 testing samples per class
training_sample = 30
testing_sample = 20

dataset_dir = '{}/Caltech101/101_ObjectCategories'.format(os.getcwd())

# os.listdir() returns entries in arbitrary order, so sort them: otherwise the
# class indices (and the result of the seeded shuffle below) can differ between
# machines even though the seed is fixed. Non-directory entries are skipped
# because each label must be a folder of images.
labels = sorted(
    name for name in os.listdir(dataset_dir)
    if os.path.isdir('{}/{}'.format(dataset_dir, name))
)
train_x, train_y, test_x, test_y = [], [], [], []

total = 0
for c, category in enumerate(labels):
    category_dir = '{}/{}'.format(dataset_dir, category)
    # Sort before shuffling so the seeded shuffle starts from a known order.
    files = sorted(os.listdir(category_dir))
    total += len(files)
    random.shuffle(files)

    # The first `training_sample` files go to training, the next
    # `testing_sample` (or fewer, if the class is small) go to testing.
    train_files = files[:training_sample]
    test_files = files[training_sample:training_sample + testing_sample]

    train_x.extend('{}/{}'.format(category_dir, img) for img in train_files)
    train_y.extend([c] * len(train_files))
    test_x.extend('{}/{}'.format(category_dir, img) for img in test_files)
    test_y.extend([c] * len(test_files))

print('Total images: {}'.format(total))
print('Train images: {}: {}'.format(len(train_x), len(train_y)))
print('Validation images: {}: {}'.format(len(test_x), len(test_y)))

# random.shuffle(train_list) # Be sure to shuffle training images (otherwise fine-tuning will fail)

### 2. Set up the pre-trained network

* Just the same as in the previous exercise

In [2]:
import chainer
from scipy.misc import imresize


# aspect ratio is kept after resizing
def resize_image(img, minimum_length=256):
    """Resize an image so that its shorter side equals ``minimum_length``.

    The longer side is scaled by the same factor (truncated to an integer),
    so the aspect ratio is kept. Two-dimensional (grey) inputs are expanded
    to three identical channels before resizing.

    Args:
        img: Image array whose first two axes are (rows, columns).
        minimum_length: Target length of the shorter side, in pixels.

    Returns:
        The result of ``imresize`` with bilinear interpolation in RGB mode.

    NOTE(review): ``scipy.misc.imresize`` is not available in recent SciPy
    releases -- confirm the environment pins a version that still ships it.
    """
    height, width = img.shape[:2]
    factor = float(minimum_length) / min(height, width)

    # Pin the shorter side to the target and scale the other one.
    if height <= width:
        target = (minimum_length, int(factor * width))
    else:
        target = (int(factor * height), minimum_length)

    # Grey picture: replicate the single plane into three channels.
    if img.ndim == 2:
        img = np.repeat(img[:, :, np.newaxis], 3, axis=2)

    return imresize(img, target, interp='bilinear', mode='RGB')

def crop_center(img, sizes=(224, 224)):
    """Cut a window of ``sizes`` out of the middle of a 3-axis image.

    Args:
        img: Array with exactly three axes (rows, columns, channels).
        sizes: ``(rows, columns)`` of the requested window.

    Returns:
        The centered slice of ``img``. When the image is smaller than the
        requested window along an axis, the slice is clamped to the image
        bounds, so the result may be smaller than ``sizes``.
    """
    rows, cols, _ = img.shape
    win_rows, win_cols = sizes

    def _bounds(length, window):
        # Window around the midpoint, clamped to [0, length].
        middle = int(length / 2)
        first = middle - int((window + 1) / 2)
        last = middle + int(window / 2)
        return max(first, 0), min(last, length)

    top, bottom = _bounds(rows, win_rows)
    left, right = _bounds(cols, win_cols)
    return img[top:bottom, left:right]

# Build the 50-layer ResNet and initialise it from the Caffe weights file.
resnet_weights = '/root/userspace/readonly/chapter1/resnet_50.caffemodel'
resnet_depth = 50
model = chainer.links.model.vision.resnet.ResNetLayers(resnet_weights, resnet_depth)

### 3. Extract features 

In [None]:
from skimage import io
import numpy as np
from tqdm import tqdm
# Batch size
batch = 10

# Send the model to the GPU
model.to_gpu()


def extract_pool5(paths, desc, batch_size=batch, feature_dim=2048):
    """Compute 'pool5' features for every image in ``paths``.

    Each image is read, resized so its shorter side is 256 pixels,
    center-cropped, and passed through ``model.extract`` in batches.

    Args:
        paths: List of image file paths.
        desc: Label shown on the tqdm progress bar.
        batch_size: Number of images per forward pass.
        feature_dim: Length of one 'pool5' feature vector.

    Returns:
        Array of shape ``(len(paths), feature_dim)``; row ``k`` holds the
        features of ``paths[k]``.
    """
    feats = np.empty((len(paths), feature_dim))
    for start in tqdm(range(0, len(paths), batch_size), desc=desc):
        imgs = [crop_center(resize_image(io.imread(path), 256))
                for path in paths[start:start + batch_size]]
        x = np.array(imgs, dtype=np.float32)
        # Inference only: test-mode behaviour, and no computational graph is
        # kept, so intermediate activations are not retained for backprop.
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            y = model.extract(x, layers=['pool5'])
        pooled = y['pool5']
        # Bring the result back to host memory before copying into numpy.
        pooled.to_cpu()
        feats[start:start + pooled.data.shape[0]] = pooled.data
    return feats


# Extract features from the training and validation datasets
features = {
    'train': extract_pool5(train_x, 'Extracting features (train)'),
    'validation': extract_pool5(test_x, 'Extracting features (validation)'),
}
crits = {'train': train_y, 'validation': test_y}

print('Done extracting features!')

## 4. Run SVM

In [None]:
import numpy as np
from sklearn import svm

print('The number of training dataset: {}'.format(features['train'].shape[0]))
print('The number of validation dataset: {}'.format(features['validation'].shape[0]))

# Train a linear SVM classifier on the extracted features
clf = svm.LinearSVC(C=1)
clf.fit(features['train'], crits['train'])
yPredTrain = clf.predict(features['train'])
yPredTest = clf.predict(features['validation'])

# Accuracy = fraction of predictions that match the labels. np.mean over the
# boolean match array always yields a float, whereas dividing an integer sum
# by an integer count is floor division under Python 2.
train_score = np.mean(np.equal(yPredTrain, crits['train']))
validation_score = np.mean(np.equal(yPredTest, crits['validation']))

print('Training score: {}'.format(train_score))
print('Validation score: {}'.format(validation_score))

## 5. Exercise
* Change the layer from which features are extracted and see how it affects the performance.
* Change the pre-trained network to the VGG-16 model.