In [1]:
import sys

# Make modules in the current working directory importable.
# Guarded so that re-running this cell does not keep appending duplicates.
if "." not in sys.path:
    sys.path.append(".")

In [2]:
from dataset import get_dataset, get_handler
from model import get_net
from training import Training
from torchvision import transforms

import torch
import pickle
import random
import matplotlib.pyplot as plt
import numpy as np

### setting up args

In [3]:
# --- experiment configuration ---------------------------------------------
seed = 123                # single seed shared by every random number generator seeded below
num_images = 10           # NOTE(review): not referenced by any cell visible here — confirm it is still needed
dataset_name = 'CALTECH'  # selects the entry of args_pool used for this run


def _make_transform():
    """Build the image preprocessing pipeline used for both splits.

    Center-crops to 224, converts to a tensor and normalizes each of the
    three channels with the given mean / std lists (these look like the
    commonly used ImageNet statistics — confirm against the backbone in use).
    A fresh ``Compose`` is returned on every call so the train and test
    pipelines remain separate objects.
    """
    return transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])


# Per-dataset hyper-parameters, keyed by dataset name.
args_pool = {
    'CALTECH':
    {
        'n_epoch': 10,
        'n_classes': 10,
        # presumably restricts training to the final fully-connected layer — confirm in model/training code
        'fc_only': True,
        'transform':
        {
            'train': _make_transform(),
            'test': _make_transform(),
        },
        'loader_tr_args': {'batch_size': 25, 'num_workers': 1},
        'loader_te_args': {'batch_size': 25, 'num_workers': 1},
        'loader_sample_args': {'batch_size': 25, 'num_workers': 1},
        'optimizer_args': {'lr': 0.001}
    }
}

args = args_pool[dataset_name]

# Seed every RNG the notebook imports (Python's `random` was previously left
# unseeded), so a fresh "Restart & Run All" is repeatable.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f7a14124470>

### original training data

In [4]:
# Fetch the train / test splits for the configured dataset.
# x_* are sized with len(); y_* expose a .shape attribute.
x_train, y_train, x_test, y_test = get_dataset(dataset_name)

# Report how many samples / labels each split contains.
print(f"x_train:  {len(x_train)}")
print(f"y_train:  {y_train.shape}")
print(f"x_test:  {len(x_test)}")
print(f"y_test:  {y_test.shape}")

['/026.cake', '/057.dolphin-101', '/064.elephant-101', '/086.golden-gate-bridge', '/087.goldfish', '/170.rainbow', '/212.teapot', '/213.teddy-bear', '/239.washing-machine', '/241.waterfall']
['/home/nisha/data/256_ObjectCategoriesSubset/train/170.rainbow/170_0003.jpg'
 '/home/nisha/data/256_ObjectCategoriesSubset/train/170.rainbow/170_0005.jpg'
 '/home/nisha/data/256_ObjectCategoriesSubset/train/170.rainbow/170_0007.jpg'
 '/home/nisha/data/256_ObjectCategoriesSubset/train/170.rainbow/170_0012.jpg']
['/026.cake', '/057.dolphin-101', '/064.elephant-101', '/086.golden-gate-bridge', '/087.goldfish', '/170.rainbow', '/212.teapot', '/213.teddy-bear', '/239.washing-machine', '/241.waterfall']
['/home/nisha/data/256_ObjectCategoriesSubset/valid/241.waterfall/241_0015.jpg'
 '/home/nisha/data/256_ObjectCategoriesSubset/valid/241.waterfall/241_0020.jpg'
 '/home/nisha/data/256_ObjectCategoriesSubset/valid/241.waterfall/241_0023.jpg'
 '/home/nisha/data/256_ObjectCategoriesSubset/valid/241.waterfall

In [5]:
# Reset every RNG right before building the model, so that any random
# initialisation done by get_net / get_handler (and the training that follows)
# does not depend on which cells were executed earlier in this kernel session.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Network and data handler for the baseline run on the original training set.
net = get_net(dataset_name)
handler = get_handler(dataset_name)

### training for 10 epochs

In [6]:
# Baseline experiment: original training split, evaluated against the test split,
# using the network, handler and hyper-parameters prepared in the cells above.
training = Training(
    x_train, y_train,
    x_test, y_test,
    net, handler, args,
)

# Run the training loop (the saved output shows one loss/accuracy row per epoch).
training.train()

feature extraction
epoch	train_loss	test_loss	train_acc	test_acc
1	2.3224		2.1863		0.224771		0.238095
2	2.1032		1.9929		0.334862		0.309524
3	1.9386		1.8545		0.434251		0.446429
4	1.795		1.704		0.555046		0.535714
5	1.6807		1.5911		0.617737		0.589286
6	1.5571		1.4856		0.685015		0.660714
7	1.4336		1.3761		0.752294		0.708333
8	1.3627		1.2896		0.799694		0.767857
9	1.2709		1.2043		0.827217		0.77381
10	1.176		1.1567		0.847095		0.767857


In [7]:
# Evaluate the baseline model on the test split; the returned value is shown as the cell output.
training.check_accuracy(x_test, y_test)

0.7678571428571429

### replicating training set 10 times

In [8]:
# How many back-to-back copies of the training split to build.
n_replicas = 10

# Repeat inputs and labels the same number of times and in the same order,
# so that sample i of every copy keeps its original label.
x_train_replicated = np.concatenate([x_train] * n_replicas)
y_train_replicated = torch.cat([y_train] * n_replicas)

# Inputs and labels must remain aligned one-to-one after replication.
assert len(x_train_replicated) == len(y_train_replicated) == n_replicas * len(x_train)

print("x_train_replicated: ", len(x_train_replicated))
print("y_train_replicated: ", y_train_replicated.shape)

x_train_replicated:  6540
y_train_replicated:  torch.Size([6540])


In [9]:
# Reset every RNG before building this model. Without this, the starting
# weights and any data shuffling depend on how much randomness earlier cells
# (e.g. a previous training run) consumed, so the result would change with
# execution order.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Fresh network and data handler for the run on the replicated training set.
net_replicated = get_net(dataset_name)
handler_replicated = get_handler(dataset_name)

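### the performance is way better! (caveat: every image now appears 10 times per epoch, so each epoch also performs 10× as many weight updates)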

In [None]:
# Replication experiment: same test split and hyper-parameters as the baseline,
# but trained on the 10x replicated inputs/labels with its own fresh network.
rep_training = Training(
    x_train_replicated, y_train_replicated,
    x_test, y_test,
    net_replicated, handler_replicated, args,
)

# Run the training loop for the replicated data set.
rep_training.train()

feature extraction
epoch	train_loss	test_loss	train_acc	test_acc
1	1.6335		1.1347		0.853211		0.821429
2	0.9167		0.73		0.940367		0.892857
3	0.6355		0.558		0.954128		0.928571


In [None]:
# Evaluate the model trained on the replicated data against the (unreplicated) test split.
rep_training.check_accuracy(x_test, y_test)

### how about using original data but 10*10 epochs?

In [None]:
# Same configuration as the 10-epoch runs, with only the epoch count changed
# to 10 * 10. It is derived from the existing entry instead of repeating the
# whole dictionary, so the two configurations cannot drift apart.
# The copy is shallow: nested values (transforms, loader/optimizer dicts) are
# shared with the previous configuration.
args_pool = {
    **args_pool,
    dataset_name: {**args_pool[dataset_name], 'n_epoch': 10 * 10},
}

# NOTE: this rebinds the global `args`; any earlier training cell re-run after
# this point will pick up the 100-epoch setting.
args = args_pool[dataset_name]

In [None]:
# Reset every RNG before building this model, so its starting point does not
# depend on how much randomness the previously executed cells consumed.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Fresh network and data handler for the 100-epoch run on the original training set.
net_100epochs = get_net(dataset_name)
handler_100epochs = get_handler(dataset_name)

In [None]:
# Long-training experiment: original (unreplicated) training split with the
# current `args`, which the configuration cell above set to 10*10 epochs.
training_100epochs = Training(
    x_train, y_train,
    x_test, y_test,
    net_100epochs, handler_100epochs, args,
)

# Run the training loop.
training_100epochs.train()

In [None]:
# Evaluate the 100-epoch model on the test split, for comparison with the other two runs.
training_100epochs.check_accuracy(x_test, y_test)