In [1]:
import sys
sys.path.append(".") 

In [2]:
from dataset import get_dataset, get_handler
from model import get_net
from training import Training
from torchvision import transforms

import torch
import pickle
import random
import matplotlib.pyplot as plt
import numpy as np

### setting up args

In [3]:
# Experiment-wide settings.
seed = 123
num_images = 10
dataset_name = 'CALTECH'

# Per-dataset hyperparameters, keyed by dataset name. The entry selected below
# is the `args` object handed to Training.
args_pool = {
    'CALTECH': {
        'n_epoch': 10,
        'n_classes': 10,
        'fc_only': True,
        'transform': {
            # Training pipeline: random augmentation, then a 224x224 centre crop.
            'train': transforms.Compose([
                transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
                transforms.RandomRotation(degrees=15),
                transforms.ColorJitter(),
                transforms.RandomHorizontalFlip(),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                # Per-channel mean / std (these values match the ImageNet
                # statistics listed in the torchvision documentation).
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]),
            # Evaluation pipeline: deterministic centre crop only, same normalisation.
            'test': transforms.Compose([
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]),
        },
        'loader_tr_args': {'batch_size': 25, 'num_workers': 1},
        'loader_te_args': {'batch_size': 25, 'num_workers': 1},
        'loader_sample_args': {'batch_size': 25, 'num_workers': 1},
        'optimizer_args': {'lr': 0.001},
    },
}

args = args_pool[dataset_name]

# Seed every RNG this notebook imports. Python's `random` module was imported
# but left unseeded before, so anything drawing from it was not repeatable.
# torch.manual_seed stays last so the cell still displays its Generator repr.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f3f9a889490>

### original training data

In [4]:
# Fetch the train / test splits for the configured dataset.
x_train, y_train, x_test, y_test = get_dataset(dataset_name)

# Report the size of each split: len() for the sample collections,
# .shape for the label tensors.
for caption, extent in (
    ("x_train: ", len(x_train)),
    ("y_train: ", y_train.shape),
    ("x_test: ", len(x_test)),
    ("y_test: ", y_test.shape),
):
    print(caption, extent)

/datapool/256_ObjectCategoriesSubset/train
== Found 822 items 
== Found 10 classes
['/026.cake', '/057.dolphin-101', '/064.elephant-101', '/086.golden-gate-bridge', '/087.goldfish', '/170.rainbow', '/212.teapot', '/213.teddy-bear', '/239.washing-machine', '/241.waterfall']
['/datapool/256_ObjectCategoriesSubset/train/026.cake/026_0003.jpg'
 '/datapool/256_ObjectCategoriesSubset/train/026.cake/026_0004.jpg'
 '/datapool/256_ObjectCategoriesSubset/train/026.cake/026_0006.jpg'
 '/datapool/256_ObjectCategoriesSubset/train/026.cake/026_0008.jpg']
/datapool/256_ObjectCategoriesSubset/valid
== Found 212 items 
== Found 10 classes
['/026.cake', '/057.dolphin-101', '/064.elephant-101', '/086.golden-gate-bridge', '/087.goldfish', '/170.rainbow', '/212.teapot', '/213.teddy-bear', '/239.washing-machine', '/241.waterfall']
['/datapool/256_ObjectCategoriesSubset/valid/213.teddy-bear/213_0005.jpg'
 '/datapool/256_ObjectCategoriesSubset/valid/213.teddy-bear/213_0007.jpg'
 '/datapool/256_ObjectCategorie

In [None]:
# Reset the RNGs right before building the model so that re-running this cell
# reproduces the same starting point, no matter how much randomness earlier
# cells have consumed.
# NOTE(review): assumes get_net draws any random initialisation from these
# generators — confirm against model.py.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Baseline model and data handler for the original (non-replicated) training set.
net = get_net(dataset_name)
handler = get_handler(dataset_name)

### training for 10 epochs

In [None]:
# Bundle the data splits, model, handler and hyperparameters into a trainer,
# then run its training loop on the original training set.
training = Training(
    x_train, y_train,
    x_test, y_test,
    net, handler, args,
)
training.train()

feature extraction
epoch	train_loss	test_loss	train_acc	test_acc
1	2.3141		2.1218		0.237226		0.254717
2	2.0545		1.9216		0.36618		0.382075


In [None]:
# Evaluate the baseline model on the held-out split returned by get_dataset.
# NOTE(review): check_accuracy is implemented on the Training class, which is
# not shown here — presumably it reports test accuracy; confirm what it returns.
training.check_accuracy(x_test, y_test)

### replicating training set 10 times

In [None]:
# Number of back-to-back copies of the training set to build.
n_replicas = 10

# Repeat the whole training set end to end. Samples and labels are repeated the
# same number of times and in the same order, so index i of the replicated
# samples still pairs with index i of the replicated labels.
x_train_replicated = np.concatenate([x_train] * n_replicas)
y_train_replicated = torch.cat([y_train] * n_replicas)
print("x_train_replicated: ", len(x_train_replicated))
print("y_train_replicated: ", y_train_replicated.shape)

In [None]:
# Reset the RNGs right before building the model so this experiment's starting
# point is reproducible and does not depend on how much randomness the
# previous cells consumed.
# NOTE(review): assumes get_net draws any random initialisation from these
# generators — confirm against model.py.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Fresh model and handler for the replicated-data experiment, so nothing is
# carried over from an earlier training run.
net_replicated = get_net(dataset_name)
handler_replicated = get_handler(dataset_name)

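### The performance is way better! (Note: with the batch size unchanged, one epoch over the 10x-replicated set presumably runs about 10x as many training batches.)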

In [None]:
# Same trainer setup as the baseline, but fed the replicated training set;
# the test split and hyperparameters are unchanged.
rep_training = Training(
    x_train_replicated, y_train_replicated,
    x_test, y_test,
    net_replicated, handler_replicated, args,
)
rep_training.train()

In [None]:
# Evaluate the model trained on the replicated data, using the same held-out
# split as the baseline so the two numbers are comparable.
# NOTE(review): check_accuracy is implemented on the Training class, which is
# not shown here — confirm what it reports / returns.
rep_training.check_accuracy(x_test, y_test)

In [None]:
### How about using the original data but training for 10*10 = 100 epochs?

In [None]:
# Configuration for the "original data, 10x the epochs" experiment.
# It is derived from the existing entry instead of repeating the whole literal,
# so the only difference from the earlier configuration is the epoch count and
# the two cannot drift apart. This is a shallow copy: the transform pipelines
# and loader / optimizer argument dicts are shared with the earlier entry.
args_pool = {
    dataset_name: {**args_pool[dataset_name], 'n_epoch': 10 * 10},
}

# NOTE: `args` is rebound here, so any cell executed after this one (including
# a re-run of an earlier training cell) will see the 100-epoch setting.
args = args_pool[dataset_name]

In [None]:
# Reset the RNGs right before building the model so this experiment's starting
# point is reproducible and does not depend on how much randomness the
# previous cells consumed.
# NOTE(review): assumes get_net draws any random initialisation from these
# generators — confirm against model.py.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Fresh model and handler for the 100-epoch experiment on the original data.
net_100epochs = get_net(dataset_name)
handler_100epochs = get_handler(dataset_name)

In [None]:
# Train on the original (non-replicated) data with whatever `args` currently
# holds — the 100-epoch configuration when the cells are run in order.
training_100epochs = Training(
    x_train, y_train,
    x_test, y_test,
    net_100epochs, handler_100epochs, args,
)
training_100epochs.train()

In [None]:
# Evaluate the 100-epoch model on the same held-out split used for the other
# experiments.
# NOTE(review): check_accuracy is implemented on the Training class, which is
# not shown here — confirm what it reports / returns.
training_100epochs.check_accuracy(x_test, y_test)