In [1]:
import os
from time import time

import numpy as np

from sklearn.linear_model import RidgeClassifier

import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from tlopu.model_utils import pick_model
from tlopu.features import fast_conv_features, decoding, get_random_features
from tlopu.dataset import Animals10

In [2]:
def get_mean_std(train_loader):
    """
    Compute the per-channel mean and standard deviation of a dataset.

    The statistics are accumulated over every pixel of every image, so the result does
    not depend on how the samples are split into batches (a smaller last batch is
    weighted by its actual size instead of counting as much as a full one).

    Parameters
    ----------
    train_loader: iterable yielding (image, target) pairs, where image is a batch of
        shape (batch, channels, height, width).

    Returns
    -------
    mean: torch.Tensor of shape (channels,), float32, mean per channel.
    std: torch.Tensor of shape (channels,), float32, standard deviation per channel
        (Bessel-corrected, like the default of torch.std).

    Raises
    ------
    ValueError: if the loader yields no images.
    """
    pixel_sum, pixel_sq_sum, n_pixels = None, None, 0

    for image, _ in train_loader:
        # Accumulate in float64: a sum of squares over a whole dataset loses precision in float32.
        image = image.to(torch.float64)
        batch_sum = image.sum(dim=(0, 2, 3)).cpu()
        batch_sq_sum = (image ** 2).sum(dim=(0, 2, 3)).cpu()

        if pixel_sum is None:
            pixel_sum, pixel_sq_sum = batch_sum, batch_sq_sum
        else:
            pixel_sum += batch_sum
            pixel_sq_sum += batch_sq_sum

        # Number of values contributing to each channel in this batch.
        n_pixels += image.shape[0] * image.shape[2] * image.shape[3]

    if n_pixels == 0:
        raise ValueError("Cannot compute mean/std: the loader yielded no images.")

    mean = pixel_sum / n_pixels
    # Unbiased variance from the accumulated moments; the clamp guards against tiny
    # negative values produced by rounding.
    variance = (pixel_sq_sum - n_pixels * mean ** 2) / max(n_pixels - 1, 1)
    std = torch.sqrt(torch.clamp(variance, min=0))

    return mean.to(torch.float32), std.to(torch.float32)

In [3]:
def get_loaders(dataset_path, batch_size=32, num_workers=12, mean=None, std=None):
    """
    Function to load the train/test loaders.

    Images are resized to 224x224 and converted to tensors; when both `mean` and `std`
    are given, a per-channel normalization is appended to the transform pipeline.

    Parameters
    ----------
    dataset_path: str, root folder of the datasets; "animals10/raw-img/" is appended to it.

    batch_size: int, batch size.
    num_workers: int, number of workers.
    mean: None or torch.Tensor, mean per channel. Must be given together with `std`.
    std: None or torch.Tensor, std per channel. Must be given together with `mean`.

    Returns
    -------
    train_loader: Pytorch dataloader, dataloader for the train set.
    test_loader: Pytorch dataloader, dataloader for the test set.

    Raises
    ------
    ValueError: if only one of `mean` / `std` is provided.
    """

    # Normalization needs both statistics: fail here with a clear message rather than
    # later inside the transform (mean only) or by silently skipping it (std only).
    if (mean is None) != (std is None):
        raise ValueError("mean and std must be provided together (or both left as None).")

    transform_list = [transforms.Resize((224, 224)), transforms.ToTensor()]
    if mean is not None:
        transform_list.append(transforms.Normalize(mean=mean, std=std))
    data_transform = transforms.Compose(transform_list)

    dataset_path = os.path.join(dataset_path, "animals10/raw-img/")

    # test_ratio is forwarded to Animals10 unchanged for both splits
    # (presumably the percentage of samples held out for testing - TODO confirm).
    train_dataset = Animals10(dataset_path, test_ratio=20, mode="train", transform=data_transform)
    test_dataset = Animals10(dataset_path, test_ratio=20, mode="test", transform=data_transform)

    # Both loaders shuffle their samples.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    return train_loader, test_loader

In [4]:
# Data-loading settings shared by both loader constructions below.
batch_size = 32
num_workers = 12
dataset_path = "/data/home/luca/datasets/"
loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)

# First pass: loaders without mean/std, used only to estimate the per-channel statistics.
train_loader, test_loader = get_loaders(dataset_path, **loader_kwargs)
print("Computing dataset mean...")
mean, std = get_mean_std(train_loader)

# Second pass: rebuild both loaders, this time passing the estimated statistics.
train_loader, test_loader = get_loaders(dataset_path, mean=mean, std=std, **loader_kwargs)

Computing dataset mean...


In [5]:
# Backbone and runtime settings; several of these are reused by later cells.
model_name = "densenet169"
model_options = "full"
device = "cuda:0"
encode_type = "positive"
model_dtype = "float32"

# pick_model returns the network together with its output size.
model, output_size = pick_model(
    model_name=model_name,
    model_options=model_options,
    device=device,
    dtype=model_dtype,
)

densenet169 model loaded successfully.


In [6]:
# Keyword arguments common to the train and test feature-extraction calls.
conv_kwargs = dict(device=device, encode_type=encode_type, dtype=model_dtype)

# Train split: encoded convolutional features, labels and the two timings.
(enc_train_features, train_labels,
 train_conv_time, train_encode_time) = fast_conv_features(train_loader, model, output_size, **conv_kwargs)
print(
    "{0} - train conv features time = {1:3.2f} s\tencoding = {2:1.5f} s\tshape = {3}".format(
        model_name, train_conv_time, train_encode_time, enc_train_features.shape
    )
)

# Test split: same extraction with the same settings.
(enc_test_features, test_labels,
 test_conv_time, test_encode_time) = fast_conv_features(test_loader, model, output_size, **conv_kwargs)
print(
    "{0} - test conv features time  = {1:3.2f} s\tencoding = {2:1.5f} s\tshape = {3}".format(
        model_name, test_conv_time, test_encode_time, enc_test_features.shape
    )
)

densenet169 - train conv features time = 44.78 s	encoding = 0.01891 s	shape = (19363, 81536)
densenet169 - test conv features time  = 11.96 s	encoding = 0.00421 s	shape = (4846, 81536)


In [7]:
# Use half as many random features as there are training samples.
n_train_samples = enc_train_features.shape[0]
n_components = n_train_samples // 2

# Random projection of the encoded features (train first, then test).
train_proj_time, train_random_features = get_random_features(enc_train_features, n_components)
test_proj_time, test_random_features = get_random_features(enc_test_features, n_components)

# Decoding step, called with decode_type=None for both splits.
train_decode_time, dec_train_random_features = decoding(train_random_features, decode_type=None)
test_decode_time, dec_test_random_features = decoding(test_random_features, decode_type=None)

print(
    "Train projection time = {0:3.2f} s\tTrain decode time = {1:3.2f} s".format(
        train_proj_time, train_decode_time
    )
)
print(
    "Test projection time = {0:3.2f} s\tTest decode time = {1:3.2f} s".format(
        test_proj_time, test_decode_time
    )
)

Train projection time = 13.08 s	Train decode time = 0.10 s
Test projection time = 3.46 s	Test decode time = 0.02 s


In [8]:
# Regularization grid: `alpha_space` evenly spaced mantissas in [1, 9] for every power
# of ten from 10**alpha_exp_min up to and including 10**alpha_exp_max.
alpha_exp_min, alpha_exp_max, alpha_space = 6, 7, 4

alpha_mant = np.linspace(1, 9, alpha_space)
alpha_decades = []
for exponent in range(alpha_exp_min, alpha_exp_max + 1):
    alpha_decades.append(alpha_mant * 10 ** exponent)
alphas = np.concatenate(alpha_decades)

# Fit one ridge classifier per alpha and report train/test accuracy in percent.
for alpha in alphas:
    clf = RidgeClassifier(alpha=alpha)

    since = time()
    clf.fit(dec_train_random_features, train_labels)
    fit_time = time() - since  # measured but not included in the report below

    train_accuracy = clf.score(dec_train_random_features, train_labels) * 100
    test_accuracy = clf.score(dec_test_random_features, test_labels) * 100

    print(
        "alpha = {0:.2e}\tTrain acc = {1:2.2f}\tTest acc = {2:2.2f}".format(
            alpha, train_accuracy, test_accuracy
        )
    )

alpha = 1.00e+06	Train acc = 99.79	Test acc = 94.70
alpha = 9.00e+06	Train acc = 99.01	Test acc = 95.01
alpha = 1.00e+07	Train acc = 98.94	Test acc = 95.07
alpha = 9.00e+07	Train acc = 96.48	Test acc = 93.81
alpha = 1.00e+08	Train acc = 96.25	Test acc = 93.75
alpha = 9.00e+08	Train acc = 88.42	Test acc = 85.82
