In [None]:
import random
import functools
import itertools
import operator
import logging
from collections import defaultdict
import gc

from IPython import display
with __import__('importnb').Notebook():
    # github.com/deathbeds/importnb
    import eth80
    from feature_extraction import TuckerFeatureExtractor, calc_error

import numpy as np
import scipy.linalg as la
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import LabelEncoder

import tensorly as tl
from tensorly.decomposition import partial_tucker
import tensorly.tenalg as ta

import torch
import torch.nn as nn
import torch.nn.functional as tnnf
from torch.utils.data import TensorDataset
from ignite.engine import (
    Events, Engine, create_supervised_trainer, create_supervised_evaluator
)
from ignite.metrics import Accuracy

from libcrap import save_json, load_json

In [None]:
# Make TensorLy operate on PyTorch tensors.
tl.set_backend("pytorch")
# Handle for the first CUDA GPU. NOTE(review): `device` is not referenced again in the
# cells visible here; tensor placement relies on the default tensor type set below.
device = torch.device("cuda:0")
# Tensors created without an explicit dtype/device become float32 CUDA tensors from now on,
# so the notebook needs a CUDA-capable machine to run.
torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [None]:
# Load the ETH-80 data through the project's `eth80` notebook module.
# NOTE(review): the directory is an absolute, machine-specific path; point it at your local
# copy of the dataset before running.
# `use_torch=True` presumably makes the loader return torch tensors (TODO: confirm in eth80).
dataset, y, label_encoder, object_classes = eth80.load_eth80(
    "/mnt/hdd_1tb/smiles_backup/Documents/datasets/eth80/eth80-cropped-close128/",
    use_torch=True
)

In [None]:
# def stratified_split(num_test_per_class):
#     obj_indices_sorted_by_class = torch.argsort(y)
#     test_objects = set()
#     for label in range(eth80.NUM_CLASSES):
#         obj_indices_in_class = random.choices(
#             range(eth80.NUM_OBJECTS_PER_CLASS), k=num_test_per_class
#         )
#         new_test_objects = obj_indices_sorted_by_class[[
#             label*10 + ind_in_class for ind_in_class in obj_indices_in_class
#         ]]
#         #import pdb; pdb.set_trace()
#         test_objects.update(x.item() for x in new_test_objects)
#     train_objects = sorted(frozenset(range(eth80.NUM_OBJECTS)) - test_objects)
#     test_objects = sorted(test_objects)
#     X_train = dataset[train_objects]
#     y_train = y[train_objects]
#     X_test = dataset[test_objects]
#     y_test = y[test_objects]
#     return X_train, y_train, X_test, y_test

In [None]:
class MPRLowRankCP(nn.Module):
    """Polynomial model whose weights are stored as ``order`` low-rank factors.

    Each factor has shape ``(num_outputs, actual_num_inputs, rank)``. For an
    input batch ``X`` the output is::

        out[n, o] = sum_r  prod_k  sum_f X[n, f] * factors[k][o, f, r]

    i.e. every factor projects the input linearly, the ``order`` projections
    are multiplied element-wise and the result is summed over the ``rank``
    components. With ``bias`` enabled a constant-one feature is appended to
    the input first, which is why ``actual_num_inputs == num_inputs + 1``.

    Args:
        num_inputs: number of features per sample (without the bias column).
        num_outputs: number of outputs per sample.
        order: number of factors that are multiplied together; must be >= 1.
        rank: number of components summed per output; must be >= 1.
        bias: whether to append a constant-one feature to the input.
    """

    def __init__(self, num_inputs, num_outputs, order, rank, bias=True):
        assert order >= 1, "order must be at least 1"
        assert rank >= 1, "rank must be at least 1"
        super().__init__()
        self.order = order
        self.rank = rank
        self.bias = bias
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        # forward() appends exactly one column when `bias` is truthy.
        self.actual_num_inputs = num_inputs + int(bool(bias))
        self.factors = nn.ParameterList([
            # Contents are placeholders; reset_parameters() fills them below.
            nn.Parameter(
                torch.empty(self.num_outputs, self.actual_num_inputs, self.rank),
                requires_grad=True
            )
            for _ in range(self.order)
        ])
        self.reset_parameters()

    def _actual_num_features(self):
        """Return the number of input features including the bias column."""
        return self.actual_num_inputs

    def reset_parameters(self):
        """Re-draw every factor from a standard normal distribution, in place."""
        with torch.no_grad():
            for factor in self.factors:
                factor.normal_()

    def forward(self, X):
        """Evaluate the model.

        Args:
            X: 2-D tensor of shape ``(num_samples, num_inputs)``.

        Returns:
            Tensor of shape ``(num_samples, num_outputs)``.
        """
        assert X.dim() == 2, "X must have shape (num_samples, num_inputs)"
        if self.bias:
            # new_ones() inherits dtype and device from X, so the concatenation
            # does not depend on the global default tensor type.
            X = torch.cat([X, X.new_ones(X.shape[0], 1)], dim=1)
        projections = [
            # n - sample index, f - feature index,
            # r - rank-one component index, o - output index
            torch.einsum("nf,ofr->nor", X, factor)
            for factor in self.factors
        ]
        return functools.reduce(operator.mul, projections).sum(dim=2)

In [None]:
def generate_configuration():
    """Draw one random hyperparameter configuration.

    The returned dict uses the keys that ``train_test`` reads:
    ``extracted_features_shape`` (a 4-tuple), ``learning_rate``,
    ``regularization_coefficient``, ``polynom_order`` and ``weights_rank``.
    """
    # Largest allowed extracted size per mode, in sampling order:
    # angles mode, channels mode, image height, image width.
    mode_upper_bounds = (
        eth80.NUM_ANGLES // 2,
        3,
        eth80.IMAGE_HEIGHT // 4,
        eth80.IMAGE_WIDTH // 4,
    )
    features_shape = tuple(
        random.randint(1, upper_bound) for upper_bound in mode_upper_bounds
    )

    # Learning rate and regularization are sampled as powers of ten.
    lr_exponent = random.randint(-13, -1)
    regularization_exponent = random.randint(-8, 8)
    order = random.randint(2, 3)
    rank = random.randint(1, 100)

    return {
        "extracted_features_shape": features_shape,
        "learning_rate": 10**lr_exponent,
        "regularization_coefficient": 10**regularization_exponent,
        "polynom_order": order,
        "weights_rank": rank,
    }

It seems the best learning rate is around 1e-9 to 1e-10. Anything greater than 1e-9 produces NaNs :(. Anything smaller is too slow.

And I don't know the best values for the other hyperparameters yet, because it turns out that too few of my experiments used a suitable learning rate.

extracted features dimensions [18, 3, 29, 10], lr 1.000000e-09, polynomial order 3, regularization coefficient 1.000000e-03, weights_rank 80 - this gave me the best test accuracy so far.

In [None]:
def train_test(
    configuration, num_epochs, eval_every_num_epochs=10, plot_every_num_epochs=50
):
    """Train an ``MPRLowRankCP`` classifier on extracted features; return test accuracy.

    The module-level ``dataset`` / ``y`` are split with
    ``eth80.stratified_split_torch``. A ``TuckerFeatureExtractor`` is fitted on
    the training part only and then applied to both parts. The flattened
    results are fed to the model, which is trained with SGD and cross-entropy
    loss using the whole training set as a single batch per epoch.

    Args:
        configuration: mapping with the keys ``"extracted_features_shape"``,
            ``"polynom_order"``, ``"weights_rank"``, ``"learning_rate"`` and
            ``"regularization_coefficient"`` (passed to SGD as ``weight_decay``).
        num_epochs: number of training epochs.
        eval_every_num_epochs: loss and accuracy on the train and test parts
            are recorded whenever the epoch number is a multiple of this value.
        plot_every_num_epochs: the loss/accuracy plot is redrawn whenever the
            epoch number is a multiple of this value.

    Returns:
        The most recently recorded test accuracy (fraction of correct
        predictions). If no periodic evaluation took place, the model is
        evaluated once after training and that accuracy is returned.
    """
    # The literal 2 is presumably the number of held-out objects per class
    # (TODO: confirm against eth80.stratified_split_torch).
    dataset_train, y_train, dataset_test, y_test = eth80.stratified_split_torch(
        dataset, y, 2
    )

    extractor = TuckerFeatureExtractor(
        dataset.shape[1:],
        configuration["extracted_features_shape"]
    )
    # Fit on the training part only, then reuse the fitted extractor for test.
    core_train = extractor.fit_transform(dataset_train)
    core_test = extractor.transform(dataset_test)

    # One flat feature vector per sample.
    X_train = core_train.reshape(core_train.shape[0], -1)
    X_test = core_test.reshape(core_test.shape[0], -1)
    assert X_train.shape[1] == X_test.shape[1]
    num_extracted_features = X_train.shape[1]

    model = MPRLowRankCP(
        num_extracted_features, eth80.NUM_CLASSES,
        order=configuration["polynom_order"],
        rank=configuration["weights_rank"],
        bias=True
    )

    optimizer = torch.optim.SGD(
        model.parameters(), lr=configuration["learning_rate"],
        weight_decay=configuration["regularization_coefficient"]
    )

    trainer = create_supervised_trainer(
        model=model, optimizer=optimizer,
        loss_fn=tnnf.cross_entropy
    )

    # Diagnostic history shown in the plot.
    train_epochs = []
    train_losses = []
    train_accuracies = []
    test_epochs = []
    test_losses = []
    test_accuracies = []

    def evaluate(features, targets):
        """Return ``(loss, accuracy)`` of the current model on the given data."""
        model.train(False)
        # Evaluation needs no gradients; skipping the autograd graph avoids
        # allocating memory that would never be used.
        with torch.no_grad():
            logits = model(features)
            loss = tnnf.cross_entropy(logits, targets).item()
            predictions = logits.argmax(dim=1)
            accuracy = (targets == predictions).sum().item() / len(targets)
        return loss, accuracy

    # Handlers are registered in this order on purpose: both evaluations must
    # have appended their values before the plot for the same epoch is drawn.
    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_on_test(engine):
        if engine.state.epoch % eval_every_num_epochs == 0:
            test_loss, test_accuracy = evaluate(X_test, y_test)
            test_epochs.append(engine.state.epoch)
            test_losses.append(test_loss)
            test_accuracies.append(test_accuracy)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_on_train(engine):
        if engine.state.epoch % eval_every_num_epochs == 0:
            train_loss, train_accuracy = evaluate(X_train, y_train)
            train_epochs.append(engine.state.epoch)
            train_losses.append(train_loss)
            train_accuracies.append(train_accuracy)

    @trainer.on(Events.EPOCH_COMPLETED)
    def update_plot(engine):
        if engine.state.epoch % plot_every_num_epochs == 0:
            display.clear_output(wait=True)
            fig, axes = plt.subplots(ncols=2, figsize=(14, 5))
            axes = axes.flatten()
            axes[0].set_title("Loss")
            axes[0].plot(train_epochs, train_losses, label="train loss")
            axes[0].plot(test_epochs, test_losses, label="test loss")
            axes[0].legend()
            # A plot may be requested before the first evaluation has run
            # (e.g. plot_every_num_epochs < eval_every_num_epochs).
            if test_accuracies:
                axes[1].set_title(f"Accuracy. Test: {test_accuracies[-1]}")
            else:
                axes[1].set_title("Accuracy")
            axes[1].plot(train_epochs, train_accuracies, label="train accuracy")
            axes[1].plot(test_epochs, test_accuracies, label="test accuracy")
            axes[1].legend()
            plt.show()
            # Release the figure so repeated redraws do not pile up open figures.
            plt.close(fig)

#     @trainer.on(Events.EPOCH_COMPLETED)
#     def free_memory(engine):
#         gc.collect()

    # A single (X, y) pair means one full-batch update per epoch.
    trainer.run([(X_train, y_train)], max_epochs=num_epochs)

    if not test_accuracies:
        # No epoch hit the evaluation interval; evaluate once so the caller
        # still receives an accuracy instead of an IndexError.
        _, final_test_accuracy = evaluate(X_test, y_test)
        return final_test_accuracy
    return test_accuracies[-1]

In [None]:
# Hand-picked configuration for a manual run; it uses the same keys that
# generate_configuration() produces (here the feature shape is a list, not a tuple).
configuration = {
    'extracted_features_shape': [18, 3, 23, 18],
    'regularization_coefficient': 1e-1,
    "learning_rate": 1e-6,
    'polynom_order': 2,
    'weights_rank': 10
}
# One test accuracy per repetition of train_test; currently a single repetition
# of 1001 epochs with the plot refreshed every 200 epochs.
foo = torch.tensor([
    train_test(configuration, num_epochs=1001, plot_every_num_epochs=200)
    for i in range(1)
])
print(foo.mean())
# NOTE(review): with a single repetition torch's default (unbiased) std is NaN;
# increase range(1) to get a meaningful spread.
print(foo.std())

In [None]:
def memreport():
    """Print CUDA memory usage and the type and size of every live tensor.

    The first two printed numbers are the bytes reported by
    ``torch.cuda.memory_allocated()`` and by the caching-allocator counter,
    each divided by 1024**3. After that, every object tracked by the garbage
    collector that is a tensor is listed with its type and size.
    """
    # `torch.cuda.memory_cached` is the deprecated name of `memory_reserved`;
    # use the new name when the installed PyTorch provides it and fall back to
    # the old one otherwise.
    reserved_bytes = (
        getattr(torch.cuda, "memory_reserved", None) or torch.cuda.memory_cached
    )
    print(f"""
    {torch.cuda.memory_allocated()/1024/1024/1024} Gb allocated
    {reserved_bytes()/1024/1024/1024} Gb cached
    """)
    for obj in gc.get_objects():
        if torch.is_tensor(obj):
            print(type(obj), obj.size())

In [None]:
#gc.collect()

In [None]:
#memreport()