In [None]:
import random
import functools
import itertools
import operator
import logging
from collections import defaultdict
import gc

from IPython import display
with __import__('importnb').Notebook():
    # github.com/deathbeds/importnb
    import eth80
    from eth80 import Eth80Dataset
    from feature_extraction import TuckerFeatureExtractor

import numpy as np
import scipy.linalg as la
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import LabelEncoder

import tensorly as tl
from tensorly.decomposition import partial_tucker
import tensorly.tenalg as ta

import torch
import torch.nn as nn
import torch.nn.functional as tnnf
from torch.utils.data import TensorDataset
from ignite.engine import (
    Events, Engine, create_supervised_trainer, create_supervised_evaluator
)
from ignite.metrics import Accuracy

from libcrap import save_json, load_json

In [None]:
# Make TensorLy operate on PyTorch tensors.
tl.set_backend("pytorch")
# First CUDA device; this cell requires a CUDA-capable machine.
# NOTE(review): `device` is not referenced again in the visible cells.
device = torch.device("cuda:0")
# Tensors created without an explicit dtype/device now default to float32 on
# the GPU, so code below can build tensors without passing `device=`.
torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [None]:
# Build the ETH-80 dataset object from a directory on disk.
# NOTE(review): this is a machine-specific absolute path; it must be changed
# when the notebook is run anywhere else.
eth80_dataset = Eth80Dataset(
    "/mnt/hdd_1tb/smiles_backup/Documents/datasets/eth80/eth80-cropped-close128/"
)

In [None]:
class MPRLowRankCP(nn.Module):
    """Polynomial regression whose weights are stored as low-rank factors.

    The model keeps ``polynom_order`` factor tensors, each of shape
    ``(num_outputs, actual_num_inputs, rank)``. For a batch ``X`` of shape
    ``(num_samples, num_inputs)`` the output is::

        out[n, o] = sum_r prod_i ( sum_f X[n, f] * factors[i][o, f, r] )

    When ``bias`` is truthy a constant column of ones is appended to ``X``
    before the contraction, which is why ``actual_num_inputs`` is
    ``num_inputs + 1`` in that case.

    Args:
        num_inputs: number of input features (without the bias column).
        num_outputs: number of outputs produced per sample.
        polynom_order: number of factor tensors multiplied together (>= 1).
        rank: number of rank-one components summed over (>= 1).
        bias: whether to append a constant-one feature to every sample.
    """

    def __init__(self, num_inputs, num_outputs, polynom_order, rank, bias=True):
        assert polynom_order >= 1
        assert rank >= 1
        super().__init__()
        self.polynom_order = polynom_order
        self.rank = rank
        self.bias = bias
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        # forward() appends exactly one column when `bias` is truthy, so count
        # it as exactly one extra input regardless of the truthy value given.
        self.actual_num_inputs = num_inputs + int(bool(bias))
        # Values are left uninitialized here; reset_parameters() fills them.
        self.factors = nn.ParameterList([
            nn.Parameter(
                torch.empty(self.num_outputs, self.actual_num_inputs, self.rank),
                requires_grad=True
            )
            for _ in range(self.polynom_order)
        ])
        self.reset_parameters()

    def _actual_num_features(self):
        """Return the number of input features including the bias column."""
        return self.actual_num_inputs

    def reset_parameters(self):
        """Re-draw every factor from a standard normal distribution, in place."""
        with torch.no_grad():
            for factor in self.factors:
                factor.copy_(torch.randn_like(factor))

    def forward(self, X):
        """Evaluate the model on ``X`` of shape ``(num_samples, num_inputs)``.

        Returns a tensor of shape ``(num_samples, num_outputs)``.
        """
        assert len(X.shape) == 2
        num_samples = X.shape[0]
        if self.bias:
            # new_ones() inherits dtype and device from X, so this does not
            # depend on the process-wide default tensor type.
            X = torch.cat([X, X.new_ones(num_samples, 1)], dim=1)
        projections = [
            # n - sample index, f - feature index
            # r - rank-one component index, o - output index
            torch.einsum("nf,ofr->nor", X, factor)
            for factor in self.factors
        ]
        # Elementwise product over the factors, then sum over the rank axis.
        return functools.reduce(operator.mul, projections).sum(dim=2)

In [None]:
def generate_configuration():
    """Draw one random hyperparameter configuration.

    Returns a dict with the keys ``extracted_features_shape`` (a 4-tuple of
    mode sizes), ``learning_rate``, ``regularization_coefficient``,
    ``polynom_order`` and ``weights_rank``. The learning rate and the
    regularization coefficient are powers of ten with a random integer
    exponent.
    """
    # The draws are made in the same order in which the values appear in the
    # returned dict, so a seeded `random` yields a reproducible configuration.
    angles_mode_size = random.randint(1, eth80.NUM_ANGLES // 2)
    channels_mode_size = random.randint(1, 3)
    height_mode_size = random.randint(1, eth80.IMAGE_HEIGHT // 4)
    width_mode_size = random.randint(1, eth80.IMAGE_WIDTH // 4)
    learning_rate_exponent = random.randint(-13, -1)
    regularization_exponent = random.randint(-8, 8)
    order = random.randint(2, 3)
    rank = random.randint(1, 100)

    configuration = {}
    configuration["extracted_features_shape"] = (
        angles_mode_size, channels_mode_size, height_mode_size, width_mode_size
    )
    configuration["learning_rate"] = 10 ** learning_rate_exponent
    configuration["regularization_coefficient"] = 10 ** regularization_exponent
    configuration["polynom_order"] = order
    configuration["weights_rank"] = rank
    return configuration

It seems the best learning rate is somewhere between 1e-10 and 1e-9. Anything greater than 1e-9 produces NaNs :(. Anything smaller trains too slowly.

And I don't yet know the best values for the other hyperparameters, because it turns out that too few of my experiments used a suitable learning rate.

Best test accuracy so far: extracted features dimensions [18, 3, 29, 10], lr 1.000000e-09, polynomial order 3, regularization coefficient 1.000000e-03, weights_rank 80.

In [None]:
def memreport():
    """Print CUDA memory usage and the type and size of every live tensor.

    The first part reports the bytes currently allocated and the bytes held
    by PyTorch's caching allocator, both converted to gigabytes. The second
    part walks every object tracked by the garbage collector and prints each
    one that is a tensor.
    """
    # `memory_cached` is the deprecated name of `memory_reserved`; prefer the
    # current name and fall back only on PyTorch versions that lack it.
    reserved_bytes = getattr(torch.cuda, "memory_reserved", None)
    if reserved_bytes is None:
        reserved_bytes = torch.cuda.memory_cached
    print(f"""
    {torch.cuda.memory_allocated()/1024/1024/1024} Gb allocated
    {reserved_bytes()/1024/1024/1024} Gb cached
    """)
    for obj in gc.get_objects():
        try:
            is_tensor = torch.is_tensor(obj)
        except Exception:
            # Some GC-tracked objects (e.g. proxies to dead referents) raise
            # on type inspection; they cannot be tensors, so skip them
            # instead of aborting the whole report.
            continue
        if is_tensor:
            print(type(obj), obj.size())

In [None]:
#gc.collect()

In [None]:
#memreport()

In [None]:
def extract_X_y_train_test(
    dataset, num_test_objects_per_class, extracted_features_shape
):
    """Split the dataset and turn both parts into flat feature matrices.

    The dataset is split with ``dataset.stratified_split``; a
    ``TuckerFeatureExtractor`` is fitted on the training part and then applied
    to the test part. Each extracted core is flattened to one row per sample.

    Returns ``X_train, y_train, X_test, y_test``.
    """
    split = dataset.stratified_split(num_test_objects_per_class, use_torch=True)
    train_tensor, train_labels, test_tensor, test_labels = split

    # The extractor is configured with the per-sample shape (everything after
    # the leading sample axis) and the requested core shape.
    per_sample_shape = train_tensor.shape[1:]
    feature_extractor = TuckerFeatureExtractor(
        per_sample_shape, extracted_features_shape
    )

    # Fit on the training data first, then reuse the fitted extractor on test.
    cores = (
        feature_extractor.fit_transform(train_tensor),
        feature_extractor.transform(test_tensor),
    )
    train_features, test_features = (
        core.reshape(core.shape[0], -1) for core in cores
    )

    assert train_features.shape[1] == test_features.shape[1]
    return train_features, train_labels, test_features, test_labels

In [None]:
def make_CP_MPR(
    num_extracted_features, polynom_order, weights_rank,
    optimizer_creator, learning_rate, regularization_coefficient,
    betas=None
):
    """Create an ``MPRLowRankCP`` model together with its optimizer.

    The model has ``num_extracted_features`` inputs, ``eth80.NUM_CLASSES``
    outputs and a bias term. ``optimizer_creator`` is called with the model
    parameters, ``lr`` and ``weight_decay``; ``betas`` is forwarded only when
    it is not ``None``.

    Returns ``(model, optimizer)``.
    """
    optimizer_kwargs = {
        "lr": learning_rate,
        "weight_decay": regularization_coefficient,
    }
    if betas is not None:
        optimizer_kwargs["betas"] = betas

    model = MPRLowRankCP(
        num_extracted_features, eth80.NUM_CLASSES,
        polynom_order=polynom_order, rank=weights_rank, bias=True
    )
    return model, optimizer_creator(model.parameters(), **optimizer_kwargs)

In [None]:
def do_every_num_epochs(num_epochs):
    """Build a decorator that runs a handler only on every ``num_epochs``-th epoch.

    The wrapped handler is called (and its result returned) when
    ``engine.state.epoch`` is divisible by ``num_epochs``; otherwise nothing
    is called and ``None`` is returned.

    Place this decorator below ``@trainer.on(...)``, not above it.
    """
    def decorate(handler):
        @functools.wraps(handler)
        def gated_handler(engine, *args, **kwargs):
            if engine.state.epoch % num_epochs != 0:
                return None
            return handler(engine, *args, **kwargs)
        return gated_handler
    return decorate

def train_and_evaluate(
    dataset, extracted_features_shape,
    model, optimizer,
    num_test_objects_per_class,
    eval_every_num_epochs, plot_every_num_epochs,
    num_epochs
):
    """Train ``model`` with cross-entropy loss and track train/test metrics.

    Features are obtained with ``extract_X_y_train_test``. Training is
    full-batch: the whole training set is passed to the trainer as a single
    batch per epoch.

    Args:
        dataset: object passed through to ``extract_X_y_train_test``.
        extracted_features_shape: core shape for the feature extraction.
        model: the module to train.
        optimizer: optimizer already bound to ``model``'s parameters.
        num_test_objects_per_class: size of the test split per class.
        eval_every_num_epochs: evaluate on train and test every this many epochs.
        plot_every_num_epochs: redraw the loss/accuracy plot every this many epochs.
        num_epochs: total number of epochs to run.

    Returns:
        ``(model, evaluations_epochs, train_log, test_log)`` where
        ``evaluations_epochs`` lists the epochs at which evaluation happened
        and each log is a dict with parallel ``"losses"`` and ``"accuracies"``
        lists.
    """
    X_train, y_train, X_test, y_test = extract_X_y_train_test(
        dataset, num_test_objects_per_class, extracted_features_shape
    )

    trainer = create_supervised_trainer(
        model=model, optimizer=optimizer,
        loss_fn=tnnf.cross_entropy
    )

    evaluations_epochs = []
    train_log = {
        "losses": [],
        "accuracies": []
    }
    test_log = {
        "losses": [],
        "accuracies": []
    }

    def evaluate(X, y, log):
        """Append the loss and accuracy of ``model`` on ``(X, y)`` to ``log``."""
        model.train(False)
        # No gradients are needed for metrics; skipping graph construction
        # avoids holding extra activations in memory during evaluation.
        with torch.no_grad():
            logits = model(X)
            loss = tnnf.cross_entropy(logits, y).item()
            predictions = logits.argmax(dim=1)
            accuracy = (y == predictions).sum().item() / len(y)
        log["losses"].append(loss)
        log["accuracies"].append(accuracy)

    @trainer.on(Events.EPOCH_COMPLETED)
    @do_every_num_epochs(eval_every_num_epochs)
    def evaluate_on_train_and_test(engine):
        evaluate(X_train, y_train, train_log)
        evaluate(X_test, y_test, test_log)
        assert not isinstance(engine.state.epoch, torch.Tensor)
        evaluations_epochs.append(engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    @do_every_num_epochs(plot_every_num_epochs)
    def update_plot(engine):
        if not evaluations_epochs:
            # A plot can be due before the first evaluation has run (when
            # plot_every_num_epochs is not a multiple of
            # eval_every_num_epochs); there is nothing to draw yet.
            return
        display.clear_output(wait=True)
        fig, axes = plt.subplots(ncols=2, figsize=(14, 5))
        axes = axes.flatten()
        axes[0].set_title("Loss")
        axes[0].plot(evaluations_epochs, train_log["losses"], label="train loss")
        axes[0].plot(evaluations_epochs, test_log["losses"], label="test loss")
        axes[0].legend()
        axes[1].set_title(f"Accuracy. Test: {test_log['accuracies'][-1]}")
        axes[1].plot(evaluations_epochs, train_log["accuracies"], label="train accuracy")
        axes[1].plot(evaluations_epochs, test_log["accuracies"], label="test accuracy")
        axes[1].legend()
        plt.show()
        # Release the figure explicitly so repeated redraws do not accumulate
        # open figures on backends that do not close them on show().
        plt.close(fig)

    # A single (X, y) pair means one full-batch update per epoch.
    trainer.run([(X_train, y_train)], max_epochs=num_epochs)
    return model, evaluations_epochs, train_log, test_log

In [None]:
def evaluate_CP_MPR():
    """Train and evaluate one fixed CP-MPR configuration on ``eth80_dataset``.

    Uses extracted feature shape ``[18, 3, 29, 10]``, polynomial order 3,
    weights rank 500, SGD with learning rate 1e-9 and weight decay 1e-3, two
    test objects per class, and 301 epochs with evaluation every 3 epochs and
    plotting every 60. Returns nothing.
    """
    features_shape = [18, 3, 29, 10]

    # The model input size is the product of all extracted mode sizes.
    num_features = 1
    for mode_size in features_shape:
        num_features *= mode_size

    model, optimizer = make_CP_MPR(
        num_extracted_features=num_features,
        polynom_order=3,
        weights_rank=500,
        optimizer_creator=torch.optim.SGD,
        learning_rate=1e-9,
        regularization_coefficient=1e-3,
    )
    train_and_evaluate(
        dataset=eth80_dataset,
        extracted_features_shape=features_shape,
        model=model,
        optimizer=optimizer,
        num_test_objects_per_class=2,
        eval_every_num_epochs=3,
        plot_every_num_epochs=60,
        num_epochs=301,
    )

In [None]:
# Run the fixed-configuration experiment defined in the previous cell.
evaluate_CP_MPR()