In [1]:
import numpy as np
import os
from scipy.stats import hmean

from sklearn.metrics import accuracy_score

from scarce_shot_learn.data import load_awa2
from scarce_shot_learn import zero_shot

In [2]:
import torch

In [3]:
torch.__version__

'1.7.1'

$\phi$ - input feature
$$
    loss(\phi, label) = \sum_{j \neq label} ReLU(margin - t_{label} W \phi + t_j W \phi) 
$$

In [4]:
awa2_dataset = load_awa2()
X_train, attributes_features_train, labels_train = awa2_dataset['train']
X_val, attributes_features_val, labels_val = awa2_dataset['val']
X_test, attributes_features_test, labels_test = awa2_dataset['test']
X_train = X_train.astype('float32')
attributes_features_train = attributes_features_train.astype('float32') 

In [5]:
import attr
from scarce_shot_learn.zero_shot import zsl_base
import torch
from torch import nn, optim
from toolz import partial
from ignite.contrib.handlers import tqdm_logger
from sklearn import preprocessing 


def bilinear_feature_similarity(weights, embedding, class_features):
    """
    embedding * W * class_features
    """
    return torch.matmul(embedding, torch.matmul(weights, class_features.T))  


def similarity_based_hinge_loss(weights, embedding, label, label_embeddings, feature_similarity=bilinear_feature_similarity):
    """
    see https://arxiv.org/pdf/1703.04394.pdf
    equations (4) and (7) only differn in final per-class aggregation
    this function computes value before this final aggregation step
    """
    indicator = torch.ones(label_embeddings.shape[0], dtype=bool)
    indicator[label] = 0
    correct_class_similarity = feature_similarity(weights, embedding, label_embeddings[label])
    wrong_class_similarities = feature_similarity(weights, embedding, label_embeddings[indicator])
    return - correct_class_similarity + wrong_class_similarities 


def get_dataloader(X, y, batch_size=16):
    ds = torch.utils.data.TensorDataset(torch.Tensor(X), torch.Tensor(y).int())
    return torch.utils.data.DataLoader(ds, batch_size=batch_size)


def process_function(engine, batch, loss_fn, optimizer, y_features, use_cuda=True):
    optimizer.zero_grad()
    x, y = batch
    if use_cuda:
        x = x.cuda()
        y = y.cuda()
    loss = loss_fn(x, y, y_features)
    y_pred = loss_fn.predict(x, y_features)
    loss.backward()
    optimizer.step()
    return y_pred, y, {'loss': loss.item()}


def run_training_loop(loss_fn, train_dataloader, epochs, y_features_train, optimizer=optim.Adagrad, use_cuda=True):
    if use_cuda:
        y_features_train = y_features_train.cuda()
    trainer = ignite.engine.Engine(partial(process_function, loss_fn=loss_fn, optimizer=optimizer(loss_fn.parameters()), y_features=y_features_train))
    pbar = tqdm_logger.ProgressBar()
    pbar.attach(trainer)
    trainer.run(train_dataloader, max_epochs=epochs)
    return loss_fn


class DEVISELayer(nn.Module):
    
    def __init__(self, n_features, n_class_features, margin, init_weights_std=0.1):
        super(DEVISELayer, self).__init__()
        init_weights = init_weights_std * torch.randn(n_features, n_class_features) 
        self.weights = nn.Parameter(data=init_weights.cuda())
        self.margin = margin
    
    def forward(self, X, y, label_embeddings):
        loss = torch.Tensor([0]).cuda()
        for i in range(X.shape[0]):
            loss += self._devise_loss(X[i], y[i], label_embeddings)
        return loss
        
    def _devise_loss(self, embedding, label, label_embeddings):
        indicator = torch.ones(label_embeddings.shape[0], dtype=bool)
        indicator[label] = 0
        per_class_loss = similarity_based_hinge_loss(self.weights, embedding, label, label_embeddings)
        return nn.ReLU()(self.margin + per_class_loss).sum()
    
    def predict(self, X, label_embeddings):
        class_similarities = bilinear_feature_similarity(self.weights, X, label_embeddings)
        return torch.argmax(class_similarities, axis=1)


@attr.s
class DEVISELearner(zsl_base.ZeroShotClassifier):
    """
        see https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41473.pdf
        for the paper
    """
    margin = attr.ib(default=0.1)
    
    def fit(self, X, y, class_attributes, n_epochs=1, eval_set=None):
        self.loss_fn = DEVISELayer(X.shape[1], class_attributes.shape[1], self.margin)
        le = preprocessing.LabelEncoder()
        ohe = preprocessing.OneHotEncoder()
        y_labels_encoded = le.fit_transform(y)
        y_labels_ohe = ohe.fit_transform(y_labels_encoded.reshape(-1, 1)).toarray()
        M = np.random.randn
        X_t = torch.tensor(X)
        class_attributes_t = torch.Tensor(class_attributes).float()
        train_dataloader = get_dataloader(X_train, labels_train)
        run_training_loop(self.loss_fn, train_dataloader, n_epochs, torch.Tensor(class_attributes).float())
        return self

    def predict(self, X, class_attributes):
        return self.loss_fn.predict(torch.tensor(X).cuda(), torch.Tensor(class_attributes).cuda()).cpu().detach().numpy()

In [6]:
import torch.nn

In [7]:
devise_learner = DEVISELearner(0.1)

In [8]:
devise_learner.weights = torch.randn(X_train.shape[1], attributes_features_train.shape[1]).double()

In [9]:
torch.Tensor([1,0]).bool()

tensor([ True, False])

In [10]:
devise_loss = DEVISELayer(X_train.shape[1], attributes_features_train.shape[1], 0.1)

In [11]:
optimizer = torch.optim.Adagrad(devise_loss.parameters())

In [12]:
X_train.shape

(16187, 2048)

In [13]:
torch.tensor(X_train).dtype
torch.tensor(attributes_features_train).dtype

torch.float32

In [14]:
attributes_features_train.dtype

dtype('float32')

In [15]:
%%time
loss = devise_loss(
    torch.tensor(X_train[:10]).cuda(),
    torch.tensor(labels_train[:10]).int().cuda(),
    torch.tensor(attributes_features_train).cuda()
)

CPU times: user 226 ms, sys: 102 ms, total: 328 ms
Wall time: 327 ms


In [16]:
%%time
loss.backward()

CPU times: user 235 µs, sys: 3.02 ms, total: 3.26 ms
Wall time: 2.92 ms


In [17]:
y_pred = devise_loss.predict(torch.tensor(X_train).cuda(), torch.Tensor(attributes_features_train).cuda()).cpu().detach().numpy()

In [18]:
(y_pred == labels_train).mean()

0.014208933094458516

In [19]:
import ignite

In [20]:
train_dataloader = get_dataloader(X_train, labels_train)
val_dataloader = get_dataloader(X_val, labels_val)

In [21]:
devise_learner = DEVISELearner()

In [22]:
devise_learner.fit(X_train, labels_train, attributes_features_train, n_epochs=2)

[1/1012]   0%|           [00:00<?]

[1/1012]   0%|           [00:00<?]

DEVISELearner(margin=0.1)

In [23]:
y_pred_train = devise_learner.predict(X_train, attributes_features_train)

In [24]:
(y_pred_train == labels_train).mean()

0.9389633656638043

In [27]:
train_acc = (y_pred_train == labels_train).mean()
val_acc = (y_pred_val == labels_val).mean()
test_acc = (y_pred_test == labels_test).mean()

NameError: name 'y_pred_val' is not defined

In [None]:
hmean([train_acc, val_acc])

In [None]:
test_acc