In [1]:
import numpy as np
from scipy.stats import hmean

from sklearn.metrics import accuracy_score

from scarce_shot_learn.data import load_awa2
from scarce_shot_learn import zero_shot

In [2]:
import torch

In [3]:
# Record the PyTorch version this notebook was executed with.
torch.__version__

'1.7.1'

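$\phi$ - input feature vector, $t_j$ - embedding (attribute vector) of class $j$, $W$ - learned compatibility matrix, $margin$ - hinge margin. For one sample the loss sums a hinge penalty over every wrong class $j$:
$$
    loss(\phi, label) = \sum_{j \neq label} ReLU(margin - t_{label} W \phi + t_j W \phi) 
$$
In the code below the same score is computed as $\phi^\top W t_j$, with $W$ stored with shape (n_features, n_class_features).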

In [4]:


# Load AwA2. Each split unpacks into three arrays: input features,
# attribute features and labels.
awa2_dataset = load_awa2()
X_train, attributes_features_train, labels_train = awa2_dataset['train']
X_val, attributes_features_val, labels_val = awa2_dataset['val']
X_test, attributes_features_test, labels_test = awa2_dataset['test']
# Only the training arrays are cast to float32 here; the validation arrays
# keep the dtype returned by load_awa2 and are converted at their point of use.
X_train = X_train.astype('float32')
attributes_features_train = attributes_features_train.astype('float32') 

In [5]:
import attr
from scarce_shot_learn.zero_shot import zsl_base
import torch
from torch import nn, optim


class DEVISELayer(nn.Module):
    """Bilinear compatibility model trained with the DeViSE hinge ranking loss.

    The layer learns a matrix ``weights`` of shape
    ``(n_features, n_class_features)`` and scores an input ``x`` against a
    class embedding ``t`` as ``x @ weights @ t``.

    Args:
        n_features: Dimensionality of the input feature vectors.
        n_class_features: Dimensionality of the class (label) embeddings.
        margin: Hinge margin of the ranking loss.
        init_weights_std: Standard deviation of the normal distribution the
            weights are initialised from.
        device: Device the weights are placed on. ``None`` (the default)
            selects CUDA when it is available and falls back to the CPU
            otherwise, so the layer also works on machines without a GPU.
    """

    def __init__(self, n_features, n_class_features, margin, init_weights_std=0.1, device=None):
        super(DEVISELayer, self).__init__()
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # Sample on the CPU first so the initial weights depend only on the
        # CPU random generator, whatever the target device is.
        init_weights = init_weights_std * torch.randn(n_features, n_class_features)
        self.weights = torch.nn.Parameter(data=init_weights.to(device))
        self.margin = margin

    def _class_scores(self, X, label_embeddings):
        """Return the compatibility score of every input with every class.

        ``X`` may be a single feature vector or a ``(batch, n_features)``
        matrix; ``label_embeddings`` holds one class embedding per row.
        """
        return torch.matmul(X, torch.matmul(self.weights, label_embeddings.T))

    def forward(self, X, y, label_embeddings):
        """Return the summed DeViSE loss of a batch as a one-element tensor.

        Args:
            X: ``(batch, n_features)`` input features.
            y: Integer class index of every sample; indexes the rows of
                ``label_embeddings``.
            label_embeddings: ``(n_classes, n_class_features)`` class embeddings.

        Returns:
            Tensor of shape ``(1,)`` living on the same device as the scores.
        """
        scores = self._class_scores(X, label_embeddings)
        targets = torch.as_tensor(y, device=scores.device).long().reshape(-1, 1)
        correct = scores.gather(1, targets)
        hinge = torch.relu(self.margin - correct + scores)
        # The true class must not contribute to the sum: zero its column entry.
        class_ids = torch.arange(scores.shape[1], device=scores.device)
        is_target = class_ids.unsqueeze(0) == targets
        return hinge.masked_fill(is_target, 0.0).sum().reshape(1)

    def _devise_loss(self, emb, label, label_embeddings):
        """Return the DeViSE loss of a single sample as a scalar tensor."""
        label = int(label)
        # Build the mask on the embeddings' device to avoid mixing devices.
        wrong = torch.ones(
            label_embeddings.shape[0], dtype=torch.bool, device=label_embeddings.device
        )
        wrong[label] = False
        class_scores = self._class_scores(emb, label_embeddings)
        per_class_loss = torch.relu(self.margin - class_scores[label] + class_scores[wrong])
        return per_class_loss.sum()

    def predict(self, X, label_embeddings):
        """Return, for every row of ``X``, the index of the best-scoring class.

        The index refers to the rows of ``label_embeddings``. No autograd
        graph is recorded because argmax is not differentiable anyway.
        """
        with torch.no_grad():
            return torch.argmax(self._class_scores(X, label_embeddings), dim=1)


@attr.s
class DEVISELearner(zsl_base.ZeroShotClassifier):
    """
        Zero-shot classifier based on DeViSE (work in progress).

        see https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41473.pdf
        for the paper
    """
    # Hinge margin of the ranking loss.
    margin = attr.ib(default=0.1)

    def fit(self, X, y, class_attributes):
        """Prepare the training inputs for DeViSE.

        NOTE(review): this method is unfinished. It only encodes the labels
        and converts the inputs to tensors; no model is trained and nothing
        is stored on the instance or returned.

        Args:
            X: Input features, convertible with ``torch.tensor``.
            y: Class label of every sample.
            class_attributes: Class attribute data, convertible with
                ``torch.tensor``.
        """
        # Imported locally: the notebook never imports these encoders, so the
        # original code raised NameError as soon as fit() was called.
        from sklearn.preprocessing import LabelEncoder, OneHotEncoder

        le = LabelEncoder()
        ohe = OneHotEncoder()
        y_labels_encoded = le.fit_transform(y)
        y_labels_ohe = ohe.fit_transform(y_labels_encoded.reshape(-1, 1)).toarray()
        X_t = torch.tensor(X)
        class_attributes_t = torch.tensor(class_attributes)
        # TODO: train a DEVISELayer on (X_t, y_labels_encoded, class_attributes_t).
        

In [6]:
import torch.nn

In [7]:
# Instantiate the learner; the positional 0.1 presumably binds to `margin`
# (assumes the base class declares no earlier attrs fields — TODO confirm).
devise_learner = DEVISELearner(0.1)

In [8]:
# Scratch experiment: attach a random float64 matrix with one row per input
# feature and one column per attribute feature to the learner as an ad-hoc
# attribute. No visible later cell reads it.
devise_learner.weights = torch.randn(X_train.shape[1], attributes_features_train.shape[1]).double()

In [9]:
# Scratch check: casting a float tensor to bool maps 1 -> True and 0 -> False.
torch.Tensor([1,0]).bool()

tensor([ True, False])

In [10]:
# Build the trainable DeViSE layer: input width taken from X_train, class
# embedding width taken from the attribute features, margin 0.1.
devise_loss = DEVISELayer(X_train.shape[1], attributes_features_train.shape[1], 0.1)

In [11]:
# Adagrad over the layer's parameters with the library's default settings.
# run_training_loop picks this object up as a notebook global.
optimizer = torch.optim.Adagrad(devise_loss.parameters())

In [12]:
# Dimensions of the training feature matrix: (n_samples, n_features).
X_train.shape

(16187, 2048)

In [13]:
# Check the dtype both training arrays get as tensors. Only the last
# expression of a cell is displayed, so the first line shows nothing.
torch.tensor(X_train).dtype
torch.tensor(attributes_features_train).dtype

torch.float32

In [14]:
# The NumPy array itself is float32 after the cast done in the loading cell.
attributes_features_train.dtype

dtype('float32')

In [15]:
%%time
# Smoke test: DeViSE loss of the first 10 training samples against all
# training attribute features, with every input moved to the GPU.
loss = devise_loss(
    torch.tensor(X_train[:10]).cuda(),
    torch.tensor(labels_train[:10]).int().cuda(),
    torch.tensor(attributes_features_train).cuda()
)

CPU times: user 264 ms, sys: 108 ms, total: 371 ms
Wall time: 371 ms


In [16]:
%%time
# Back-propagate the smoke-test loss. The gradient it leaves on the weights
# is cleared by optimizer.zero_grad() at the start of every training step.
loss.backward()

CPU times: user 5 ms, sys: 0 ns, total: 5 ms
Wall time: 4.56 ms


In [17]:
# Baseline before any optimizer step: predicted row index into the training
# attribute features for every training sample, returned as a NumPy array.
y_pred = devise_loss.predict(torch.tensor(X_train).cuda(), torch.Tensor(attributes_features_train).cuda()).cpu().detach().numpy()

In [18]:
# Fraction of training samples whose predicted index equals the label
# (accuracy of the randomly initialised layer).
(y_pred == labels_train).mean()

0.025699635509977142

In [19]:
import ignite

In [20]:
def get_dataloader(X, y, batch_size=16, shuffle=False):
    """Wrap features and labels in a batched ``DataLoader``.

    Args:
        X: Array-like of input features; stored as a float32 tensor.
        y: Array-like of integer labels; stored as an int32 tensor.
        batch_size: Number of samples per batch.
        shuffle: Reshuffle the samples at every epoch. Defaults to ``False``,
            which serves the samples in their original order.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(features, labels)`` batches.
    """
    features = torch.tensor(X, dtype=torch.float32)
    # Convert the labels straight to int32. The legacy torch.Tensor(y)
    # constructor went through float32 first, which cannot represent every
    # integer above 2**24 exactly.
    labels = torch.tensor(y, dtype=torch.int32)
    ds = torch.utils.data.TensorDataset(features, labels)
    return torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=shuffle)

In [21]:
# Batched loaders with the default batch size. No shuffling is requested, so
# batches follow the order of the arrays. run_training_loop reads
# train_dataloader as a notebook global.
train_dataloader = get_dataloader(X_train, labels_train)
val_dataloader = get_dataloader(X_val, labels_val)

In [22]:
def process_function(engine, batch, loss_fn, optimizer, y_features):
    """Run one optimisation step on a batch (ignite process function).

    Args:
        engine: The ignite engine invoking this function (unused).
        batch: ``(x, y)`` pair of input features and labels.
        loss_fn: Module called as ``loss_fn(x, y, y_features)`` to get the loss
            and offering ``predict(x, y_features)``.
        optimizer: Optimizer updating the parameters of ``loss_fn``.
        y_features: Class embedding tensor. The batch is moved to its device,
            so the step runs wherever the embeddings live (GPU or CPU).

    Returns:
        ``(y_pred, y, {'loss': float})``. The predictions are taken before
        the parameter update.
    """
    optimizer.zero_grad()
    x, y = batch
    device = y_features.device
    x = x.to(device)
    y = y.to(device)
    loss = loss_fn(x, y, y_features)
    # Predictions are only reported, never differentiated.
    with torch.no_grad():
        y_pred = loss_fn.predict(x, y_features)
    loss.backward()
    optimizer.step()
    return y_pred, y, {'loss': loss.item()}



In [23]:
# Display the optimizer's configuration.
optimizer

Adagrad (
Parameter Group 0
    eps: 1e-10
    initial_accumulator_value: 0
    lr: 0.01
    lr_decay: 0
    weight_decay: 0
)

In [24]:
from toolz import partial
from ignite.contrib.handlers import tqdm_logger

def run_training_loop(loss_fn, epochs, y_features_train, train_optimizer=None, dataloader=None):
    """Train ``loss_fn`` with an ignite engine and return it.

    Args:
        loss_fn: Loss module to optimise; forwarded to ``process_function``.
        epochs: Number of passes over the data.
        y_features_train: Class embeddings handed to every training step.
        train_optimizer: Optimizer updating ``loss_fn``'s parameters. When
            omitted, the notebook-level ``optimizer`` is used, which is only
            correct if that optimizer was built from this ``loss_fn``.
        dataloader: Iterable of ``(x, y)`` batches. When omitted, the
            notebook-level ``train_dataloader`` is used.

    Returns:
        The same ``loss_fn`` object, updated in place.
    """
    if train_optimizer is None:
        train_optimizer = optimizer
    if dataloader is None:
        dataloader = train_dataloader
    step = partial(
        process_function,
        loss_fn=loss_fn,
        optimizer=train_optimizer,
        y_features=y_features_train,
    )
    trainer = ignite.engine.Engine(step)
    pbar = tqdm_logger.ProgressBar()
    pbar.attach(trainer)
    trainer.run(dataloader, max_epochs=epochs)
    return loss_fn


In [25]:
# Train for 5 epochs. The training attribute features are moved to the GPU
# once and reused for every batch.
run_training_loop(devise_loss, 5, torch.Tensor(attributes_features_train).float().cuda())

[1/1012]   0%|           [00:00<?]

[1/1012]   0%|           [00:00<?]

[1/1012]   0%|           [00:00<?]

[1/1012]   0%|           [00:00<?]

[1/1012]   0%|           [00:00<?]

DEVISELayer()

In [26]:
# Predictions for the whole training split with the current layer weights.
y_pred_train = devise_loss.predict(torch.tensor(X_train).cuda(), torch.Tensor(attributes_features_train).cuda()).cpu().detach().numpy()

In [31]:
# Training accuracy: share of samples whose predicted index equals the label.
train_acc = (y_pred_train == labels_train).mean()

In [28]:
# Inspect the validation attribute features.
# NOTE(review): this displays the array itself; `.shape` or a small slice
# would keep the saved output short.
attributes_features_val

array([[0.07233195, 0.        , 0.        , ..., 0.11898569, 0.12480808,
        0.04896776],
       [0.24730479, 0.26063323, 0.        , ..., 0.1044239 , 0.03998532,
        0.2584296 ],
       [0.16089413, 0.        , 0.        , ..., 0.22788871, 0.122741  ,
        0.05665286],
       ...,
       [0.23174215, 0.21811203, 0.        , ..., 0.16699909, 0.11516243,
        0.08919886],
       [0.        , 0.06587863, 0.        , ..., 0.02108505, 0.10218637,
        0.0332632 ],
       [0.22516498, 0.15266022, 0.        , ..., 0.12733492, 0.10009694,
        0.01771   ]])

In [29]:
# Predictions for the validation split, scored against the validation
# attribute features. X_val is cast with .float() before being moved to the GPU.
y_pred_val = devise_loss.predict(torch.tensor(X_val).float().cuda(), torch.Tensor(attributes_features_val).cuda()).cpu().detach().numpy()

In [32]:
# Validation accuracy. Assumes labels_val index the rows of
# attributes_features_val the same way the predictions do — TODO confirm.
val_acc = (y_pred_val == labels_val).mean()

In [34]:
# Harmonic mean of training and validation accuracy as a single summary score.
hmean([train_acc, val_acc])

0.5619103287274593