In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision as tv

import tensorflow as tf

from C2AE import C2AE, save_model, load_model, Fe, Fx, Fd, eval_metrics

from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss, accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from scipy.io import arff

import arff as arff2

# C2AE Architecture
* X:
    * (N, d)
* Y:
    * (N, m)
* Z:
    * (N, l)

## Three main components:
* Fx:
    * Encodes x into latent space z.
* Fe:
    * Encodes y into latent space z.
* Fd:
    * Decodes z into label space. 

## Loss functions:

$$L_1 = ||F_x(X) - F_e(Y)||^2 s.t. F_x(X)Fx(X)^T = F_e(Y)F_e(Y)^T = I$$
$$L_2 = \Gamma(F_e, F_d) = \Sigma_i^N E_i$$
$$E_i = \frac{1}{|y_i^1||y_i^0|} \Sigma_{p,q \in y_i^1\times y_i^0} e^{F_d(F_e(y_i))^q - F_d(F_e(y_I))^p}$$

## Combined Loss:
$$L_1 + \alpha L_2$$

In [7]:
# scene = arff.loadarff('./scene.arff')
# scene_df = pd.DataFrame(scene[0])
# data_labels = scene_df.loc[:, 'Beach': 'Urban'].astype('int').values

# X, Y = scene_df.loc[:, 'Att1':'Att294'].values, scene_df.loc[:, 'Beach': 'Urban'].astype('int').values
# trans = tv.transforms.Compose([tv.transforms.Lambda(lambda x: torch.Tensor(x))])
# X, Y = trans(X), trans(Y)
# X_train, X_test, y_train, y_test = train_test_split(X, Y)


# train_dataset = TensorDataset(X_train,y_train)
# test_dataset = TensorDataset(X_test, y_test)

# X_train.shape, X_test.shape, y_train.shape, y_test.shape

train = arff.loadarff('./scene.arff')
test = arff.loadarff('./scene-train.arff')
train_df = pd.DataFrame(train[0])
test_df = pd.DataFrame(test[0])
X, Y = train_df.loc[:, 'Att1':'Att294'].values, train_df.loc[:, 'Beach': 'Urban'].astype('int').values
X_test, Y_test  = test_df.loc[:, 'Att1':'Att294'].values, test_df.loc[:, 'Beach': 'Urban'].astype('int').values

trans = tv.transforms.Compose([tv.transforms.Lambda(lambda x: torch.Tensor(x))])
X, Y = trans(X), trans(Y)
X_test, Y_test = trans(X_test), trans(Y_test)

train_dataset = TensorDataset(X,Y)
test_dataset = TensorDataset(X_test, Y_test)

In [162]:
# nus_train = arff2.load(open('./nus/nus-wide-full-cVLADplus-train.arff'))
# nus_test = arff2.load(open('./nus/nus-wide-full-cVLADplus-test.arff'))
# nus_train_x = np.array(nus_train['data'])[:, 1:129].astype('float64')
# nus_train_y = np.array(nus_train['data'])[:, 129:].astype('float64')
# nus_train_x[nus_train_y.sum(axis=1) != 0]
# nus_train_y[nus_train_y.sum(axis=1) != 0]

# nus_test_x = np.array(nus_test['data'])[:, 1:129].astype('float64')
# nus_test_y = np.array(nus_test['data'])[:, 129:].astype('float64')
# nus_test_x[nus_test_y.sum(axis=1) != 0]
# nus_test_y[nus_test_y.sum(axis=1) != 0]
# train_dataset = TensorDataset(torch.Tensor(nus_train_x), torch.Tensor(nus_train_y))
# test_dataset = TensorDataset(torch.Tensor(nus_test_x), torch.Tensor(nus_test_y))

In [163]:
train_dataset[:][0].shape, train_dataset[:][1].shape, test_dataset[:][0].shape, test_dataset[:][1].shape

(torch.Size([1211, 294]),
 torch.Size([1211, 6]),
 torch.Size([1211, 294]),
 torch.Size([1211, 6]))

### Metrics:

In [3]:
def micro_r(y_t, y_p):
    return recall_score(y_t, y_p, average='micro')
def macro_r(y_t, y_p):
    return recall_score(y_t, y_p, average='macro')
def micro_p(y_t, y_p):
    return precision_score(y_t, y_p, average='micro')
def macro_p(y_t, y_p):
    return precision_score(y_t, y_p, average='macro')
def micro_f1(y_t, y_p):
    return f1_score(y_t, y_p, average='micro')
def macro_f1(y_t, y_p):
    return f1_score(y_t, y_p, average='macro')
def ham_los(*args, **kwargs):
    return hamming_loss(*args, **kwargs)

In [172]:
# Training configs.
batch_size = 32
num_epochs = 1000
lr = 0.001
device = torch.device('cuda')

train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# # Nus config
# feat_dim = 128
# latent_dim = 200
# num_labels = 81
# # Nus Models.
# Fx_scene = Fx(feat_dim, 512, 512, latent_dim)
# Fe_scene = Fe(num_labels, 512, latent_dim)
# Fd_scene = Fd(latent_dim, 512, num_labels, fin_act=torch.sigmoid)

# # Scene config
feat_dim = 294
latent_dim = 5
num_labels = 6
h_dim=40

fx_h_dim=100
# Scene models.
Fx_scene = Fx(feat_dim, fx_h_dim, fx_h_dim, latent_dim)
Fe_scene = Fe(num_labels, h_dim, latent_dim)
Fd_scene = Fd(latent_dim, h_dim, num_labels, fin_act=torch.sigmoid)
               
# Initializing net.
net = C2AE(Fx_scene, Fe_scene, Fd_scene, alpha=5, emb_lambda=0.01, latent_dim=latent_dim, device=device)
net = net.to(device)


# Doing weight_decay here is eqiv to adding the L2 norm.
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=0)
writer = SummaryWriter(comment='nus')

In [173]:
train_dataset[:5][1].to(device)

tensor([[1., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.]], device='cuda:0')

In [174]:
net.Fx(train_dataset[:5][0].to(device)), net.Fe(train_dataset[:5][1].to(device)) 

(tensor([[ 0.0597, -0.0095,  0.1325, -0.0084, -0.0076],
         [ 0.0205, -0.0122,  0.1384, -0.0084, -0.0072],
         [ 0.0272, -0.0123,  0.1544, -0.0075, -0.0082],
         [ 0.0390, -0.0132,  0.1572, -0.0086, -0.0077],
         [ 0.0316, -0.0126,  0.1361, -0.0071, -0.0080]], device='cuda:0',
        grad_fn=<LeakyReluBackward0>),
 tensor([[-0.0139, -0.0177, -0.0310,  0.1932, -0.0158],
         [ 0.0791, -0.0150, -0.0020,  0.1551, -0.0146],
         [-0.0029, -0.0071, -0.0064,  0.2071, -0.0088],
         [-0.0029, -0.0071, -0.0064,  0.2071, -0.0088],
         [-0.0029, -0.0071, -0.0064,  0.2071, -0.0088]], device='cuda:0',
        grad_fn=<LeakyReluBackward0>))

In [175]:
print("Starting training!")
best_loss = np.infty
for epoch in range(num_epochs+1): 
    # Training.
    net.train()
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    for x, y in train_dataloader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()      

        # Pass x, y to network. Retrieve both encodings, and decoding of ys encoding.
        fx_x, fe_y, fd_z = net(x, y)
        # Calc loss.
        l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
        # Normalize losses by batch.
        l_loss /= x.shape[0]
        c_loss /= x.shape[0]
        loss = l_loss + net.alpha*c_loss
        loss.backward()
        optimizer.step()
        
        loss_tracker+=loss.item()
        latent_loss_tracker+=l_loss.item()
        cor_loss_tracker+=c_loss.item()
    writer.add_scalar('train/loss', loss_tracker, epoch)
    writer.add_scalar('train/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('train/corr_loss', cor_loss_tracker, epoch)
    
    # Evaluation
    net.eval()
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    acc_track = 0.0
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)
        # evaluation only requires x. As its just Fd(Fx(x))
        fx_x, fe_y = net.Fx(x), net.Fe(y)
        fd_z = net.Fd(fx_x)

        l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
        # Normalize losses by batch.
        l_loss /= x.shape[0]
        c_loss /= x.shape[0]
        loss = l_loss + net.alpha*c_loss
        
        latent_loss_tracker += l_loss.item()
        cor_loss_tracker += c_loss.item()
        loss_tracker += loss.item()
        lab_preds = torch.round(net.Fd(net.Fx(x))).cpu().detach().numpy()
        acc_track += accuracy_score(y.cpu().detach().numpy(), lab_preds)
        
    if loss_tracker < best_loss:
        best_loss = loss_tracker
        print("Saving model.")
        torch.save(net.state_dict(), f'./models/scene_best/best.pt')
    print(f"Epoch: {epoch}, Loss: {loss_tracker},  L-Loss: {latent_loss_tracker}, C-Loss: {cor_loss_tracker}")
    writer.add_scalar('val/loss', loss_tracker, epoch)
    writer.add_scalar('val/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('val/corr_loss', cor_loss_tracker, epoch)
    writer.add_scalar('val/acc', acc_track, epoch)
writer.add_hparams(hparam_dict=eval_metrics(net, [hamming_loss, accuracy_score], [train_dataset], device)['dataset_0'], metric_dict={})
writer.add_hparams(hparam_dict=eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset], device)['dataset_0'], metric_dict={})

Starting training!
Saving model.
Epoch: 0, Loss: 190.78302192687988,  L-Loss: 0.26020116941072047, C-Loss: 38.10456430912018
Saving model.
Epoch: 1, Loss: 190.54774522781372,  L-Loss: 0.204106422374025, C-Loss: 38.06872779130936
Saving model.
Epoch: 2, Loss: 190.4091601371765,  L-Loss: 0.23050008225254714, C-Loss: 38.035731852054596
Epoch: 3, Loss: 190.55819129943848,  L-Loss: 0.5447562548797578, C-Loss: 38.00268656015396
Saving model.
Epoch: 4, Loss: 190.32458448410034,  L-Loss: 0.4879605807363987, C-Loss: 37.967325150966644
Epoch: 5, Loss: 190.77154684066772,  L-Loss: 1.1393505791202188, C-Loss: 37.926439225673676
Epoch: 6, Loss: 190.5609049797058,  L-Loss: 1.0958964959718287, C-Loss: 37.893001437187195
Epoch: 7, Loss: 192.15753650665283,  L-Loss: 2.8998786881566048, C-Loss: 37.85153144598007
Epoch: 8, Loss: 193.4620599746704,  L-Loss: 4.633823934942484, C-Loss: 37.76564657688141
Epoch: 9, Loss: 196.85033178329468,  L-Loss: 8.098749078810215, C-Loss: 37.75031679868698
Epoch: 10, Loss

In [176]:
net.load_state_dict(torch.load('./models/scene_best/best.pt'))

<All keys matched successfully>

In [155]:
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.00390625, 'accuracy_score': 0.9765625},
 'dataset_1': {'hamming_loss': 0.00390625, 'accuracy_score': 0.9765625}}

In [143]:
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.00390625, 'accuracy_score': 0.9765625},
 'dataset_1': {'hamming_loss': 0.00390625, 'accuracy_score': 0.9765625}}

In [177]:
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.0013762730525736307,
  'accuracy_score': 0.9917423616845582},
 'dataset_1': {'hamming_loss': 0.0013762730525736307,
  'accuracy_score': 0.9917423616845582}}

In [62]:
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.010416666666666666, 'accuracy_score': 0.9375},
 'dataset_1': {'hamming_loss': 0.010416666666666666, 'accuracy_score': 0.9375}}

In [85]:
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.00390625, 'accuracy_score': 0.9765625},
 'dataset_1': {'hamming_loss': 0.00390625, 'accuracy_score': 0.9765625}}

In [22]:
eval_metrics(net, [hamming_loss, accuracy_score], [train_dataset, test_dataset])

{'dataset_0': {'hamming_loss': 0.016712834718374883,
  'accuracy_score': 0.9268698060941828},
 'dataset_1': {'hamming_loss': 0.09302325581395349,
  'accuracy_score': 0.6827242524916943}}

In [20]:
eval_metrics(net, [hamming_loss, accuracy_score], [train_dataset, test_dataset])

{'dataset_0': {'hamming_loss': 0.01680517082179132,
  'accuracy_score': 0.9257617728531856},
 'dataset_1': {'hamming_loss': 0.08665559246954596,
  'accuracy_score': 0.6827242524916943}}

In [66]:
eval_metrics(net, [hamming_loss, accuracy_score], [train_dataset, test_dataset])

TypeError: eval_metrics() missing 1 required positional argument: 'device'

In [14]:
eval_metrics(net, [hamming_loss, accuracy_score], [train_dataset, test_dataset])

{'dataset_0': {'hamming_loss': 0.029916897506925208,
  'accuracy_score': 0.8626038781163435},
 'dataset_1': {'hamming_loss': 0.0858250276854928,
  'accuracy_score': 0.6760797342192691}}

In [15]:
eval_metrics(net, [hamming_loss, accuracy_score], [train_dataset])

{'dataset_0': {'hamming_loss': 0.029916897506925208,
  'accuracy_score': 0.8626038781163435}}

In [372]:
eval_metrics(net, [hamming_loss, accuracy_score], [train_dataset, test_dataset])

{'dataset_0': {'hamming_loss': 0.010156971375807941,
  'accuracy_score': 0.9484764542936288},
 'dataset_1': {'hamming_loss': 0.07419712070874862,
  'accuracy_score': 0.7093023255813954}}

In [212]:
# Decoder.
Fd_scene = SFC(latent_dim, 6, num_labels).to(device)

# Turn off gradient tracking for encoder networks.
for param in Fx_scene.parameters():
    param.requires_grad = False
for param in Fe_scene.parameters():
    param.requires_grad = False

# Initializing net.
net = C2AE(Fx_scene, Fe_scene, Fd_scene, alpha=0.5, emb_lambda=0.5)
net = net.to(device)

# Doing weight_decay here is eqiv to adding the L2 norm.
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=1)
writer = SummaryWriter()

for epoch in range(num_epochs+1): 
    # Training.
    net.train()
    loss_tracker = 0.0
    for x, y in train_dataloader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()      
        # Pass x, y to network. Retrieve both encodings, and decoding of ys encoding.
        _, _, fd_z = net(x, y)
        loss = net.corr_loss(torch.sigmoid(fd_z), y)
        loss.backward()
        optimizer.step()
        loss_tracker+=loss.item()
    writer.add_scalar('loss/cor', loss_tracker, epoch)
    
    # Evaluation
    net.eval()
    test_tracker = 0.0
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)
        # evaluation only requires x. As its just Fd(Fx(x))
        loss = net.corr_loss(torch.sigmoid(net.Fd(net.Fe(y))), y)
        test_tracker += loss.item()
    writer.add_scalar('loss/cor_val', test_tracker, epoch)

    print("Epoch: {}, {}, {}".format(epoch, loss_tracker, test_tracker))
    save_model(net, './models/c2a/{}.pt'.format(epoch))

Epoch: 0, 1814.358543395996, 603.0643725395203
Epoch: 1, 1806.555687904358, 601.974002957344
Epoch: 2, 1802.5488176345825, 601.4645870923996
Epoch: 3, 1800.9454917907715, 601.294159412384
Epoch: 4, 1801.1399192810059, 601.2288442850113
Epoch: 5, 1800.7324028015137, 601.224426150322
Epoch: 6, 1800.1720390319824, 601.2312300205231
Epoch: 7, 1800.2306661605835, 601.223198890686
Epoch: 8, 1800.21125125885, 601.2118003368378
Epoch: 9, 1800.0681238174438, 601.1996719837189
Epoch: 10, 1800.1249866485596, 601.1949944496155
Epoch: 11, 1799.9471454620361, 601.1941817998886
Epoch: 12, 1799.929599761963, 601.1943576335907
Epoch: 13, 1799.9361963272095, 601.1952466964722
Epoch: 14, 1799.907998085022, 601.1959321498871
Epoch: 15, 1799.9003524780273, 601.1960427761078
Epoch: 16, 1799.898452758789, 601.1952675580978
Epoch: 17, 1799.882776260376, 601.1945011615753
Epoch: 18, 1799.884165763855, 601.1939296722412
Epoch: 19, 1799.8806791305542, 601.1934466362
Epoch: 20, 1799.880187034607, 601.193186759948

In [23]:
def load_model(model_cls, path, *args, **kwargs):
    model = model_cls(*args, **kwargs)
    model.load_state_dict(torch.load(path))
    model.eval()
    return model

In [24]:
Fx_scene = SFC(feat_dim, 50, latent_dim).to(device)
Fe_scene = SFC(num_labels, 6, latent_dim).to(device)
# Decoder.
Fd_scene = SFC(latent_dim, 6, num_labels).to(device)

net = load_model(C2AE, './models/c2a/36.pt', Fx_scene, Fe_scene, Fd_scene, 0.5, 0.5)

SFC(
  (fc1): Linear(in_features=294, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=6, bias=True)
) SFC(
  (fc1): Linear(in_features=6, out_features=6, bias=True)
  (fc2): Linear(in_features=6, out_features=6, bias=True)
) SFC(
  (fc1): Linear(in_features=6, out_features=6, bias=True)
  (fc2): Linear(in_features=6, out_features=6, bias=True)
)


In [213]:
fxs = net.Fx(X.to(device))

In [214]:
fxs = net.Fx(X.to(device))
logit_preds = net.Fd(fxs)
preds = torch.round(torch.sigmoid(logit_preds)).cpu().detach()
preds

tensor([[0., 0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1., 1.],
        ...,
        [0., 0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1., 1.]])

In [215]:
(torch.sum(Y == preds, axis=1) == 6).sum()

tensor(1)

In [221]:

preds.shape

torch.Size([2407, 6])

In [220]:
preds.sum(dim=0)

tensor([   0.,    0.,    0.,    0., 2407., 2407.])

In [205]:
def output_loss(preds, labels):
    """Computational error function,k属于Y，l属于Y补，计算ck - cl值，此时误差是对称的
    Parameters
    ----------
    y : tensorflow tensor {0,1}
        binary indicator matrix with label assignments.
    output : tensorflow tensor [0,1]
    neural network output value
    Returns


    labels = (N, l)
    -------
    tensorflow tensor
    """
    # Generate masks for [0,1] elements.
    ones = (labels == 1)
    zeros = (labels == 0)
    # Use broadcasting to apply logical and between mask arrays.
    # This will only indicate locations where both masks are 1.
    # For us this corresponds to set we are enumerating in eq (3) in Yah et al.
    ix_matrix = ones[:,:, None] & zeros[:, None, :]
    # print(ix_matrix)
    # Use same broadcasting logic to generate exponetial differences.
    # This like the above broadcast will do so between all pairs of points
    # for every datapoint.
    diff_matrix = torch.exp(-(preds[:, :, None] - preds[:, None, :]))
    # print(diff_matrix)

    # print(diff_matrix*ix_matrix, (diff_matrix*ix_matrix).shape)
    # This will sum all contributes to loss for each datapoint.
    losses = torch.flatten(diff_matrix*ix_matrix, start_dim=1).sum(dim=1)
    # print(losses)
    # print(ones.sum(dim=1), zeros.sum(dim=1))
    # print((ones.sum(dim=1)*zeros.sum(dim=1)).shape)
    # print((ones.sum(dim=1)*zeros.sum(dim=1)))
    # Normalize each loss
    losses /= (ones.sum(dim=1)*zeros.sum(dim=1))
    # print(losses.shape)
    # Combine all losses to retrieve final loss.
    return  losses.sum()


def output_loss(predictions, labels):
        """Computational error function,k属于Y，l属于Y补，计算ck - cl值，此时误差是对称的
        Parameters
        ----------
        y : tensorflow tensor {0,1}
            binary indicator matrix with label assignments.
        output : tensorflow tensor [0,1]
            neural network output value
        Returns
        -------
        tensorflow tensor
        """
        shape = tf.shape(labels)

        y_i = tf.equal(labels, tf.ones(shape))
        y_not_i = tf.equal(labels, tf.zeros(shape))

        # get indices to check
        truth_matrix = tf.cast(pairwise_and(y_i, y_not_i), float)

        # calculate all exp'd differences
        # through and with truth_matrix, we can get all c_i - c_k(appear in the paper)
        sub_matrix = pairwise_sub(predictions, predictions)
        exp_matrix = tf.exp(tf.negative(sub_matrix))

        # check which differences to consider and sum them
        sparse_matrix = tf.multiply(exp_matrix, truth_matrix)
        sums = tf.reduce_sum(sparse_matrix, axis=[1, 2])

        # get normalizing terms and apply them
        y_i_sizes = tf.reduce_sum(tf.cast(y_i, float), axis=1)
        y_i_bar_sizes = tf.reduce_sum(tf.cast(y_not_i, float), axis=1)
        normalizers = tf.multiply(y_i_sizes, y_i_bar_sizes)

        loss = tf.divide(sums, normalizers)
        zero = tf.zeros_like(loss)
        loss = tf.where(tf.logical_or(tf.math.is_inf(loss), tf.math.is_nan(loss)), x=zero, y=loss)
        loss = tf.reduce_sum(loss)
        return loss
    
def pairwise_and(a, b):
        """compute pairwise logical and between elements of the tensors a and b
        Description
        -----
        if y shape is [3,3], y_i would be translate to [3,3,1], y_not_i is would be [3,1,3]
        and return [3,3,3],through the matrix ,we can easy to caculate c_k - c_i(appear in the paper)
        """
        column = tf.expand_dims(a, 2)
        row = tf.expand_dims(b, 1)
        return tf.logical_and(column, row)

def pairwise_sub(a, b):
        """compute pairwise differences between elements of the tensors a and b
        :param a:
        :param b:
        :return:
        """
        column = tf.expand_dims(a, 2)
        row = tf.expand_dims(b, 1)
        return tf.subtract(column, row)
    
    
def embedding_loss(Fx, Fe):
        """
        caculate embedding loss
        min(||Fx(X) - Fe(Y)||^2), subject to Fx(X)Fx(X)^T = Fe(Y)Fe(Y)^T = I
        use Lagrange method and lagrange coefficient equeal to 0.5
        :param Fx: tensor {n_intances, n_latent_embedding_dim}
            Fx latent embedding data
        :param Fe: tensor {n_intances, n_latent_embedding_dim}
            Fe latent embedding data
        :return: tensor
            all n_insances loss
        """
        I = tf.eye(tf.shape(Fx)[1])
        C1, C2, C3 = Fx - Fe, tf.matmul(tf.transpose(Fx), Fx) - I, tf.matmul(tf.transpose(Fe), Fe) - I
        loss = tf.linalg.trace(tf.matmul(C1, tf.transpose(C1))) + tf.linalg.trace(tf.matmul(C2, tf.transpose(C2)) + tf.matmul(C3, tf.transpose(C3)))
        return loss
net.eval()
X_train, y_train = train_dataset[:5][0].to(device), train_dataset[:5][1].to(device)
X_train.shape

torch_fx = Fx_scene(X_train[:5])
torch_fe = Fe_scene(y_train[:5])
tf_fx = tf.convert_to_tensor(torch_fx.detach().cpu().numpy())
tf_fe = tf.convert_to_tensor(torch_fe.detach().cpu().numpy())

net.latent_loss(torch_fx, torch_fe), embedding_loss(tf_fx, tf_fe)


preds = torch.sigmoid(Fd_scene(torch_fx))
tf_preds = tf.convert_to_tensor(preds.detach().cpu().numpy())
tf_ys = tf.convert_to_tensor(y_train[:5].detach().cpu().numpy())
net.corr_loss(preds, y_train[:5]), output_loss(tf_preds, tf_ys)

(tensor(4.9726, device='cuda:0', grad_fn=<SumBackward0>),
 <tf.Tensor: shape=(), dtype=float32, numpy=4.972551>)

In [211]:
correlation_loss(preds, Y[:5])

tensor([[0],
        [4]]) tensor([[1],
        [2],
        [3],
        [5]])
tensor([[0],
        [5]]) tensor([[1],
        [2],
        [3],
        [4]])
tensor([[0]]) tensor([[1],
        [2],
        [3],
        [4],
        [5]])
tensor([[0]]) tensor([[1],
        [2],
        [3],
        [4],
        [5]])
tensor([[0]]) tensor([[1],
        [2],
        [3],
        [4],
        [5]])


tensor([5.0206], device='cuda:0', grad_fn=<AddBackward0>)

In [210]:
def correlation_loss(preds, y):
    """
    Loss between output of decoder, and actual y label of example.
    """
    E_i = 0.0
    for i in range(y.shape[0]):
        pos_ixs = torch.nonzero(y[i, :] != 0)
        neg_ixs = torch.nonzero(y[i, :] == 0)
        e_i = 0.0
        for p_ix in pos_ixs:
            for n_ix in neg_ixs:
                e_i += torch.exp(-(preds[i, p_ix]-preds[i, n_ix]))
        E_i += e_i/(len(pos_ixs)*len(neg_ixs))

    return E_i