### Contrastive Loss for Self-supervised Learning

In [1]:
import numpy as np
import tensorflow as tf
np.set_printoptions(precision=3)
print ("Done.")

Done.


### Helper functions

In [2]:
def gpu_sess(): 
    config = tf.ConfigProto(); 
    config.gpu_options.allow_growth=True
    sess = tf.Session(config=config)
    return sess    
def print_tf_tensor(sess,tf_tensor):
    tf_tensor_val = sess.run(tf_tensor)
    print ("[%s] shape:%s"%(tf_tensor.name,tf_tensor_val.shape))
    print (tf_tensor_val)

### Simple illustration of the original NCE loss
https://github.com/google-research/simclr/blob/master/objective.py

In [3]:
tf.reset_default_graph()
sess = gpu_sess()
n_batch,dim = 5,2
hidden_concat = tf.cast(tf.Variable(np.random.rand(n_batch*2,dim)),tf.float32,name='hidden_concat')
sess.run(tf.global_variables_initializer())
print_tf_tensor(sess,hidden_concat)

[hidden_concat:0] shape:(10, 2)
[[0.648 0.706]
 [0.509 0.362]
 [0.462 0.338]
 [0.925 0.73 ]
 [0.342 0.013]
 [0.308 0.826]
 [0.462 0.726]
 [0.47  0.977]
 [0.708 0.105]
 [0.326 0.858]]


In [4]:
# L2 normalize
hidden_concat_nzd = tf.math.l2_normalize(hidden_concat,-1)
# Split into half (which originally came from two separate feature maps)
hidden1,hidden2 = tf.split(hidden_concat_nzd,num_or_size_splits=2,axis=0)
print_tf_tensor(sess,hidden_concat_nzd)
print_tf_tensor(sess,hidden1)
print_tf_tensor(sess,hidden2)

[l2_normalize:0] shape:(10, 2)
[[0.676 0.737]
 [0.815 0.579]
 [0.807 0.59 ]
 [0.785 0.62 ]
 [0.999 0.037]
 [0.349 0.937]
 [0.537 0.844]
 [0.434 0.901]
 [0.989 0.147]
 [0.355 0.935]]
[split:0] shape:(5, 2)
[[0.676 0.737]
 [0.815 0.579]
 [0.807 0.59 ]
 [0.785 0.62 ]
 [0.999 0.037]]
[split:1] shape:(5, 2)
[[0.349 0.937]
 [0.537 0.844]
 [0.434 0.901]
 [0.989 0.147]
 [0.355 0.935]]


In [5]:
# Label and mask
hidden1_large = hidden1 # ?
hidden2_large = hidden2 # ?
labels = tf.one_hot(tf.range(n_batch),n_batch*2,name='labels')
masks = tf.one_hot(tf.range(n_batch),n_batch,name='masks')
print_tf_tensor(sess,labels)
print_tf_tensor(sess,masks)

[labels:0] shape:(5, 10)
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
[masks:0] shape:(5, 5)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [6]:
# [G_aa]_{i,j} = cosdist(xa_i,xa_j) - LARGE_NUM*delta(xa_i,xa_j)
LARGE_NUM = 1e9
temperature = 1.0
logits_aa = tf.matmul(hidden1, hidden1_large, transpose_b=True) / temperature
logits_aa = tf.subtract(logits_aa,LARGE_NUM*masks,name='logits_aa')
print_tf_tensor(sess,logits_aa)

[logits_aa:0] shape:(5, 5)
[[-1.000e+09  9.779e-01  9.807e-01  9.873e-01  7.034e-01]
 [ 9.779e-01 -1.000e+09  9.999e-01  9.987e-01  8.364e-01]
 [ 9.807e-01  9.999e-01 -1.000e+09  9.993e-01  8.288e-01]
 [ 9.873e-01  9.987e-01  9.993e-01 -1.000e+09  8.075e-01]
 [ 7.034e-01  8.364e-01  8.288e-01  8.075e-01 -1.000e+09]]


In [7]:
# [G_bb]_{i,j} = cosdist(xb_i,xb_j) - LARGE_NUM*delta(xb_i,xb_j)
logits_bb = tf.matmul(hidden2, hidden2_large, transpose_b=True) / temperature
logits_bb = tf.subtract(logits_bb,LARGE_NUM*masks,name='logits_bb')
print_tf_tensor(sess,logits_bb)

[logits_bb:0] shape:(5, 5)
[[-1.000e+09  9.780e-01  9.958e-01  4.834e-01  1.000e+00]
 [ 9.780e-01 -1.000e+09  9.930e-01  6.552e-01  9.794e-01]
 [ 9.958e-01  9.930e-01 -1.000e+09  5.616e-01  9.964e-01]
 [ 4.834e-01  6.552e-01  5.616e-01 -1.000e+09  4.891e-01]
 [ 1.000e+00  9.794e-01  9.964e-01  4.891e-01 -1.000e+09]]


In [8]:
# [G_ab]_{i,j} = cosdist(xa_i,xb_j)
logits_ab = tf.matmul(hidden1, hidden2_large, transpose_b=True) / temperature
logits_ba = tf.matmul(hidden2, hidden1_large, transpose_b=True) / temperature
print_tf_tensor(sess,logits_ab)
print_tf_tensor(sess,logits_ba)

[truediv_2:0] shape:(5, 5)
[[0.926 0.985 0.957 0.777 0.929]
 [0.827 0.926 0.875 0.892 0.831]
 [0.835 0.931 0.882 0.885 0.838]
 [0.855 0.944 0.899 0.868 0.858]
 [0.384 0.568 0.467 0.994 0.39 ]]
[truediv_3:0] shape:(5, 5)
[[0.926 0.827 0.835 0.855 0.384]
 [0.985 0.926 0.931 0.944 0.568]
 [0.957 0.875 0.882 0.899 0.467]
 [0.777 0.892 0.885 0.868 0.994]
 [0.929 0.831 0.838 0.858 0.39 ]]


### $\text{Given } \{ x^a_i, x^b_i \}_{i=1}^N \text{ where }x^a_i \text{ and } x^b_i \text{ has a correspondence.}$

### $\quad \text{loss_a} = \sum_{i=1}^N 
\left(
\log \frac{\exp( sim(x^a_i,~x^b_i) )}
{\sum_{k=1}^K \exp( sim(x^a_i,~x^b_k)) ~+~ \sum_{k=1, k \neq i}^K \exp( sim(x^a_i,~x^a_k)) } 
\right) $

### $\quad \text{loss_b} = \sum_{i=1}^N 
\left(
\log \frac{\exp( sim(x^b_i,~x^a_i) )}
{\sum_{k=1}^K \exp( sim(x^b_i,~x^a_k)) ~+~ \sum_{k=1, k \neq i}^K \exp( sim(x^b_i,~x^b_k)) } 
\right) $

### $\quad \text{loss} = \text{loss_a} + \text{loss_b}$

In [9]:
print_tf_tensor(sess,labels)

[labels:0] shape:(5, 10)
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [10]:
# Define the nce loss function
weights = 1.0
loss_a = tf.losses.softmax_cross_entropy(
  labels, tf.concat([logits_ab, logits_aa], 1), weights=weights)
loss_b = tf.losses.softmax_cross_entropy(
  labels, tf.concat([logits_ba, logits_bb], 1), weights=weights)
loss = loss_a + loss_b
print (loss)

Tensor("add:0", shape=(), dtype=float32)


### Wrap it up with a function

In [13]:
-
print ("Done.")

Done.


### Usage 1 (random feature maps)

In [32]:
tf.reset_default_graph()
sess = gpu_sess()
n_batch,dim = 5,2
h1 = tf.cast(tf.Variable(np.random.rand(n_batch,dim)),tf.float32,name='h1')
h2 = tf.cast(tf.Variable(np.random.rand(n_batch,dim)),tf.float32,name='h2')
hidden_concat = tf.concat([h1,h2],axis=0)
nce_loss = get_nce_loss(hidden_concat)
sess.run(tf.global_variables_initializer())
print_tf_tensor(sess,h1)
print_tf_tensor(sess,h2)
print ("Loss is [%.4f]."%(sess.run(nce_loss)))

[h1:0] shape:(5, 2)
[[0.283 0.299]
 [0.783 0.863]
 [0.334 0.133]
 [0.878 0.516]
 [0.95  0.637]]
[h2:0] shape:(5, 2)
[[0.858 0.817]
 [0.811 0.107]
 [0.765 0.798]
 [0.764 0.56 ]
 [0.515 0.597]]
Loss is [4.4372].


### Usage 2 (similar feature maps)

In [41]:
tf.reset_default_graph()
sess = gpu_sess()
n_batch,dim = 5,2
bias = np.random.randn(n_batch,dim)
eps = 0.1
h1 = tf.cast(tf.Variable(bias+eps*np.random.randn(n_batch,dim)),tf.float32,name='h1')
h2 = tf.cast(tf.Variable(bias+eps*np.random.randn(n_batch,dim)),tf.float32,name='h2')
hidden_concat = tf.concat([h1,h2],axis=0)
nce_loss = get_nce_loss(hidden_concat)
sess.run(tf.global_variables_initializer())
print_tf_tensor(sess,h1)
print_tf_tensor(sess,h2)
print ("Loss is [%.4f]."%(sess.run(nce_loss)))

[h1:0] shape:(5, 2)
[[-0.678 -0.31 ]
 [-0.706  1.504]
 [ 0.645  0.913]
 [-1.351 -1.435]
 [ 0.041 -0.077]]
[h2:0] shape:(5, 2)
[[-0.816 -0.507]
 [-0.865  1.495]
 [ 0.625  0.879]
 [-1.352 -1.448]
 [ 0.281  0.041]]
Loss is [2.9659].
