In [None]:
!pip install datasets==1.9.0

Collecting datasets==1.9.0
  Downloading datasets-1.9.0-py3-none-any.whl (262 kB)
[K     |████████████████████████████████| 262 kB 33.7 MB/s 
[?25hCollecting huggingface-hub<0.1.0
  Downloading huggingface_hub-0.0.14-py3-none-any.whl (43 kB)
[K     |████████████████████████████████| 43 kB 709 kB/s 
Collecting fsspec>=2021.05.0
  Downloading fsspec-2021.7.0-py3-none-any.whl (118 kB)
[K     |████████████████████████████████| 118 kB 43.9 MB/s 
Collecting xxhash
  Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)
[K     |████████████████████████████████| 243 kB 62.3 MB/s 
Installing collected packages: xxhash, huggingface-hub, fsspec, datasets
Successfully installed datasets-1.9.0 fsspec-2021.7.0 huggingface-hub-0.0.14 xxhash-2.0.2


##### This is the Keras implementation from the researchers themselves

In [9]:
from tensorflow import keras
from keras import backend as K
from tensorflow.keras.layers import Layer
from tensorflow.keras import activations
from keras import initializers

In [261]:
class AttentionMLP(Layer):
    """Genre-aware additive attention pooling.

    The layer receives a pair ``[x, g]`` and returns a weighted sum of ``x``
    over its timestep axis. The weights depend on both ``x`` and the genre
    vector ``g``::

        h_t = activation(x_t . W + g . Wg + b)
        e_t = h_t . v
        a   = softmax(e)          # across timesteps
        out = sum_t a_t * x_t

    Input shapes:
        x: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
        g: (BATCH_SIZE, 1, GENRE_EMB_SIZE)

    Output shape:
        (BATCH_SIZE, EMBED_SIZE)

    Args:
        units: Size of the hidden attention space, i.e. the number of columns
            of ``W`` and ``Wg`` and the length of ``b`` and ``v``.
        activation: Activation applied to the hidden representation; anything
            accepted by ``activations.get`` (e.g. ``"selu"``).
        use_bias: Whether the bias ``b`` is created and added.
        kernel_initializer: Initializer for ``W``.
        bias_initializer: Initializer for ``b``.
        v_initializer: Initializer for ``v``.
        Wg_initializer: Initializer for ``Wg``.
        **kwargs: Forwarded to ``Layer.__init__``. ``input_dim`` is accepted
            as a shorthand for ``input_shape=(input_dim,)``.

    Note:
        Every initializer defaults to ``'ones'``, which is kept for backward
        compatibility. All-ones weights make every hidden unit identical, so
        for real training pass something like ``'glorot_uniform'`` for
        ``kernel_initializer``, ``v_initializer`` and ``Wg_initializer``.
    """

    def __init__(self,
                 units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='ones',
                 bias_initializer='ones',
                 v_initializer='ones',
                 Wg_initializer='ones',
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        # Initialise the base layer first so that attributes assigned below
        # (notably supports_masking) are not reset by the base constructor.
        super(AttentionMLP, self).__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.v_initializer = initializers.get(v_initializer)
        self.Wg_initializer = initializers.get(Wg_initializer)
        self.supports_masking = True

    def build(self, input_shape):
        """Create W, Wg, b and v from the shapes of ``[x, g]``."""
        assert isinstance(input_shape, (list, tuple)) and len(input_shape) == 2, \
            "AttentionMLP expects exactly two inputs: [x, g]"
        # W:  (EMBED_SIZE, units)
        # Wg: (GENRE_EMB_SIZE, units)
        # b:  (units,)
        # v:  (units, 1)
        self.W = self.add_weight(name="W_{:s}".format(self.name),
                                 shape=(input_shape[0][-1], self.units),
                                 initializer=self.kernel_initializer,
                                 trainable=True)

        self.Wg = self.add_weight(name="W_g{:s}".format(self.name),
                                  shape=(input_shape[1][-1], self.units),
                                  initializer=self.Wg_initializer,
                                  trainable=True)

        if self.use_bias:
            # b_a in the paper
            self.b = self.add_weight(name="b_{:s}".format(self.name),
                                     shape=(self.units,),
                                     initializer=self.bias_initializer,
                                     trainable=True)
        else:
            self.b = None

        self.v = self.add_weight(name="v_{:s}".format(self.name),
                                 shape=(self.units, 1),
                                 initializer=self.v_initializer,
                                 trainable=True)

        super(AttentionMLP, self).build(input_shape)

    def call(self, xs, mask=None):
        """Pool ``x`` over timesteps using genre-conditioned attention.

        Args:
            xs: ``[x, g]`` with
                x: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
                g: (BATCH_SIZE, 1, GENRE_EMB_SIZE)
            mask: Optional mask(s) for the inputs. When a list/tuple is
                given, its first entry is taken as the mask of ``x``.

        Returns:
            Tensor of shape (BATCH_SIZE, EMBED_SIZE).
        """
        x, g = xs
        g = K.squeeze(g, axis=1)
        # Genre contribution, broadcast over the timestep axis:
        # (BATCH_SIZE, 1, units)
        atten_g = K.expand_dims(K.dot(g, self.Wg), axis=1)
        pre_activation = K.dot(x, self.W) + atten_g
        if self.use_bias:
            pre_activation = pre_activation + self.b
        et = self.activation(pre_activation)
        # Project each hidden vector onto v: (BATCH_SIZE, MAX_TIMESTEPS, 1)
        et = K.dot(et, self.v)
        at = K.softmax(K.squeeze(et, axis=-1))
        # Only the mask belonging to x is relevant for the timestep weights.
        x_mask = mask[0] if isinstance(mask, (list, tuple)) else mask
        if x_mask is not None:
            # NOTE: weights are zeroed at masked steps but not renormalised,
            # matching the reference implementation.
            at *= K.cast(x_mask, K.floatx())
        # ot: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
        atx = K.expand_dims(at, axis=-1)
        ot = atx * x
        # output: (BATCH_SIZE, EMBED_SIZE)
        return K.sum(ot, axis=1)

    def compute_mask(self, input, input_mask=None):
        """Stop mask propagation: the timestep axis is summed away."""
        return None

    def compute_output_shape(self, input_shape):
        """Return (BATCH_SIZE, EMBED_SIZE) derived from the shape of ``x``."""
        return (input_shape[0][0], input_shape[0][-1])

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(AttentionMLP, self).get_config()
        config.update({
            'units': self.units,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'v_initializer': initializers.serialize(self.v_initializer),
            'Wg_initializer': initializers.serialize(self.Wg_initializer),
        })
        return config

In [262]:
import tensorflow as tf

In [263]:
# Attention layer with a 5-dimensional hidden attention space.
amlp = AttentionMLP(units=5)

In [268]:
# Toy sequence input: 2 examples x 2 timesteps x 5 features.
x = tf.constant(
    [
        [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]],
        [[11, 12, 13, 14, 15], [16, 17, 18, 19, 20]],
    ],
    dtype=tf.float32,
)

# Toy genre input: one 8-dimensional vector per example, written directly
# with the singleton middle axis so the shape is (2, 1, 8).
g = tf.constant(
    [
        [[1, 2, 3, 4, 5, 6, 7, 8]],
        [[9, 10, 11, 12, 13, 14, 15, 16]],
    ],
    dtype=tf.float32,
)

In [269]:
# x=tf.random.uniform(
#     [4,2,5], minval=0, maxval=None, dtype=tf.dtypes.float32, seed=None, name=None
# )
# g=tf.random.uniform(
#     [4,1,8], minval=0, maxval=None, dtype=tf.dtypes.float32, seed=None, name=None
# )

In [270]:
# With the layer's default all-ones initializers and no activation passed,
# the second timestep gets a much larger score than the first, so softmax
# puts essentially all weight on it: the result shown equals x[:, 1, :].
amlp([x,g])

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[ 6.,  7.,  8.,  9., 10.],
       [16., 17., 18., 19., 20.]], dtype=float32)>

In [233]:
# NOTE(review): the saved output ([16, 2, 100]) does not match the (2, 2, 5)
# `x` defined in this notebook; it appears to come from an earlier run with a
# different `x`. Re-run top to bottom to refresh.
x.shape

TensorShape([16, 2, 100])

In [234]:
# NOTE(review): the saved output ([16, 1, 8]) does not match the (2, 1, 8)
# `g` defined in this notebook; it appears to come from an earlier run.
g.shape

TensorShape([16, 1, 8])

In [235]:
# Bundle the two inputs the way AttentionMLP.call receives them.
xs = [x,g]

In [236]:
# Re-trace AttentionMLP.call step by step, outside the layer.
x, g = xs
# Keep the squeezed genre tensor under its own name: overwriting `g` with the
# rank-2 version would make a later `amlp([x, g])` fail in this kernel.
g_flat = K.squeeze(g, axis=1)
# Genre vector times genre weights, with the timestep axis restored for
# broadcasting.
atten_g = K.expand_dims(K.dot(g_flat, amlp.Wg), axis=1)

# Hidden representation for every timestep (all h_i stacked together).
et = amlp.activation(K.dot(x, amlp.W) + atten_g + amlp.b)
# Projecting onto v turns each h_i into the scalar score(x_i, g); from here
# on `et` holds scores, not the h_i described in the paper.
et = K.dot(et, amlp.v)

In [198]:
# Drop the trailing singleton axis, then normalise across the last axis.
scores = K.squeeze(et, axis=-1)
at = K.softmax(scores)

In [None]:
# Inspect the attention weights (softmax of the squeezed scores).
at

In [None]:
# Inspect the unnormalised scores.
et

In [None]:
# NOTE(review): `at` was already produced from `K.squeeze(et, axis=-1)`, so it
# has no trailing singleton axis left. Squeezing axis -1 again should raise
# unless that axis happens to have size 1; this cell has no saved output.
K.squeeze(at,axis=-1)

In [164]:
# Shape of `et` after the projection onto v: one score per timestep.
et.shape

TensorShape([16, 2, 1])

In [157]:
# NOTE(review): the label says "Before dot", but the saved shape ends in 1,
# which is what `et` looks like after `K.dot(et, amlp.v)`; the label looks
# stale.
print("Before dot et:", et.shape)

Before dot et: (16, 2, 1)


In [128]:
# NOTE(review): saved output ([16, 2, 100]) is from an earlier execution than
# the (16, 2, 1) result shown above; presumably `et` before the dot with v.
et.shape

TensorShape([16, 2, 100])

In [129]:
# NOTE(review): saved output ([100]) is rank 1, whereas the class as written
# builds `v` with shape (units, 1); the output appears to predate that change.
amlp.v.shape

TensorShape([100])

In [147]:
# Two random operands for checking K.dot shapes. The remaining arguments of
# tf.random.uniform are left at their defaults (values in [0, 1), float32,
# no seed).
a = tf.random.uniform((2, 100))
b = tf.random.uniform((100, 1))

In [148]:
# Shape check: (2, 100) dot (100, 1) gives (2, 1), as in the saved output.
K.dot(a,b)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[22.931757],
       [22.138748]], dtype=float32)>