# Embedding layer

In [1]:
import numpy as np

### Data

In [44]:
'''Made up ratings of somewhat rigid watchers'''

# Dimensions of the synthetic data set:
#   n_p    -- number of watchers
#   movies -- number of unique movies
#   types  -- number of taste groups (which kinds of movies a person likes)
n_p, movies, types = 1000, 80, 10

# taste group of every watcher: one integer in [0, types) per person
people = np.random.randint(low=0, high=types, size=n_p)

def get_like(i):
    '''Sample one watcher's like/dislike flag for every movie.

    A movie is "preferred" by the watcher when its index modulo `types` lies
    within 2 of the watcher's type (read from `people[i]`). Preferred movies
    are liked with probability 0.95, all others with probability 0.05.

    Parameters
    ----------
    i : int
        Index of the watcher in the module-level `people` array.

    Returns
    -------
    numpy.ndarray
        Integer array of length `movies`; 1 means liked, 0 means not liked.
    '''
    # type of ith person
    t = people[i]
    # one uniform draw in [0, 1) per movie
    draws = np.random.random(movies)
    # Like-probability per movie, computed for all movies at once. The distance is
    # not circular, so types near the ends of the range prefer fewer movies
    # (with types = 10: 3 of every 10 for type 0 or 9, 5 of every 10 for types 2-7).
    preferred = np.abs(np.arange(movies) % types - t) < 3
    like = np.where(preferred, 0.95, 0.05)
    return (like > draws).astype(int)

# Ratings matrix: one row per watcher, one column per movie (1 = liked, 0 = not).
# For simplicity, all people have seen all movies.
ratings = np.zeros((len(people), movies))
for i in range(ratings.shape[0]):
    ratings[i] = get_like(i)

# --- other per-watcher features ---

# Age: normal draw centred on 20 + 5 * type (std 10), truncated to an integer,
# floored at 15, then standardized to zero mean and unit variance.
age = np.random.normal(loc=20 + people * 5, scale=10).astype(int)
age = np.maximum(age, 15)
age = (age - age.mean()) / age.std()

# Education: one integer level per watcher, drawn uniformly from
# [type % 2, type % 5 + type % 2 + 1), in watcher order.
education = np.array(
    [np.random.randint(t % 2, t % 5 + t % 2 + 1) for t in people],
    dtype=int,
)

# data: column 0 = age, column 1 = education, columns 2.. = movie ratings
X = np.column_stack((age, education, ratings))

### Model

In [54]:
from keras.models import Model
from keras.layers import Input, Dense, concatenate

In [65]:
# size of the learned movie vector (embedding dimension)
embed = 5

# Ratings branch: a bias-free Dense layer compresses the per-movie ratings vector
# down to `embed` values. Its weight matrix has one row per movie, and those rows
# are what get read out as the movie embeddings after training.
embed_input = Input(shape=(movies,))
embed_layer = Dense(units=embed,use_bias=False)(embed_input)

# Side-feature branch: the two extra per-watcher features (age, education)
other_input = Input(shape=(2,))
merge = concatenate([other_input,embed_layer])

# Decoder: one hidden layer, then an independent like-probability for every movie
dense = Dense(32,activation='relu')(merge)
output = Dense(movies,activation='sigmoid')(dense)

model = Model(inputs=[embed_input,other_input],outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_21 (InputLayer)           (None, 80)           0                                            
__________________________________________________________________________________________________
input_22 (InputLayer)           (None, 2)            0                                            
__________________________________________________________________________________________________
dense_28 (Dense)                (None, 5)            400         input_21[0][0]                   
__________________________________________________________________________________________________
concatenate_12 (Concatenate)    (None, 7)            0           input_22[0][0]                   
                                                                 dense_28[0][0]                   
__________

### Training

In [67]:
# Inputs follow the order given to Model(inputs=...): the ratings columns (X[:, 2:])
# first, then the two side features (X[:, :2]). The target is the ratings themselves,
# so the network learns to reconstruct them through the narrow embedding layer.
model.fit(
    x=[X[:, 2:], X[:, :2]],
    y=X[:, 2:],
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd6bcc85f28>

### Embedding layer

Row i of the weight matrix below is the learned vector representation (embedding) of the i-th movie.

In [76]:
# layers[2] is the bias-free Dense layer fed by the ratings input (third entry in the
# model summary); its only weight array is the kernel, with one row per movie.
model.layers[2].get_weights()[0].round(2)

array([[-0.12, -0.01, -0.11, -0.02, -0.25],
       [ 0.13, -0.02,  0.13,  0.24, -0.2 ],
       [-0.31, -0.13,  0.2 ,  0.18,  0.  ],
       [-0.24, -0.38,  0.06, -0.36,  0.14],
       [ 0.06,  0.06,  0.21, -0.2 ,  0.31],
       [ 0.34,  0.11,  0.03, -0.07,  0.18],
       [ 0.15,  0.25,  0.39, -0.03, -0.18],
       [ 0.23,  0.37,  0.09, -0.12, -0.13],
       [ 0.11,  0.39,  0.18,  0.03,  0.03],
       [ 0.11,  0.21, -0.11, -0.28, -0.31],
       [-0.33,  0.08, -0.12,  0.  , -0.  ],
       [-0.09, -0.21,  0.08,  0.  , -0.3 ],
       [-0.15, -0.19,  0.29,  0.09,  0.11],
       [-0.21, -0.1 ,  0.03, -0.21, -0.06],
       [-0.11,  0.11,  0.08,  0.03,  0.32],
       [ 0.28, -0.13,  0.37, -0.07,  0.23],
       [ 0.1 ,  0.3 ,  0.24, -0.31, -0.18],
       [ 0.01,  0.25, -0.09, -0.01, -0.02],
       [ 0.04, -0.09, -0.19,  0.17, -0.24],
       [-0.27,  0.33,  0.07, -0.25, -0.39],
       [-0.1 , -0.08,  0.24,  0.39, -0.12],
       [-0.26, -0.2 ,  0.1 ,  0.07, -0.34],
       [ 0.1 , -0.13, -0.08, -0.