# Recommender- Yeah Money Money Money

### Requirements
```
python 3.6

numpy == '1.14.3'
pandas == '0.23.0'
tensorflow == '1.8.0'
keras == '2.2.0'
```
Other versions of above library will probably work

Prepare the csv data, from [movielens](https://grouplens.org/datasets/movielens/)

In [1]:
# %cd /data
# !!wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
# !!unzip ml-latest-small.zip

In [4]:
import pandas as pd
import numpy as np
import os

In [3]:
DATA = "/data/ml-latest-small/"
DIM = 100

In [5]:
files = os.listdir(DATA)
files

['links.csv', 'tags.csv', 'ratings.csv', 'README.txt', 'movies.csv']

In [6]:
data = dict()
for f in files:
    if f[-3:]=="csv":
        data[f.split(".")[0]] = pd.read_csv(DATA+f)

## Check Sample Data

In [7]:
from IPython.display import display
list(display(k,v.sample(5)) for k,v in data.items())

'links'

Unnamed: 0,movieId,imdbId,tmdbId
5528,8763,63389,64877.0
6604,50162,344854,9992.0
6895,58107,1023481,8328.0
4090,5357,110157,24767.0
7239,68967,836700,8282.0


'tags'

Unnamed: 0,userId,movieId,tag,timestamp
572,423,6713,Satoshi Kon,1354033681
1043,547,55276,toplist07,1195959915
1153,547,96728,toplist12,1355599008
1013,547,48394,holes00s,1342850387
1262,547,160954,bkk,1472178574


'ratings'

Unnamed: 0,userId,movieId,rating,timestamp
42649,306,1216,3.0,956081725
7262,44,135,4.0,858707310
74797,519,1240,4.5,1468758883
37768,271,2028,4.0,1107785352
9399,62,111759,4.0,1451708157


'movies'

Unnamed: 0,movieId,title,genres
7789,86320,Melancholia (2011),Drama|Sci-Fi
5742,26151,Au Hasard Balthazar (1966),Crime|Drama
7721,83613,Cowboys & Aliens (2011),Action|Sci-Fi|Thriller|Western|IMAX
7027,61236,Waltz with Bashir (Vals im Bashir) (2008),Animation|Documentary|Drama|War
7135,65193,Wild Child (2008),Drama|Romance


[None, None, None, None]

## Model On Rating

In [12]:
len(data["ratings"])

100004

In [13]:
userId = list(set(data["ratings"]["userId"]))
movieId = list(set(data["ratings"]["movieId"]))
print(len(userId),len(movieId))

671 9066


### Mapping
user to index, movie to index, index to user, index to movie

In [20]:
u2i = dict((v,k) for k,v in enumerate(userId))
m2i = dict((v,k) for k,v in enumerate(movieId))
i2u = dict((k,v) for k,v in enumerate(userId))
i2m = dict((k,v) for k,v in enumerate(movieId))

In [21]:
data["ratings"]["movie_idx"] = data["ratings"]["movieId"].apply(lambda x:m2i[x])
data["ratings"]["user_idx"] = data["ratings"]["userId"].apply(lambda x:u2i[x])

In [48]:
user_array = data["ratings"]["user_idx"].values.reshape(-1,1)
movie_array = data["ratings"]["movie_idx"].values.reshape(-1,1)

In [49]:
rating_array = data["ratings"]["rating"].values.reshape(-1,1)/5

In [50]:
user_array.shape,movie_array.shape,rating_array.shape

((100004, 1), (100004, 1), (100004, 1))

## Model

In [None]:
from keras.layers import *
from keras.models import *

In [57]:
def idx2vec(leng,name,dim = DIM):
    ipt = Input((1,),name=name+"_ipt")
    ebd = Embedding(leng,dim)(ipt)
    ebd = SpatialDropout1D(.3)(ebd)
    return ipt,ebd

In [58]:
ipt_u, ebd_u = idx2vec(len(userId),"user")
ipt_m, ebd_m = idx2vec(len(movieId),"movie")

In [59]:
user2vec = Model(ipt_u,ebd_u)
movie2vec = Model(ipt_m,ebd_m)

### What is Embeding Layer

In [60]:
user2vec.predict(np.array([[0],[2]]))

array([[[ 1.91395916e-02, -4.51731198e-02,  4.37530987e-02,
         -1.63733847e-02,  6.41286373e-03,  4.66329344e-02,
         -2.11565029e-02, -3.47738862e-02,  2.13194229e-02,
          3.23534012e-03,  4.95511293e-03, -1.18402839e-02,
         -2.76263598e-02,  1.77213289e-02, -1.52823552e-02,
          2.01885365e-02,  3.57834809e-02, -1.48696899e-02,
          1.56355761e-02,  1.01965554e-02, -8.46663862e-03,
          3.66757549e-02,  3.48094441e-02, -2.57774722e-02,
          3.20844986e-02, -4.09193523e-02, -1.54222474e-02,
          2.47629397e-02,  4.87549193e-02, -4.60501797e-02,
         -8.87737423e-03, -3.97561304e-02, -1.64012089e-02,
         -4.16589901e-03,  4.20560725e-02, -4.36268337e-02,
          3.48217748e-02,  3.49102505e-02,  2.14034356e-02,
         -1.34521946e-02, -2.86731608e-02,  3.45314257e-02,
          4.00956757e-02, -2.78212875e-03, -3.91735211e-02,
         -2.87387371e-02, -3.99985909e-02,  3.78483795e-02,
         -2.96304822e-02, -1.71114132e-0

In [61]:
x = Multiply()([ebd_u,ebd_m])
x = Flatten()(x)
x = Dense(1,)(x)

cf = Model([ipt_u,ipt_m],x)

In [62]:
cf.compile("Adam",loss="mse",)

In [63]:
cf.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_ipt (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
movie_ipt (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 1, 100)       67100       user_ipt[0][0]                   
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 1, 100)       906600      movie_ipt[0][0]                  
__________________________________________________________________________________________________
spatial_dr

In [64]:
cf.fit([user_array,movie_array],rating_array,epochs=5,shuffle=True,validation_split=.3,batch_size=512)

Train on 70002 samples, validate on 30002 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb1fee14e0>

### Deep Collaborative Filtering

In [70]:
ipt_u, ebd_u = idx2vec(len(userId),"user")
ipt_m, ebd_m = idx2vec(len(movieId),"movie")

x = Multiply()([ebd_u,ebd_m])
x = Flatten()(x)

# The rest is multi-layer perceptron
x = Dense(512,)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Dense(256,)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Dense(1,)(x)

In [71]:
cf_dnn = Model([ipt_u,ipt_m],x)

In [72]:
cf_dnn.compile("Adam",loss="mse",)

In [73]:
cf_dnn.fit([user_array,movie_array],rating_array,epochs=5,shuffle=True,validation_split=.3,batch_size=512)

Train on 70002 samples, validate on 30002 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb21f2b8d0>