In [137]:
import pickle
import datetime
import json
import requests
import itertools
import tensorflow as tf
import pandas as pd
import numpy as np 
from sklearn import preprocessing

from elasticsearch import Elasticsearch

In [3]:
# Load the movie catalogue: trim surrounding whitespace from each title and
# turn the pipe-delimited genre string into a list of genre names. The `.str`
# accessors are vectorised and pass missing values through instead of raising.
df_movies = (
    pd.read_csv('movies.csv')
    .assign(
        title=lambda frame: frame['title'].str.strip(),
        genres=lambda frame: frame['genres'].str.split('|'),
    )
    .astype({'title': 'str'})
)

# Load the ratings and derive a `datetime` column from `timestamp`.
# The timestamps are read as seconds since the Unix epoch and converted in UTC.
# `datetime.datetime.fromtimestamp` without a tz argument converts to the
# machine's local timezone, so the column previously differed between machines.
df_ratings = pd.read_csv('ratings.csv')
df_ratings['datetime'] = pd.to_datetime(df_ratings['timestamp'], unit='s')

# The original cell left `df` bound to the ratings frame; keep that binding so
# any cell that still reads `df` behaves as before.
df = df_ratings

In [95]:
# Distinct raw user and movie ids present in the ratings, in order of first
# appearance.
userIds = pd.unique(df_ratings['userId'])
movieIds = pd.unique(df_ratings['movieId'])

{'(no genres listed)',
 'Action',
 'Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'IMAX',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western'}

In [127]:
# Fit one LabelEncoder per id column of the ratings frame. `fit` returns the
# encoder itself, so each fitted encoder can be bound directly.
user_id_enc, movie_id_enc = (
    preprocessing.LabelEncoder().fit(df_ratings[column].values)
    for column in ('userId', 'movieId')
)

LabelEncoder()

In [129]:
# Number of distinct classes learned by each encoder: (users, movies).
tuple(len(encoder.classes_) for encoder in (user_id_enc, movie_id_enc))

(671, 9066)

In [130]:
# Encode the raw ids of every rating row with the fitted encoders.
user_ids, movie_ids = (
    encoder.transform(df_ratings[column].values)
    for encoder, column in ((user_id_enc, 'userId'), (movie_id_enc, 'movieId'))
)

In [131]:
# Model creation: embed the encoded user id and movie id, concatenate the two
# embedding vectors, and regress a single value through one hidden layer.

# One integer input of shape (1,) per id, keyed by the name used at fit time.
input_layers = {
    colname: tf.keras.layers.Input(name=colname, shape=(1,), dtype=tf.int32)
    for colname in ['user_id', 'movie_id']
}

user_id_input = input_layers['user_id']
movie_id_input = input_layers['movie_id']

# Vocabulary sizes come from the fitted label encoders, so every encoded id
# falls inside the embedding table.
user_emb = tf.keras.layers.Embedding(len(user_id_enc.classes_), 64, name='user_emb')(user_id_input)
movie_emb = tf.keras.layers.Embedding(len(movie_id_enc.classes_), 64, name='movie_emb')(movie_id_input)

# An Embedding applied to a shape-(1,) input produces (batch, 1, 64). Without
# flattening, that extra axis propagates through the Dense layers and the model
# emits (batch, 1, 1), which does not line up with the (batch,) label vector
# and leaves the loss dependent on version-specific squeeze/broadcast rules.
# Flattening makes the model output (batch, 1).
user_vec = tf.keras.layers.Flatten()(user_emb)
movie_vec = tf.keras.layers.Flatten()(movie_emb)
features = tf.keras.layers.Concatenate()([user_vec, movie_vec])

hidden = tf.keras.layers.Dense(64, activation='relu')(features)
# NOTE(review): a ReLU on the output unit has zero gradient whenever its
# pre-activation is negative; a linear output may be safer for this
# regression target - confirm before changing.
model_out = tf.keras.layers.Dense(1, activation='relu')(hidden)
model = tf.keras.Model(input_layers, model_out)

In [133]:
# Configure training: Adam optimizer, mean absolute error as the loss.
# Call model.summary() here to inspect the layer stack if needed.
model.compile(optimizer='adam', loss='mae')

In [134]:
# Training inputs keyed by the model's input-layer names, and the rating
# targets taken from the ratings frame.
X = dict(user_id=user_ids, movie_id=movie_ids)
Y = df_ratings['rating'].values

# Sanity check: both inputs and the targets should have the same length.
tuple(map(len, (X['user_id'], X['movie_id'], Y)))

(100004, 100004, 100004)

In [135]:
# Train for three passes over the ratings; `history` keeps the object returned
# by fit.
history = model.fit(x=X, y=Y, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [140]:
# Persist the fitted id encoders and the trained model.
# The encoders are written inside `with` blocks so each file handle is flushed
# and closed deterministically; the original passed a bare `open(...)` to
# `pickle.dump` and never closed it.
with open('user_id_enc.pkl', 'wb') as enc_file:
    pickle.dump(user_id_enc, enc_file)
with open('movie_id_enc.pkl', 'wb') as enc_file:
    pickle.dump(movie_id_enc, enc_file)

# Writes the model to the 'neural_model' path.
model.save('neural_model')

INFO:tensorflow:Assets written to: neural_model\assets
