In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [15]:

# Load data
ratings_data = pd.read_csv('../datasets/movielens-1m/ml-1m/ratings.dat', sep='::', engine='python', names=['userId', 'movieId', 'rating', 'timestamp'])
movies_data = pd.read_csv('../datasets/movielens-1m/ml-1m/movies.dat', sep='::', engine='python', names=['movieId', 'title', 'genres'],encoding='latin-1')

# Merge data
data = pd.merge(ratings_data, movies_data, on='movieId')

In [16]:
ratings_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [17]:
movies_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [18]:
data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama
1,2,1193,5,978298413,One Flew Over the Cuckoo's Nest (1975),Drama
2,12,1193,4,978220179,One Flew Over the Cuckoo's Nest (1975),Drama
3,15,1193,4,978199279,One Flew Over the Cuckoo's Nest (1975),Drama
4,17,1193,5,978158471,One Flew Over the Cuckoo's Nest (1975),Drama


In [19]:


# Preprocessing
user_ids = data['userId'].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
movie_ids = data['movieId'].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
data['user'] = data['userId'].map(user2user_encoded)
data['movie'] = data['movieId'].map(movie2movie_encoded)

num_users = len(user2user_encoded)
num_movies = len(movie_encoded2movie)

data = data[['user', 'movie', 'rating']]

# Split data into train and test sets

In [20]:
train, test = train_test_split(data, test_size=0.2)
train.head()

Unnamed: 0,user,movie,rating
283994,4468,316,4
872260,1922,1812,1
260264,1231,273,3
818361,711,1537,4
457353,3740,615,4


In [21]:
test.head()

Unnamed: 0,user,movie,rating
262589,2089,276,4
736325,2102,1248,4
763168,2182,1321,4
568130,152,814,5
23076,2792,21,5


In [22]:


# Create neural network model
embedding_size = 50
input_user = tf.keras.layers.Input(shape=(1,))
input_movie = tf.keras.layers.Input(shape=(1,))

embedding_user = tf.keras.layers.Embedding(num_users, embedding_size)(input_user)
embedding_movie = tf.keras.layers.Embedding(num_movies, embedding_size)(input_movie)

flatten_user = tf.keras.layers.Flatten()(embedding_user)
flatten_movie = tf.keras.layers.Flatten()(embedding_movie)

concat = tf.keras.layers.Concatenate()([flatten_user, flatten_movie])

dense1 = tf.keras.layers.Dense(128, activation='relu')(concat)
dense2 = tf.keras.layers.Dense(64, activation='relu')(dense1)
dense3 = tf.keras.layers.Dense(32, activation='relu')(dense2)

output = tf.keras.layers.Dense(1)(dense3)
model = tf.keras.Model(inputs=[input_user, input_movie], outputs=output)

In [23]:
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit([train['user'], train['movie']], train['rating'], epochs=1)

# Evaluate the model
loss = model.evaluate([test['user'], test['movie']], test['rating'])
print("Mean Squared Error:", loss)

Mean Squared Error: 0.8082492351531982


In [24]:
#predict the rating for a user and a movie

user_id = data.user.sample(1).iloc[0]
movie_id = data.movie.sample(1).iloc[0]
print(user_id, movie_id)

1738 2837


In [25]:
#find movieid of starwars 
movie_id = movies_data[movies_data['title'] == 'Titanic (1997)']['movieId'].values[0]
print(movie_id)

1721


In [26]:
userid = 42
movie_id = 1721

#predict the rating
preds = model.predict([np.array([user_id]), np.array([movie_id])])
print(preds)

[[3.228561]]
