In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Dot, Flatten, Dense, Concatenate
from tensorflow.keras.models import Model

# Read the MovieLens ratings and movie metadata (paths are relative to the
# current working directory)
ratings_data = pd.read_csv('ratings.csv')
movies_data = pd.read_csv('movies.csv')

# Attach each movie's metadata to its ratings via the shared movieId column
data = ratings_data.merge(movies_data, on='movieId')

# Preview the first rows of the merged frame
print(data.head())

# Map raw user and movie IDs to contiguous zero-based indices, numbered in
# order of first appearance, so they can be fed to the embedding layers
user_ids = data['userId'].unique()
movie_ids = data['movieId'].unique()
# The comprehension variable is `raw_id` rather than `id` so the builtin
# id() is not shadowed
user_id_map = {raw_id: idx for idx, raw_id in enumerate(user_ids)}
movie_id_map = {raw_id: idx for idx, raw_id in enumerate(movie_ids)}

# Create new columns with the mapped indices
data['user_idx'] = data['userId'].map(user_id_map)
data['movie_idx'] = data['movieId'].map(movie_id_map)

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

# Embedding table sizes: one row per distinct user ID and per distinct movie ID
num_users, num_movies = len(user_ids), len(movie_ids)

# Width of each embedding vector
embedding_dim = 50

# Define user input: a single integer user index per example
user_input = Input(shape=(1,), name='user_input')

# User embedding layer.
# `input_length` is deliberately not passed: it is deprecated in Keras 3, and
# the sequence length is already fixed to 1 by the Input shape above, so
# Flatten still produces a statically-shaped (embedding_dim,) vector.
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim)(user_input)
user_flatten = Flatten()(user_embedding)

# Define movie input: a single integer movie index per example
movie_input = Input(shape=(1,), name='movie_input')

# Movie embedding layer (same treatment of `input_length` as the user branch)
movie_embedding = Embedding(input_dim=num_movies, output_dim=embedding_dim)(movie_input)
movie_flatten = Flatten()(movie_embedding)

# Concatenate user and movie embeddings into one feature vector
concat = Concatenate()([user_flatten, movie_flatten])

# Dense layers for neural network; the final layer has one unit and no
# activation, so the model emits a single unbounded value per example
dense1 = Dense(128, activation='relu')(concat)
dense2 = Dense(64, activation='relu')(dense1)
output = Dense(1)(dense2)

# Create and compile the model: mean squared error loss, Adam optimizer
model = Model(inputs=[user_input, movie_input], outputs=output)
model.compile(loss='mean_squared_error', optimizer='adam')

# Assemble the two-input feature lists in the same order the model declares
# its inputs: user index first, movie index second
train_features = [train_data['user_idx'], train_data['movie_idx']]
test_features = [test_data['user_idx'], test_data['movie_idx']]

# Train the model, using the held-out split as validation data
history = model.fit(
    train_features,
    train_data['rating'],
    validation_data=(test_features, test_data['rating']),
    epochs=5,
    batch_size=64,
)

# Evaluate the model on the same held-out split and report the loss
loss = model.evaluate(test_features, test_data['rating'])
print("Test Loss:", loss)

   userId  movieId  rating   timestamp             title  \
0       1        1     4.0   964982703  Toy Story (1995)   
1       5        1     4.0   847434962  Toy Story (1995)   
2       7        1     4.5  1106635946  Toy Story (1995)   
3      15        1     2.5  1510577970  Toy Story (1995)   
4      17        1     4.5  1305696483  Toy Story (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1  Adventure|Animation|Children|Comedy|Fantasy  
2  Adventure|Animation|Children|Comedy|Fantasy  
3  Adventure|Animation|Children|Comedy|Fantasy  
4  Adventure|Animation|Children|Comedy|Fantasy  
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.7839866876602173
