In [14]:
import pandas as pd
import numpy as np

# Load the datasets

ratings = pd.read_csv(r"C:\Users\mohamed elsayed\Desktop\ml-latest-small\Movie Recommendation System app\dataset\ratings.csv")
movies = pd.read_csv(r"C:\Users\mohamed elsayed\Desktop\ml-latest-small\Movie Recommendation System app\dataset\movies.csv")


# Prepare the data
user_ids = ratings['userId'].unique().tolist()
movie_ids = ratings['movieId'].unique().tolist()

# Create mappings
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}

user_encoded2user = {i: x for x, i in user2user_encoded.items()}
movie_encoded2movie = {i: x for x, i in movie2movie_encoded.items()}

# Encode userId and movieId
ratings['user'] = ratings['userId'].map(user2user_encoded)
ratings['movie'] = ratings['movieId'].map(movie2movie_encoded)

# Process genres
all_genres = set()
for genres in movies['genres'].str.split('|'):
    all_genres.update(genres)
all_genres = sorted(all_genres)

# Create genre mappings
genre2genre_encoded = {x: i for i, x in enumerate(all_genres)}
genre_encoded2genre = {i: x for x, i in genre2genre_encoded.items()}

# Encode genres
def encode_genres(genres):
    encoded = np.zeros(len(all_genres))
    for genre in genres.split('|'):
        if genre in genre2genre_encoded:
            encoded[genre2genre_encoded[genre]] = 1
    return encoded

movies['genre_encoded'] = movies['genres'].apply(encode_genres)

# Merge ratings with movies to get genre information
ratings = ratings.merge(movies[['movieId', 'genre_encoded']], on='movieId', how='left')

num_users = len(user2user_encoded)
num_movies = len(movie2movie_encoded)
num_genres = len(all_genres)

# Sort ratings by user and timestamp
ratings = ratings.sort_values(['userId', 'timestamp'])

# Prepare training and test data
train_data = ratings.groupby('userId').apply(lambda x: x.iloc[:-1]).reset_index(drop=True)
test_data = ratings.groupby('userId').apply(lambda x: x.iloc[-1]).reset_index(drop=True)

X_train = train_data[['user', 'movie']].values
genres_train = np.stack(train_data['genre_encoded'].values)
y_train = train_data['rating'].values

X_test = test_data[['user', 'movie']].values
genres_test = np.stack(test_data['genre_encoded'].values)
y_test = test_data['rating'].values


  train_data = ratings.groupby('userId').apply(lambda x: x.iloc[:-1]).reset_index(drop=True)
  test_data = ratings.groupby('userId').apply(lambda x: x.iloc[-1]).reset_index(drop=True)


In [15]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout

# Model parameters
embedding_size = 50

# User and Movie input layers
user_input = Input(shape=(1,), name='user_input')
user_embedding = Embedding(num_users, embedding_size, name='user_embedding')(user_input)
user_vec = Flatten(name='user_flatten')(user_embedding)

movie_input = Input(shape=(1,), name='movie_input')
movie_embedding = Embedding(num_movies, embedding_size, name='movie_embedding')(movie_input)
movie_vec = Flatten(name='movie_flatten')(movie_embedding)

# Genre input layer
genre_input = Input(shape=(num_genres,), name='genre_input')

# Concatenate user, movie, and genre embeddings
concat = Concatenate()([user_vec, movie_vec, genre_input])

# Fully connected layers
fc1 = Dense(128, activation='relu')(concat)
dropout1 = Dropout(0.2)(fc1)
fc2 = Dense(64, activation='relu')(dropout1)
dropout2 = Dropout(0.2)(fc2)
fc3 = Dense(32, activation='relu')(dropout2)

# Output layer
output = Dense(1)(fc3)

# Build the model
model = Model([user_input, movie_input, genre_input], output)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Summary of the model
model.summary()


In [41]:
# Train the model
history = model.fit(
    [X_train[:, 0], X_train[:, 1], genres_train],
    y_train,
    epochs=20,
    batch_size=256
)


Epoch 1/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 3.6139
Epoch 2/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.8129
Epoch 3/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.7283
Epoch 4/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.6745
Epoch 5/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.6375
Epoch 6/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.6099
Epoch 7/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.5798
Epoch 8/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.5497
Epoch 9/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.5283
Epoch 10/20
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - lo

In [42]:
# Predict ratings for the test data
predicted_ratings = model.predict([X_test[:, 0], X_test[:, 1], genres_test])

# Calculate Mean Squared Error
mse = np.mean((predicted_ratings.flatten() - y_test) ** 2)
print(f'Test MSE: {mse}')


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Test MSE: 1.042216489100218


In [43]:
def predict_top_n(user_id, n=10):
    user_encoded = user2user_encoded[user_id]
    movie_ids = movies['movieId'].values
    
    # Filter out movie_ids that are not in movie2movie_encoded
    valid_movie_ids = [movie_id for movie_id in movie_ids if movie_id in movie2movie_encoded]
    valid_movie_encoded = [movie2movie_encoded[movie_id] for movie_id in valid_movie_ids]
    genre_encoded = np.stack(movies[movies['movieId'].isin(valid_movie_ids)]['genre_encoded'].values)
    
    user_array = np.array([user_encoded] * len(valid_movie_encoded))
    
    predictions = model.predict([user_array, np.array(valid_movie_encoded), genre_encoded])
    predictions = predictions.flatten()
    
    top_n_indices = predictions.argsort()[-n:][::-1]
    top_n_movie_ids = [movie_encoded2movie[valid_movie_encoded[i]] for i in top_n_indices]
    top_n_predictions = predictions[top_n_indices]
    
    return top_n_movie_ids, top_n_predictions

# Predict top 10 movies for user with ID 1
user_id = 2
top_n = 10

top_n_movie_ids, top_n_predictions = predict_top_n(user_id, top_n)

print(f'Top {top_n} recommendations for User {user_id}:')
for movie_id, rating in zip(top_n_movie_ids, top_n_predictions):
    movie_title = movies[movies['movieId'] == movie_id]['title'].values[0]
    print(f'Movie ID: {movie_id}, Title: {movie_title}, Predicted Rating: {rating}')


[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Top 10 recommendations for User 2:
Movie ID: 108078, Title: Chinese Puzzle (Casse-tête chinois) (2013), Predicted Rating: 5.443867206573486
Movie ID: 158882, Title: All Yours (2016), Predicted Rating: 5.32624626159668
Movie ID: 60737, Title: Watching the Detectives (2007), Predicted Rating: 5.318445205688477
Movie ID: 33649, Title: Saving Face (2004), Predicted Rating: 5.298862457275391
Movie ID: 143031, Title: Jump In! (2007), Predicted Rating: 5.254790306091309
Movie ID: 157775, Title: Tenchi Muyô! In Love (1996), Predicted Rating: 5.241457462310791
Movie ID: 134004, Title: What Love Is (2007), Predicted Rating: 5.233536720275879
Movie ID: 53355, Title: Sun Alley (Sonnenallee) (1999), Predicted Rating: 5.19766902923584
Movie ID: 8238, Title: Little Murders (1971), Predicted Rating: 5.092522621154785
Movie ID: 50999, Title: Ugly Duckling and Me!, The (2006), Predicted Rating: 5.091744899749756


In [44]:
# Save the trained model
model.save('neural_collaborative_filtering_model.h5')




In [47]:
from tensorflow.keras.models import load_model

# Load the model from the file
loaded_model = load_model('neural_collaborative_filtering_model.h5')
print("Model loaded successfully.")




Model loaded successfully.
