In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dot, Dense
from keras.optimizers import Adam
import numpy as np

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Load the datasets
movies = pd.read_csv(r'/content/drive/MyDrive/Datasets/movie.csv')
ratings = pd.read_csv(r'/content/drive/MyDrive/Datasets/rating.csv')

#print(movies.head(5))
#print(ratings.head(5))

In [3]:
# Merge the datasets
data = pd.merge(ratings, movies, on='movieId')

In [4]:
# Check if movieId and userId are within the expected range
max_movie_id = len(data['movieId'].unique())
max_user_id = len(data['userId'].unique())

# Filter out invalid movieId and userId
data = data[(data['movieId'] <= max_movie_id) & (data['userId'] <= max_user_id)]

# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.1, random_state=42)

In [5]:
# Create user and movie input layers
user_input = Input(shape=(1,), name='user_input')
movie_input = Input(shape=(1,), name='movie_input')

# Embedding layers for users and movies
user_embedding = Embedding(input_dim=max_user_id + 1, output_dim=50, input_length=1, name='user_embedding')(user_input)
movie_embedding = Embedding(input_dim=max_movie_id + 1, output_dim=50, input_length=1, name='movie_embedding')(movie_input)

# Flatten the embedding layers
user_flat = Flatten()(user_embedding)
movie_flat = Flatten()(movie_embedding)

# Dot product of user and movie embeddings
dot_product = Dot(axes=1)([user_flat, movie_flat])

# Combine the model
model = Model(inputs=[user_input, movie_input], outputs=dot_product)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Train the model on the training set
model.fit([train_data['userId'], train_data['movieId']], train_data['rating'], epochs=1, validation_data=([test_data['userId'], test_data['movieId']], test_data['rating']))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x78ed7a48eef0>

In [9]:
# Evaluate the model accuracy on the testing set
loss = model.evaluate([test_data['userId'], test_data['movieId']], test_data['rating'])
print(f'Model Test Loss: {loss}')

NameError: ignored

In [10]:
# User input for movie genre
movie_genre = input("Enter the movie genre: ")

# Find movies of the entered genre
genre_movies = movies[movies['genres'].str.lower().str.contains(movie_genre.lower())]

if genre_movies.empty:
    print(f"No movies found in the dataset for the genre '{movie_genre}'.")
else:
    # Get movieId for movies of the entered genre
    genre_movie_ids = genre_movies['movieId'].values

    # Filter out invalid movieIds
    valid_movie_ids = genre_movie_ids[genre_movie_ids <= max_movie_id]

    if len(valid_movie_ids) == 0:
        print(f"No valid movies found in the dataset for the genre '{movie_genre}'.")
    else:
        # Predict the ratings for movies of the entered genre
        user_id = 1  # You can change this to any user ID
        user_array = np.full_like(valid_movie_ids, user_id)

        # Predict ratings for valid movies
        predicted_ratings = model.predict([user_array, valid_movie_ids])

        # Combine movieId, predicted ratings, and genres
        recommended_movies_data = pd.DataFrame({
            'movieId': valid_movie_ids,
            'predicted_rating': predicted_ratings[:, 0]
        })

        # Merge with genre_movies to get full movie details
        recommended_movies = pd.merge(recommended_movies_data, genre_movies, on='movieId')

        # Sort recommended movies by predicted rating in descending order
        recommended_movies = recommended_movies.sort_values(by='predicted_rating', ascending=False)

        if recommended_movies.empty:
            print(f"No recommended movies found for the genre '{movie_genre}' with a predicted rating of 3 or above.")
        else:
            print(f"Recommended movies for the genre '{movie_genre}' with a predicted rating of 3 or above:")
            print(recommended_movies[['title']].head(5))


KeyboardInterrupt: ignored

In [5]:
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Support Vector Machine (SVM)
model_svm = SVR()
model_svm.fit(train_data[['userId', 'movieId']], train_data['rating'])

# Evaluate SVM
predicted_ratings_svm = model_svm.predict(test_data[['userId', 'movieId']])
mse_svm = mean_squared_error(test_data['rating'], predicted_ratings_svm)
print(f'SVM Test MSE: {mse_svm}')

In [6]:
# Decision Tree
model_dt = DecisionTreeRegressor()
model_dt.fit(train_data[['userId', 'movieId']], train_data['rating'])

# Evaluate Decision Tree
predicted_ratings_dt = model_dt.predict(test_data[['userId', 'movieId']])
mse_dt = mean_squared_error(test_data['rating'], predicted_ratings_dt)
print(f'Decision Tree Test MSE: {mse_dt}')

Decision Tree Test MSE: 1.7573478069374469


In [None]:
# Plotting the histogram
models = ['Matrix Factorization - Collaborative Filtering', 'SVM', 'Decision Trees']
accuracies = [loss, mse_svm, mse_dt]

In [None]:
import matplotlib.pyplot as plt
plt.bar(models, accuracies, color=['blue', 'orange', 'green'])
plt.xlabel('Models')
plt.ylabel('Accuracy')
plt.title('Comparison of Recommendation Models')
plt.ylim(0, 1)
plt.show()