In [1]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity

# Load the ratings and movies data
movies = pd.read_csv(r'E:\Python\Movie Recomendation System\ml-25m\ml-25m\movies.csv')
ratings = pd.read_csv(r'E:\Python\Movie Recomendation System\ml-25m\ml-25m\ratings.csv')

# Create a sparse matrix from the ratings dataframe
# Rows represent users, columns represent movies, values are the ratings
user_movie_sparse = csr_matrix((ratings['rating'], (ratings['userId'], ratings['movieId'])))

# Check the shape of the sparse matrix (will not load entire matrix into memory)
print(user_movie_sparse.shape)  # Shape: (num_users, num_movies)

# Apply NNMF on the sparse matrix
nmf_model = NMF(n_components=20, init='random', random_state=42)

# Fit the NNMF model
W = nmf_model.fit_transform(user_movie_sparse)  # User feature matrix (num_users x 20)
H = nmf_model.components_  # Movie feature matrix (20 x num_movies)

print(W.shape)  # Shape: (num_users, 20)
print(H.shape)  # Shape: (20, num_movies)


(162542, 209172)




(162542, 20)
(20, 209172)


In [3]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity

# Load the ratings and movies data
movies = pd.read_csv(r'E:\Python\Movie Recomendation System\ml-25m\ml-25m\movies.csv')
ratings = pd.read_csv(r'E:\Python\Movie Recomendation System\ml-25m\ml-25m\ratings.csv')

# Create a sparse matrix from the ratings dataframe
user_movie_sparse = csr_matrix((ratings['rating'], (ratings['userId'], ratings['movieId'])))

# Apply NNMF on the sparse matrix
nmf_model = NMF(n_components=20, init='random', random_state=42, max_iter=500)
W = nmf_model.fit_transform(user_movie_sparse)
H = nmf_model.components_

# Create a mapping of movieId to the index in the matrix
movie_id_to_index = {movieId: index for index, movieId in enumerate(movies['movieId'])}

# Function to search for similar movies using the NNMF movie features matrix
def search_similar_movies(movie_title, num_recommendations=5):
    # Look for the movie by title (ensure case insensitive search)
    matching_movies = movies[movies['title'].str.contains(movie_title, case=False, na=False)]
    
    if matching_movies.empty:
        return f"Movie '{movie_title}' not found."
    
    movie_id = matching_movies.iloc[0]['movieId']
    
    # Ensure movieId is valid and map to correct index in H
    if movie_id not in movie_id_to_index:
        return f"Movie ID {movie_id} is out-of-bounds."
    
    movie_idx = movie_id_to_index[movie_id]
    movie_vector = H[:, movie_idx]

    # Compute cosine similarity with all other movies
    similarity = cosine_similarity(movie_vector.reshape(1, -1), H.T).flatten()
    
    # Get top recommendations
    indices = np.argpartition(similarity, -num_recommendations)[-num_recommendations:]
    recommended_movies = movies.iloc[indices].iloc[::-1]  # Sort by similarity score in descending order

    return recommended_movies[['movieId', 'title', 'genres']]

# Example: Search similar movies to "Toy Story"
recommendations = search_similar_movies('Toy Story')
print(recommendations)




Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\Rono\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pandas\core\indexing.py", line 1714, in _get_list_axis
    return self.obj._take_with_is_copy(key, axis=axis)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Rono\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pandas\core\generic.py", line 4153, in _take_with_is_copy
    result = self.take(indices=indices, axis=axis)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Rono\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pandas\core\generic.py", line 4133, in take
    new_data = self._mgr.take(
               ^^^^^^^^^^^^^^^
  File "C:\Users\Rono\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\

In [None]:
# Function to recommend movies for a specific user based on their latent factors
def recommend_movies_for_user(user_id, num_recommendations=5):
    # Ensure valid user_id (since the indices are 0-based)
    user_idx = user_id - 1
    
    # Get the user's latent feature vector
    user_vector = W[user_idx]
    
    # Compute similarity between the user and all movies (dot product of user vector with movie matrix)
    movie_scores = np.dot(H.T, user_vector)
    
    # Get movies the user has already rated
    already_rated = ratings[ratings['userId'] == user_id]['movieId'].values
    
    # Sort the scores and get top recommendations for the user
    recommendations = np.argsort(movie_scores)[::-1]  # Sort in descending order of scores
    
    # Filter out movies the user has already rated
    filtered_recommendations = [movie for movie in recommendations if movie not in already_rated]
    
    # Take the top N recommendations
    top_recommendations = filtered_recommendations[:num_recommendations]
    
    # Get the movie details for the recommendations
    recommended_movies = movies[movies['movieId'].isin(top_recommendations)]
    
    return recommended_movies[['movieId', 'title', 'genres']]

# Example: Recommend 5 movies for user 1
user_recommendations = recommend_movies_for_user(1, num_recommendations=5)
print(user_recommendations)
