STEP 1 — Import Libraries

In [20]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

STEP 2 — Load CSV Files

In [21]:
# Load the movie catalogue and the user ratings from CSV files in the
# current working directory.
movies, ratings = (pd.read_csv(csv_path) for csv_path in ("movies.csv", "ratings.csv"))

# Quick sanity check: table dimensions first, then the leading rows of each.
print("Movies shape:", movies.shape)
print("Ratings shape:", ratings.shape)

# sep="\n" puts the frame on the line after its caption, exactly as two
# consecutive print() calls would.
print("\nMovies preview:", movies.head(), sep="\n")
print("\nRatings preview:", ratings.head(), sep="\n")


Movies shape: (9742, 3)
Ratings shape: (100836, 4)

Movies preview:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  

Ratings preview:
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


STEP 3 — Create User–Movie Ratings Matrix

In [22]:
# Pivot the long ratings table into a wide matrix with one row per movieId
# and one column per userId. Cells hold the rating; (movie, user) pairs with
# no rating come out of the pivot as NaN and are replaced by 0.
ratings_matrix = pd.pivot_table(
    ratings,
    values='rating',
    index='movieId',
    columns='userId',
).fillna(0)

print("Ratings matrix shape:", ratings_matrix.shape)
ratings_matrix.head()


Ratings matrix shape: (9724, 610)


userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


STEP 4 — Filter Sparse Movies & Users (Better Quality)

In [23]:
# Count ratings per movie and per user from the raw ratings table.
movie_counts = ratings.groupby('movieId')['rating'].count()
user_counts = ratings.groupby('userId')['rating'].count()

# Rows to keep: movies with more than 10 ratings.
# Columns to keep: users who rated more than 50 movies.
popular_movie_ids = movie_counts[movie_counts > 10].index
active_user_ids = user_counts[user_counts > 50].index

# Apply both filters in a single label-based selection.
ratings_matrix = ratings_matrix.loc[popular_movie_ids, active_user_ids]

print("Filtered ratings matrix shape:", ratings_matrix.shape)


Filtered ratings matrix shape: (2121, 378)


STEP 5 — Convert to Sparse Matrix (for KNN)

In [24]:
# Make this cell safe to re-run. After the reset_index() below, movieId is an
# ordinary column; running the cell a second time would otherwise feed that
# id column into the sparse matrix as if it were a user's ratings and add a
# stray "index" column. Restoring movieId as the index first puts the frame
# back into the shape this cell expects.
if 'movieId' in ratings_matrix.columns:
    ratings_matrix = ratings_matrix.set_index('movieId')

# Rating values only (rows = movies, columns = users) in CSR form for KNN.
sparse_ratings = csr_matrix(ratings_matrix.values)

# Reset index so we keep movieId as a normal column. Row position i of
# ratings_matrix then corresponds to row i of sparse_ratings.
ratings_matrix = ratings_matrix.reset_index()
ratings_matrix.head()


userId,movieId,1,4,6,7,10,11,15,16,17,...,600,601,602,603,604,605,606,607,608,610
0,1,4.0,0.0,0.0,4.5,0.0,0.0,2.5,0.0,4.5,...,2.5,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,5.0
1,2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0
2,3,4.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
3,5,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.5,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
4,6,4.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0,0.0,...,0.0,0.0,3.0,4.0,3.0,0.0,0.0,0.0,0.0,5.0


STEP 6 — Train KNN Model

In [25]:
# Brute-force nearest-neighbour search over the movie rating vectors using
# cosine distance. fit() returns the estimator itself, so construction and
# fitting can be chained.
knn_model = NearestNeighbors(
    n_neighbors=20,
    algorithm='brute',
    metric='cosine',
).fit(sparse_ratings)
print("‚úÖ KNN model trained!")


‚úÖ KNN model trained!


STEP 7 — Recommendation Function

In [26]:
def recommend_movies(movie_name, n_recommendations=5):
    """
    Recommend movies whose rating vectors are closest to a given movie.

    The title lookup is a case-insensitive *literal* substring match against
    ``movies['title']``; the first matching row is used as the query movie.
    Neighbours are then fetched from the fitted ``knn_model`` using the
    query movie's row of ``sparse_ratings``.

    Relies on the notebook globals ``movies``, ``ratings_matrix`` (with a
    ``movieId`` column whose row order matches ``sparse_ratings``),
    ``sparse_ratings`` and ``knn_model``.

    Parameters
    ----------
    movie_name : str
        Full or partial movie title to search for.
    n_recommendations : int, default 5
        Maximum number of titles to return. Values below 1 yield an empty
        list; values larger than the number of other movies in the model
        are capped.

    Returns
    -------
    list of str
        Recommended titles, nearest first. If the title is not found the
        list is ``["Movie not found in dataset"]``; if the movie was removed
        by the sparsity filter it is
        ``["Not enough rating data for this movie"]``.
    """
    print(f"\nSearching for: {movie_name}")

    # regex=False: titles carry regex metacharacters such as "(1995)", so a
    # pattern search would fail to match a title typed verbatim and would
    # raise on an unbalanced parenthesis.
    title_hits = movies['title'].str.contains(
        movie_name, case=False, na=False, regex=False
    )
    matches = movies[title_hits]

    if matches.empty:
        print("‚ùå Movie not found in movies.csv")
        return ["Movie not found in dataset"]

    # Just take the first match
    first_match = matches.iloc[0]
    movie_id = first_match['movieId']
    movie_title = first_match['title']
    print(f"‚úÖ Found: {movie_title} (movieId = {movie_id})")

    # Locate the movie by *position* in the filtered matrix, so the lookup
    # does not depend on ratings_matrix having a 0..n-1 index.
    matrix_movie_ids = ratings_matrix['movieId'].to_numpy()
    positions = np.flatnonzero(matrix_movie_ids == movie_id)

    if positions.size == 0:
        print("‚ùå This movie does not have enough rating data for recommendations.")
        return ["Not enough rating data for this movie"]

    movie_index = int(positions[0])

    # Ask for one extra neighbour because the query movie is normally among
    # the results, but never more than the model holds and never fewer
    # than one (kneighbors rejects both).
    n_wanted = max(n_recommendations, 0)
    n_query = min(n_wanted + 1, sparse_ratings.shape[0])

    _, indices = knn_model.kneighbors(
        sparse_ratings[movie_index],
        n_neighbors=n_query
    )

    # Drop the query movie by identity rather than by assuming it is the
    # first hit: with tied distances (duplicate or all-zero rating vectors)
    # another movie can be returned ahead of it.
    neighbour_positions = [
        int(idx) for idx in indices.flatten() if idx != movie_index
    ][:n_wanted]

    recommendations = []
    print("\nüé• Recommended movies:")
    for idx in neighbour_positions:
        rec_movie_id = matrix_movie_ids[idx]
        rec_title = movies.loc[movies['movieId'] == rec_movie_id, 'title'].values[0]
        recommendations.append(rec_title)
        print("‚Ä¢", rec_title)

    return recommendations


STEP 8 — Test Recommender

In [19]:
# Smoke test. The search is a substring match and the first hit wins, so a
# short query such as "Up" can resolve to a longer title that merely
# contains those letters.
recommendations = recommend_movies(movie_name="Up")
print("\nReturned list:", recommendations)



Searching for: Up
‚úÖ Found: Indian in the Cupboard, The (1995) (movieId = 60)

üé• Recommended movies:
‚Ä¢ Casper (1995)
‚Ä¢ Santa Clause, The (1994)
‚Ä¢ Mighty Morphin Power Rangers: The Movie (1995)
‚Ä¢ Next Karate Kid, The (1994)
‚Ä¢ Richie Rich (1994)

Returned list: ['Casper (1995)', 'Santa Clause, The (1994)', 'Mighty Morphin Power Rangers: The Movie (1995)', 'Next Karate Kid, The (1994)', 'Richie Rich (1994)']
