In [13]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Load the two input tables from the working directory: the rating events
# and the movie catalogue.
ratings, movies = pd.read_csv("ratings.csv"), pd.read_csv("movies.csv")


In [14]:
# Attach the catalogue columns of `movies` to every rating row (default inner
# join on the shared "movieId" column).
data = pd.merge(ratings, movies, on="movieId")

def build_model(min_movie_ratings=0, n_neighbors=30):
    """Build the title-by-user rating matrix and fit a cosine k-NN model on it.

    Reads the module-level ``data`` frame and uses its "title", "userId" and
    "rating" columns.

    Parameters
    ----------
    min_movie_ratings : int, default 0
        When greater than 0, only titles that occur at least this many times
        in ``data`` are kept. Otherwise every title is used.
    n_neighbors : int, default 30
        Default neighbour count handed to ``NearestNeighbors``.

    Returns
    -------
    tuple
        ``(movie_user, model)``: ``movie_user`` is the pivoted frame (one row
        per title, one column per userId, missing ratings filled with 0) and
        ``model`` is the ``NearestNeighbors`` instance fitted on its values.
    """
    selected = data
    if min_movie_ratings > 0:
        # Number of rating rows per title; drop titles below the threshold.
        ratings_per_title = data["title"].value_counts()
        popular_titles = ratings_per_title[ratings_per_title >= min_movie_ratings].index
        selected = data[data["title"].isin(popular_titles)]
    # Work on a private copy so the shared `data` frame is never aliased.
    selected = selected.copy()

    movie_user = selected.pivot_table(index="title", columns="userId", values="rating")
    movie_user = movie_user.fillna(0)

    model = NearestNeighbors(metric="cosine", algorithm="brute", n_neighbors=n_neighbors)
    model.fit(movie_user.values)
    return movie_user, model


In [15]:
def recommend(movie_title, movie_user, model, k=10):
    """Return up to ``k`` movies most similar to ``movie_title``.

    Parameters
    ----------
    movie_title : str
        Title to look up. If it is not an exact row label of ``movie_user``,
        the first label containing it (case-insensitive, literal substring)
        is used instead.
    movie_user : pandas.DataFrame
        Matrix indexed by movie title; its rows are the vectors ``model``
        was fitted on, in the same order.
    model : object
        Fitted neighbour model exposing
        ``kneighbors(X, n_neighbors) -> (distances, indices)``.
    k : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list[tuple[str, float]]
        ``(title, similarity)`` pairs in the order the model returned them,
        where ``similarity = 1 - distance`` rounded to 3 decimals. The list is
        empty when no title matches, and may hold fewer than ``k`` entries
        when the matrix has fewer than ``k + 1`` rows.
    """
    if movie_title not in movie_user.index:
        matches = movie_user.index[movie_user.index.str.contains(movie_title, case=False, regex=False)]
        if len(matches) == 0:
            return []
        movie_title = matches[0]

    idx = movie_user.index.get_loc(movie_title)

    # Ask for one extra neighbour to leave room for the query movie itself,
    # but never more than the number of fitted rows (the model would raise).
    n_query = min(k + 1, len(movie_user))
    distances, indices = model.kneighbors(movie_user.iloc[idx].values.reshape(1, -1), n_neighbors=n_query)

    recs = []
    for distance, neighbor_idx in zip(distances[0], indices[0]):
        # Drop the query by row position rather than assuming it comes first:
        # with tied distances (identical rating vectors) it may not be at
        # position 0, and blindly skipping position 0 would discard a real
        # neighbour while recommending the movie to itself.
        if neighbor_idx == idx:
            continue
        recs.append((movie_user.index[neighbor_idx], round(float(1 - distance), 3)))

    # If the query was not among the returned neighbours we hold k + 1 items.
    return recs[:k]


In [16]:
# Fit on titles that have at least 50 ratings.
movie_user, model = build_model(min_movie_ratings=50, n_neighbors=30)

test_movies = ["Toy Story (1995)", "Jumanji (1995)", "Heat (1995)", "GoldenEye (1995)"]

# Try the candidates in order and keep the first one that yields any
# recommendations; the defaults below remain if none does.
final_movie, final_recs = None, []
for candidate in test_movies:
    candidate_recs = recommend(candidate, movie_user, model, k=10)
    if candidate_recs:
        final_movie, final_recs = candidate, candidate_recs
        break

print("Input Movie:", final_movie)
print("Recommendations:")
for i, (t, s) in enumerate(final_recs, start=1):
    print(f"{i}. {t} (similarity={s})")


Input Movie: Toy Story (1995)
Recommendations:
1. Toy Story 2 (1999) (similarity=0.573)
2. Jurassic Park (1993) (similarity=0.566)
3. Independence Day (a.k.a. ID4) (1996) (similarity=0.564)
4. Star Wars: Episode IV - A New Hope (1977) (similarity=0.557)
5. Forrest Gump (1994) (similarity=0.547)
6. Lion King, The (1994) (similarity=0.541)
7. Star Wars: Episode VI - Return of the Jedi (1983) (similarity=0.541)
8. Mission: Impossible (1996) (similarity=0.539)
9. Groundhog Day (1993) (similarity=0.534)
10. Back to the Future (1985) (similarity=0.53)


In [17]:
# Sanity check: list catalogue rows whose title contains "toy story"
# (case-insensitive; missing titles count as non-matches).
toy_story_mask = movies["title"].str.contains("toy story", case=False, na=False)
movies[toy_story_mask].head(20)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2355,3114,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy
7355,78499,Toy Story 3 (2010),Adventure|Animation|Children|Comedy|Fantasy|IMAX
