In [2]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

In [6]:
# Load the dataset
movies = pd.read_csv("data/tmdb_5000_movies.csv")
credits = pd.read_csv("data/tmdb_5000_credits.csv")

In [8]:
# Clean genres

def parse_genres(s):
    try:
        return [d['name'] for d in ast.literal_eval(s)]
    except:
        return []

movies['genres_list'] = movies['genres'].apply(parse_genres)

In [10]:
#  Popularity-based recommendation

print("\nTop 10 Rated Movies:")
top_movies = movies.sort_values('vote_average', ascending=False)[['title','vote_average']].head(10)
for i, row in enumerate(top_movies.itertuples(), 1):
    print(f"{i}. {row.title} (Rating: {row.vote_average})")


Top 10 Rated Movies:
1. Stiff Upper Lips (Rating: 10.0)
2. Me You and Five Bucks (Rating: 10.0)
3. Dancer, Texas Pop. 81 (Rating: 10.0)
4. Little Big Top (Rating: 10.0)
5. Sardaarji (Rating: 9.5)
6. One Man's Hero (Rating: 9.3)
7. There Goes My Baby (Rating: 8.5)
8. The Shawshank Redemption (Rating: 8.5)
9. The Prisoner of Zenda (Rating: 8.4)
10. The Godfather (Rating: 8.4)


In [12]:
#  Similarity-based recommendation

mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(movies['genres_list'])
sim = cosine_similarity(genre_matrix)

def recommend_by_genre(title, top_n=5):
    # Case-insensitive match
    lower_titles = movies['title'].str.lower()
    title_lower = title.lower()
    
    if title_lower not in lower_titles.values:
        return ["Movie not found!"]
    
    idx = lower_titles[lower_titles == title_lower].index[0]
    scores = list(enumerate(sim[idx]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    rec_idxs = [i for i,_ in scores[1:top_n+1]]
    return movies['title'].iloc[rec_idxs].tolist()


#  Show some movies for user to pick from

print("\nHere are some movies you can choose from:")
sample_movies = movies['title'].sample(20, random_state=42).tolist()
for i, m in enumerate(sample_movies, 1):
    print(f"{i}. {m}")

#  Take user input (number or name, case-insensitive)

choice = input("\nEnter the movie name or number: ")

if choice.isdigit():
    movie_name = sample_movies[int(choice)-1]
else:
    movie_name = choice.strip()


#  Show recommendations

recommendations = recommend_by_genre(movie_name, top_n=5)
print(f"\nMovies similar to '{movie_name}':")
for i, m in enumerate(recommendations, 1):
    print(f"{i}. {m}")


Here are some movies you can choose from:
1. I Spy
2. Split Second
3. Gossip
4. Vicky Cristina Barcelona
5. Harry Potter and the Half-Blood Prince
6. AVP: Alien vs. Predator
7. The Contender
8. Meet the Parents
9. Away We Go
10. Sleep Dealer
11. In Her Line of Fire
12. Beyond the Lights
13. The Front Page
14. Taxi Driver
15. Knight and Day
16. End of Days
17. Code 46
18. Double Take
19. Tidal Wave
20. I Heart Huckabees



Enter the movie name or number:  1



Movies similar to 'I Spy':
1. Hollywood Homicide
2. Bad Company
3. I Spy
4. Without a Paddle
5. The Big Hit
