In [21]:
# Install Surprise library
!pip install scikit-surprise

import pandas as pd
from surprise import Reader, Dataset
from surprise.model_selection import train_test_split
from surprise import KNNBasic

# Load movies data
movies_df = pd.read_csv('movies.csv')

# Display available genres to the user.
# Each row stores its genres as a single '|'-delimited string, so split and
# flatten before de-duplicating. dropna() keeps a missing genres cell from
# turning into a selectable "nan" menu entry.
print("Available genres:")
unique_genres = movies_df['genres'].str.split('|').explode().dropna().unique()
# Menu numbers are 1-based; the same mapping drives both display and lookup.
genre_map = dict(enumerate(unique_genres, start=1))
for number, genre in genre_map.items():
    print(f"{number}. {genre}")

# Prompt the user to input their preferred genres
raw_choices = input("\nEnter the numbers corresponding to your preferred genres (comma-separated): ")

# Parse defensively: a stray letter or trailing comma should be reported and
# skipped rather than aborting the whole cell with a ValueError.
genre_choices = []
for token in raw_choices.split(','):
    token = token.strip()
    if not token:
        continue  # tolerate "1,,2" and trailing commas
    try:
        genre_choices.append(int(token))
    except ValueError:
        print(f"Invalid input '{token}'. Skipping...")

# Ensure the inputs are valid: keep only numbers that exist in the menu, and
# keep each genre once even if its number was typed repeatedly.
valid_genre_choices = []
for choice in genre_choices:
    if choice not in genre_map:
        print(f"Invalid genre number {choice}. Skipping...")
    elif genre_map[choice] not in valid_genre_choices:
        valid_genre_choices.append(genre_map[choice])

if not valid_genre_choices:
    print("No valid genre selections. Exiting...")
else:
    print(f"\nSelected genres: {valid_genre_choices}")

    # Load ratings data
    ratings_df = pd.read_csv('ratings.csv')

    # Create Surprise Reader object; ratings are declared to lie in 0.5-5
    reader = Reader(rating_scale=(0.5, 5))

    # Load data into Surprise Dataset (column order: user, item, rating)
    data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

    # Split the data into training and testing sets.
    # The split is seeded so that the subset the model is fitted on -- and
    # therefore the recommendations printed below -- is the same on every run.
    trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

    # Define the collaborative filtering model:
    # cosine similarity, with user_based=False selecting item-item similarities
    sim_options = {'name': 'cosine', 'user_based': False}
    model = KNNBasic(sim_options=sim_options)

    # Train the model
    model.fit(trainset)

    # Function to get movie recommendations for a user
    def get_movie_recommendations(user_id, genres, num_recommendations=3):
        """Return the top predicted movies for a user within the given genres.

        Relies on the module-level ``ratings_df``, ``movies_df`` and fitted
        ``model`` defined earlier in this script.

        Args:
            user_id: Value matched against ``ratings_df['userId']`` and passed
                to ``model.predict``.
            genres: Iterable of genre names; a movie qualifies when its
                ``genres`` string contains at least one of them.
            num_recommendations: Maximum number of rows to return.

        Returns:
            A DataFrame with columns ``movieId``, ``title`` and
            ``predicted_rating`` sorted by descending prediction, or ``None``
            (after printing a notice) when no unrated movie matches.
        """
        # Imported locally so the function is self-contained in this notebook cell.
        import re

        # Get list of movie IDs not rated by the user
        user_movies = ratings_df[ratings_df['userId'] == user_id]['movieId']
        all_movies = ratings_df['movieId'].unique()
        unrated_movies = list(set(all_movies) - set(user_movies))

        # Get predicted ratings for unrated movies
        predicted_ratings = [model.predict(user_id, movie_id).est for movie_id in unrated_movies]

        # Create DataFrame of unrated movies and predicted ratings
        recommendations_df = pd.DataFrame({'movieId': unrated_movies, 'predicted_rating': predicted_ratings})

        # Merge with movies DataFrame to get movie titles and genres
        recommendations_df = pd.merge(recommendations_df, movies_df, on='movieId')

        # Filter movies by the preferred genres.
        # Genre names are escaped because str.contains treats the pattern as a
        # regex: "(no genres listed)" would otherwise be parsed as a capture
        # group. na=False makes rows with a missing genres value count as
        # non-matching instead of breaking the boolean mask.
        genre_pattern = '|'.join(re.escape(genre) for genre in genres)
        genre_mask = recommendations_df['genres'].str.contains(genre_pattern, regex=True, na=False)
        genre_filtered_movies = recommendations_df[genre_mask]

        if genre_filtered_movies.empty:
            print("No movies found in the selected genres.")
            return None

        # Sort by predicted rating and get top recommendations
        top_recommendations = genre_filtered_movies.sort_values(by='predicted_rating', ascending=False).head(num_recommendations)

        return top_recommendations[['movieId', 'title', 'predicted_rating']]

    # Usage: request recommendations for one fixed user and show the result
    user_id = 1
    recommendations = get_movie_recommendations(user_id, valid_genre_choices)
    if recommendations is not None:
        # Build the human-readable genre list once, then print header + table
        genre_label = ', '.join(valid_genre_choices)
        print(f"\nTop {len(recommendations)} movie recommendations for user {user_id} in the genres '{genre_label}':")
        print(recommendations.loc[:, ['title', 'predicted_rating']])


Available genres:
1. Adventure
2. Animation
3. Children
4. Comedy
5. Fantasy
6. Romance
7. Drama
8. Action
9. Crime
10. Thriller
11. Horror
12. Mystery
13. Sci-Fi
14. War
15. Musical
16. Documentary
17. IMAX
18. Western
19. Film-Noir
20. (no genres listed)

Enter the numbers corresponding to your preferred genres (comma-separated): 1,13

Selected genres: ['Adventure', 'Sci-Fi']
Computing the cosine similarity matrix...
Done computing similarity matrix.

Top 3 movie recommendations for user 1 in the genres 'Adventure, Sci-Fi':
                                title  predicted_rating
4574                   Android (1982)               4.8
5155       Alien Contamination (1980)               4.8
4469  Galaxy of Terror (Quest) (1981)               4.8
