In [None]:
# Install Surprise library
!pip install scikit-surprise

import pandas as pd
from surprise import Reader, Dataset
from surprise.model_selection import train_test_split
from surprise import KNNBasic

# Load movies data
movies_df = pd.read_csv('movies.csv')

# Display available genres to the user.
# Each movie's `genres` string is split on '|' and flattened; dropna() keeps a
# missing `genres` cell from being offered as a selectable entry.
print("Available genres:")
unique_genres = movies_df['genres'].str.split('|').explode().dropna().unique()
# Menu numbers start at 1 and map to the genre name printed next to them.
genre_map = dict(enumerate(unique_genres, start=1))
for genre_number, genre in genre_map.items():
    print(f"{genre_number}. {genre}")

# Prompt the user to input their preferred genres
raw_genre_input = input("\nEnter the numbers corresponding to your preferred genres (comma-separated): ")

# Parse token by token so that a single bad entry (letters, or an empty field
# left by a trailing comma) is reported and skipped instead of raising ValueError.
genre_choices = []
for token in raw_genre_input.split(','):
    token = token.strip()
    try:
        genre_choices.append(int(token))
    except ValueError:
        print(f"Invalid genre number {token!r}. Skipping...")

# Ensure the inputs are valid
valid_genre_choices = []
for choice in genre_choices:
    if choice not in genre_map:
        print(f"Invalid genre number {choice}. Skipping...")
    elif genre_map[choice] not in valid_genre_choices:
        # Keep each selected genre once, even if its number was typed twice.
        valid_genre_choices.append(genre_map[choice])

if not valid_genre_choices:
    print("No valid genre selections. Exiting...")
else:
    print(f"\nSelected genres: {valid_genre_choices}")

    # Load ratings data
    ratings_df = pd.read_csv('ratings.csv')

    # Create Surprise Reader object (ratings are declared on a 0.5 to 5 scale)
    reader = Reader(rating_scale=(0.5, 5))

    # Load data into Surprise Dataset
    data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

    # Split the data into training and testing sets.
    # random_state pins the shuffle so re-running the cell produces the same split.
    trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

    # Define the collaborative filtering model: KNN with cosine similarity,
    # computed between items rather than users (user_based=False).
    sim_options = {'name': 'cosine', 'user_based': False}
    model = KNNBasic(sim_options=sim_options)

    # Train the model
    model.fit(trainset)
    # Function to get movie recommendations for a user
    def get_movie_recommendations(user_id, genres, num_recommendations=3):
        # Get list of movie IDs not rated by the user
        user_movies = ratings_df[ratings_df['userId'] == user_id]['movieId']
        all_movies = ratings_df['movieId'].unique()
        unrated_movies = list(set(all_movies) - set(user_movies))

        # Get predicted ratings for unrated movies
        predicted_ratings = [model.predict(user_id, movie_id).est for movie_id in unrated_movies]

        # Create DataFrame of unrated movies and predicted ratings
        recommendations_df = pd.DataFrame({'movieId': unrated_movies, 'predicted_rating': predicted_ratings})

        # Merge with movies DataFrame to get movie titles and genres
        recommendations_df = pd.merge(recommendations_df, movies_df, on='movieId')

        # Filter movies by the preferred genres
        genre_filtered_movies = recommendations_df[recommendations_df['genres'].str.contains('|'.join(genres))]

        if len(genre_filtered_movies) == 0:
            print("No movies found in the selected genres.")
            return None

        # Sort by predicted rating and get top recommendations
        top_recommendations = genre_filtered_movies.sort_values(by='predicted_rating', ascending=False).head(num_recommendations)

        return top_recommendations[['movieId', 'title', 'predicted_rating']]

    # Usage: recommend for a fixed example user and show title plus predicted score.
    user_id = 1
    recommendations = get_movie_recommendations(user_id, valid_genre_choices)
    if recommendations is not None:
        display_columns = ['title', 'predicted_rating']
        header = f"\nTop {len(recommendations)} movie recommendations for user {user_id} in the genres '{', '.join(valid_genre_choices)}':"
        print(header)
        print(recommendations[display_columns])


Available genres:
1. Adventure
2. Animation
3. Children
4. Comedy
5. Fantasy
6. Romance
7. Drama
8. Action
9. Crime
10. Thriller
11. Horror
12. Mystery
13. Sci-Fi
14. War
15. Musical
16. Documentary
17. IMAX
18. Western
19. Film-Noir
20. (no genres listed)

Enter the numbers corresponding to your preferred genres (comma-separated): 1,13

Selected genres: ['Adventure', 'Sci-Fi']
Computing the cosine similarity matrix...
Done computing similarity matrix.

Top 3 movie recommendations for user 1 in the genres 'Adventure, Sci-Fi':
                                title  predicted_rating
4574                   Android (1982)               4.8
5155       Alien Contamination (1980)               4.8
4469  Galaxy of Terror (Quest) (1981)               4.8


In [None]:
!pip install scikit-surprise
# Import necessary libraries
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
from PIL import Image
from io import BytesIO
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split

# Load the dataset
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as ratings below.
response = requests.get(url, timeout=30)
response.raise_for_status()
data_bytes = BytesIO(response.content)

# Load movies dataset
movies_df = pd.read_csv('movies.csv')

# Define genre map (menu number -> genre name).
# The names are later searched for inside movies_df['genres'], so they have to be
# spelled the way movies.csv spells them. The genre listing produced from
# movies.csv earlier in this notebook shows "Children" and "(no genres listed)";
# the former "Children's" and "unknown" labels could never match a movie.
genre_map = {
    0: "(no genres listed)", 1: "Action", 2: "Adventure", 3: "Animation", 4: "Children",
    5: "Comedy", 6: "Crime", 7: "Documentary", 8: "Drama", 9: "Fantasy",
    10: "Film-Noir", 11: "Horror", 12: "Musical", 13: "Mystery", 14: "Romance",
    15: "Sci-Fi", 16: "Thriller", 17: "War", 18: "Western"
}

# Load ratings data (tab-separated, no header row, so column names are supplied)
ratings_df = pd.read_csv(data_bytes, sep='\t', names=['userId', 'movieId', 'rating', 'timestamp'])

# Create Surprise Reader object
reader = Reader(rating_scale=(1, 5))

# Load data into Surprise Dataset
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)
import pandas as pd

# Load movies data
movies_df = pd.read_csv('movies.csv')

# Extract release year from title and build separate column.
# The year is the four digits inside the last parenthesised group of the title;
# \s* lets a title with trailing whitespace after "(YYYY)" still match, and
# expand=False makes extract() return a Series. Titles without a year get NaN.
release_year_text = movies_df['title'].str.extract(r'\((\d{4})\)\s*$', expand=False)
movies_df['release_year'] = pd.to_numeric(release_year_text)

# Load ratings data (this rebinds ratings_df to the contents of ratings.csv)
ratings_df = pd.read_csv('ratings.csv')
# Prompt the user to input their preferred genres
print("\nAvailable genres:")
for genre_id, genre_name in genre_map.items():
    print(f"{genre_id}. {genre_name}")
raw_genre_input = input("\nEnter the numbers corresponding to your preferred genres (separated by commas): ")

# Parse token by token so that one malformed entry (letters, or an empty field
# left by a trailing comma) is reported and skipped instead of raising ValueError.
genre_choices = []
for token in raw_genre_input.split(','):
    token = token.strip()
    try:
        genre_choices.append(int(token))
    except ValueError:
        print(f"Ignoring invalid genre entry {token!r}.")

# Filter genres based on user input: numbers that are not in the menu are dropped
preferred_genres = [genre_map[choice] for choice in genre_choices if choice in genre_map]

# Ensure the input is valid
if not preferred_genres:
    print("Invalid genre numbers. Please enter valid numbers corresponding to the genres.")
else:
    # Prompt the user to input other preferences.
    # These four names are only assigned when at least one valid genre was chosen.
    min_rating = float(input("\nEnter the minimum rating (e.g., 3.5): "))
    num_recommendations = int(input("Enter the number of recommendations: "))
    release_year_start = int(input("Enter the start year of the release year range (e.g., 1990): "))
    release_year_end = int(input("Enter the end year of the release year range (e.g., 2000): "))


# Split the data into training and testing sets.
# random_state pins the shuffle so re-running the cell produces the same split.
# NOTE(review): `data` was built from the ratings downloaded at the top of this
# cell, before ratings_df was rebound to ratings.csv; confirm that is intended.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Define the collaborative filtering model: KNN with cosine similarity,
# computed between items rather than users (user_based=False).
sim_options = {'name': 'cosine', 'user_based': False}
model = KNNBasic(sim_options=sim_options)

# Train the model
model.fit(trainset)


# Import the SVD algorithm from Surprise
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import train_test_split
from surprise import accuracy

# Create a Surprise Reader object (ratings are declared on a 0.5 to 5 scale)
reader = Reader(rating_scale=(0.5, 5))

# Load data into Surprise Dataset
data_surprise = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

# Split the data into training and testing sets.
# random_state pins the shuffle so re-running the cell produces the same split.
trainset_surprise, testset_surprise = train_test_split(data_surprise, test_size=0.2, random_state=42)

# Define the SVD model; random_state makes its random initialisation repeatable,
# so the predicted ratings do not change from one run to the next.
svd_model = SVD(random_state=42)

# Train the SVD model
svd_model.fit(trainset_surprise)

# Modify the get_movie_recommendations function to use SVD for recommendations
def get_movie_recommendations(user_id, preferred_genres, min_rating, release_year_start, release_year_end, num_recommendations=5):
    """Return the user's top unrated movies that satisfy genre, rating and year filters.

    Reads three module-level objects: ``ratings_df`` (to find which movies the user
    already rated), ``movies_df`` (titles and genres) and the fitted ``svd_model``
    (its ``predict(user_id, movie_id).est`` is the score).

    Args:
        user_id: Value matched against ``ratings_df['userId']``.
        preferred_genres: Iterable of genre names. A movie qualifies when its
            ``genres`` string contains at least one of them (literal substring
            match). An empty iterable applies no genre restriction beyond
            requiring a non-missing ``genres`` value.
        min_rating: Lowest predicted rating to keep (inclusive).
        release_year_start: First release year to keep (inclusive).
        release_year_end: Last release year to keep (inclusive).
        num_recommendations: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``movieId``, ``title``, ``predicted_rating`` and
        ``release_year``, sorted by predicted rating (descending). It is empty
        when nothing passes the filters.
    """
    import re  # local import so the function does not rely on a module-level one

    # Get list of movie IDs not rated by the user
    user_movies = ratings_df[ratings_df['userId'] == user_id]['movieId']
    all_movies = ratings_df['movieId'].unique()
    unrated_movies = list(set(all_movies) - set(user_movies))

    # Predict ratings for unrated movies using SVD
    predicted_ratings = [svd_model.predict(user_id, movie_id).est for movie_id in unrated_movies]

    # Create DataFrame of unrated movies and predicted ratings
    recommendations_df = pd.DataFrame({'movieId': unrated_movies, 'predicted_rating': predicted_ratings})

    # Merge with movies DataFrame to get movie titles, genres, and release years
    recommendations_df = pd.merge(recommendations_df, movies_df, on='movieId')

    # Extract release year from movie titles and build a separate column
    # (titles without a "(YYYY)" group get NaN and fail the year filter below)
    recommendations_df['release_year'] = recommendations_df['title'].str.extract(r'\((\d{4})\)').astype(float)

    # Filter movies based on user preferences.
    # Genre names are escaped so that one containing regex metacharacters, such as
    # "(no genres listed)", is matched literally instead of being read as a group.
    genre_pattern = '|'.join(re.escape(genre) for genre in preferred_genres)
    matches_genre = recommendations_df['genres'].str.contains(genre_pattern, na=False)
    meets_rating = recommendations_df['predicted_rating'] >= min_rating
    # between() is inclusive on both ends, matching the >= / <= pair it replaces.
    in_year_range = recommendations_df['release_year'].between(release_year_start, release_year_end)
    year_filtered_movies = recommendations_df[matches_genre & meets_rating & in_year_range]

    # Sort by predicted rating and get top recommendations
    top_recommendations = year_filtered_movies.sort_values(by='predicted_rating', ascending=False).head(num_recommendations)

    return top_recommendations[['movieId', 'title', 'predicted_rating', 'release_year']]

# Example usage
user_id = 1
# min_rating, num_recommendations and the year bounds are only assigned when at
# least one valid genre was selected, so calling the recommender without that
# check would raise NameError. The invalid-selection message has already been
# printed by the prompt step in that case.
if preferred_genres:
    recommendations = get_movie_recommendations(user_id, preferred_genres, min_rating, release_year_start, release_year_end, num_recommendations)
    print(f"\nTop {len(recommendations)} movie recommendations based on your preferences:")
    print(recommendations.rename(columns={'title': 'Title', 'predicted_rating': 'Rating'})[['Title', 'Rating']].to_string(index=False))




Available genres:
0. unknown
1. Action
2. Adventure
3. Animation
4. Children's
5. Comedy
6. Crime
7. Documentary
8. Drama
9. Fantasy
10. Film-Noir
11. Horror
12. Musical
13. Mystery
14. Romance
15. Sci-Fi
16. Thriller
17. War
18. Western

Enter the numbers corresponding to your preferred genres (separated by commas): 5,6

Enter the minimum rating (e.g., 3.5): 4
Enter the number of recommendations: 7
Enter the start year of the release year range (e.g., 1990): 1990
Enter the end year of the release year range (e.g., 2000): 2010
Computing the cosine similarity matrix...
Done computing similarity matrix.

Top 7 movie recommendations based on your preferences:
                                               Title   Rating
          Life Is Beautiful (La Vita è bella) (1997) 5.000000
                                Departed, The (2006) 5.000000
                         Boondock Saints, The (2000) 5.000000
                                     Superbad (2007) 4.999711
                        

In [None]:
!pip install scikit-surprise
import pandas as pd
import requests
from io import BytesIO
from surprise import Dataset, Reader, KNNBasic, SVD
from surprise.model_selection import train_test_split

# Load movies data
movies_df = pd.read_csv('movies.csv')

# Load ratings data
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as ratings.
response = requests.get(url, timeout=30)
response.raise_for_status()
data_bytes = BytesIO(response.content)
# NOTE(review): the recommendation outputs in this notebook list 2006-2007 titles
# from movies.csv, so movies.csv may use a different movieId space than this
# ml-100k ratings file; confirm the two files belong together.
ratings_df = pd.read_csv(data_bytes, sep='\t', names=['userId', 'movieId', 'rating', 'timestamp'])

# Define genre map (menu number -> genre name).
# The names are later searched for inside movies_df['genres'], so they have to be
# spelled the way movies.csv spells them. The genre listing produced from
# movies.csv earlier in this notebook shows "Children" and "(no genres listed)";
# the former "Children's" and "unknown" labels could never match a movie.
genre_map = {
    0: "(no genres listed)", 1: "Action", 2: "Adventure", 3: "Animation", 4: "Children",
    5: "Comedy", 6: "Crime", 7: "Documentary", 8: "Drama", 9: "Fantasy",
    10: "Film-Noir", 11: "Horror", 12: "Musical", 13: "Mystery", 14: "Romance",
    15: "Sci-Fi", 16: "Thriller", 17: "War", 18: "Western"
}

def get_movie_recommendations(user_id, preferred_genres, min_rating, release_year_start, release_year_end, num_recommendations=5):
    """Fit an SVD model and return the user's top unrated movies passing the filters.

    Reads three module-level objects: ``data_surprise`` (the dataset the SVD model
    is trained on), ``ratings_df`` (to find which movies the user already rated)
    and ``movies_df`` (titles and genres). A new model is fitted on every call.

    Args:
        user_id: Value matched against ``ratings_df['userId']``.
        preferred_genres: Iterable of genre names. A movie qualifies when its
            ``genres`` string contains at least one of them (literal substring
            match). An empty iterable applies no genre restriction beyond
            requiring a non-missing ``genres`` value.
        min_rating: Lowest predicted rating to keep (inclusive).
        release_year_start: First release year to keep (inclusive).
        release_year_end: Last release year to keep (inclusive).
        num_recommendations: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``movieId``, ``title``, ``predicted_rating`` and
        ``release_year``, sorted by predicted rating (descending). It is empty
        when nothing passes the filters.
    """
    import re  # local import so the function does not rely on a module-level one

    # Only the SVD model is fitted here: every prediction below comes from it, so
    # fitting an additional KNN model would only add a similarity-matrix
    # computation whose result is never read.
    # NOTE(review): `data_surprise` is not assigned anywhere in this cell; it has
    # to exist in the kernel already. Confirm where it should come from.
    trainset_surprise, _ = train_test_split(data_surprise, test_size=0.2)

    # Define the SVD model
    svd_model = SVD()

    # Train the SVD model
    svd_model.fit(trainset_surprise)

    # Get list of movie IDs not rated by the user
    user_movies = ratings_df[ratings_df['userId'] == user_id]['movieId']
    all_movies = ratings_df['movieId'].unique()
    unrated_movies = list(set(all_movies) - set(user_movies))

    # Predict ratings for unrated movies using SVD
    predicted_ratings = [svd_model.predict(user_id, movie_id).est for movie_id in unrated_movies]

    # Create DataFrame of unrated movies and predicted ratings
    recommendations_df = pd.DataFrame({'movieId': unrated_movies, 'predicted_rating': predicted_ratings})

    # Merge with movies DataFrame to get movie titles, genres, and release years
    recommendations_df = pd.merge(recommendations_df, movies_df, on='movieId')

    # Extract release year from movie titles and build a separate column
    # (titles without a "(YYYY)" group get NaN and fail the year filter below)
    recommendations_df['release_year'] = recommendations_df['title'].str.extract(r'\((\d{4})\)').astype(float)

    # Filter movies based on user preferences.
    # Genre names are escaped so that one containing regex metacharacters, such as
    # "(no genres listed)", is matched literally instead of being read as a group.
    genre_pattern = '|'.join(re.escape(genre) for genre in preferred_genres)
    matches_genre = recommendations_df['genres'].str.contains(genre_pattern, na=False)
    meets_rating = recommendations_df['predicted_rating'] >= min_rating
    # between() is inclusive on both ends, matching the >= / <= pair it replaces.
    in_year_range = recommendations_df['release_year'].between(release_year_start, release_year_end)
    year_filtered_movies = recommendations_df[matches_genre & meets_rating & in_year_range]

    # Sort by predicted rating and get top recommendations
    top_recommendations = year_filtered_movies.sort_values(by='predicted_rating', ascending=False).head(num_recommendations)

    return top_recommendations[['movieId', 'title', 'predicted_rating', 'release_year']]

# Prompt the user for input
print("\nAvailable genres:")
for genre_id, genre_name in genre_map.items():
    print(f"{genre_id}. {genre_name}")
raw_genre_input = input("\nEnter the numbers corresponding to your preferred genres (separated by commas): ")

# Parse token by token so that one malformed entry (letters, or an empty field
# left by a trailing comma) is reported and skipped instead of raising ValueError.
genre_choices = []
for token in raw_genre_input.split(','):
    token = token.strip()
    try:
        genre_choices.append(int(token))
    except ValueError:
        print(f"Ignoring invalid genre entry {token!r}.")

# Filter genres based on user input: numbers that are not in the menu are dropped
preferred_genres = [genre_map[choice] for choice in genre_choices if choice in genre_map]

# Ensure the input is valid
if not preferred_genres:
    print("Invalid genre numbers. Please enter valid numbers corresponding to the genres.")
else:
    # Prompt the user for other preferences
    min_rating = float(input("\nEnter the minimum rating (e.g., 3.5): "))
    num_recommendations = int(input("Enter the number of recommendations: "))
    release_year_start = int(input("Enter the start year of the release year range (e.g., 1990): "))
    release_year_end = int(input("Enter the end year of the release year range (e.g., 2000): "))

    # Example usage: recommend for a fixed example user and print title plus score
    user_id = 1
    recommendations = get_movie_recommendations(user_id, preferred_genres, min_rating, release_year_start, release_year_end, num_recommendations)
    print(f"\nTop {len(recommendations)} movie recommendations based on your preferences:")
    print(recommendations.rename(columns={'title': 'Title', 'predicted_rating': 'Rating'})[['Title', 'Rating']].to_string(index=False))



Available genres:
0. unknown
1. Action
2. Adventure
3. Animation
4. Children's
5. Comedy
6. Crime
7. Documentary
8. Drama
9. Fantasy
10. Film-Noir
11. Horror
12. Musical
13. Mystery
14. Romance
15. Sci-Fi
16. Thriller
17. War
18. Western

Enter the numbers corresponding to your preferred genres (separated by commas): 4,5

Enter the minimum rating (e.g., 3.5): 4
Enter the number of recommendations: 7
Enter the start year of the release year range (e.g., 1990): 1990
Enter the end year of the release year range (e.g., 2000): 2010
Computing the cosine similarity matrix...
Done computing similarity matrix.

Top 7 movie recommendations based on your preferences:
                                                 Title   Rating
                                  Trainspotting (1996) 5.000000
          Rosencrantz and Guildenstern Are Dead (1990) 4.950506
Wallace & Gromit: The Best of Aardman Animation (1996) 4.945095
                Wallace & Gromit: A Close Shave (1995) 4.873557
           Wal