In [36]:
#importing relevant libraries
#standard libraries
import pandas as pd
import numpy as np
#from surprise import Reader

#visualization Libraries
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


from surprise import Reader, Dataset
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.prediction_algorithms import SVD, KNNBasic, KNNBaseline,KNNWithMeans
from surprise.model_selection import GridSearchCV

In [37]:
mov_df = pd.read_csv('Data/movies.csv')
mov_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [38]:
ratings_df = pd.read_csv('Data/ratings.csv')
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [39]:
# Load ratings dataset
df = pd.read_csv('Data/ratings.csv')
new_df = df.drop(columns='timestamp')

# Load movies dataset
df_movies = pd.read_csv('Data/movies.csv')

# Create Surprise dataset
reader = Reader()
data = Dataset.load_from_df(new_df, reader)
dataset = data.build_full_trainset()

In [74]:
from surprise.model_selection import cross_validate

def evaluate_models(data):
    # Split the data into training and test sets
    trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

    # Define a list of models to evaluate
    models = [
        SVD(),
        KNNBasic(sim_options={'name': 'pearson', 'user_based': True}),
        KNNWithMeans(sim_options={'name': 'pearson', 'user_based': True})
    ]

    # Evaluate each model and store the results
    results = []
    for model in models:
        # Perform cross-validation
        cv_results = cross_validate(model, data, measures=['RMSE'], cv=5, verbose=False)

        # Get the average RMSE from cross-validation
        rmse = cv_results['test_rmse'].mean()

        # Store the model and its performance
        results.append({'model': model.__class__.__name__, 'rmse': rmse})

    # Sort the results based on the RMSE in ascending order
    sorted_results = sorted(results, key=lambda x: x['rmse'])

    # Print the results
    for result in sorted_results:
        print(f"Model: {result['model']}, RMSE: {result['rmse']}")

    # Select the best performing model
    best_model = sorted_results[0]['model']
    print(f"Best performing model: {best_model}")

evaluate_models(data)


Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Model: SVD, RMSE: 0.8718384107182733
Model: KNNWithMeans, RMSE: 0.8951910310793385
Model: KNNBasic, RMSE: 0.9723624740595549
Best performing model: SVD


In [75]:
# Perform grid search for SVD
params = {'n_factors': [20, 50, 100], 'reg_all': [0.02, 0.05, 0.1]}
g_s_svd = GridSearchCV(SVD, param_grid=params, n_jobs=-1)
g_s_svd.fit(data)

print(g_s_svd.best_score)
print(g_s_svd.best_params)


{'rmse': 0.8696831141953553, 'mae': 0.6684918488722612}
{'rmse': {'n_factors': 20, 'reg_all': 0.05}, 'mae': {'n_factors': 20, 'reg_all': 0.02}}


In [76]:
# Train SVD model
svd = SVD(n_factors=50, reg_all=0.05)
svd.fit(dataset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x17320422bb0>

In [79]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see popular movies:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return recommend_popular_movies(ratings_df, movies_df, num_recommendations=5)
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': rating}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre=None, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    if genre:
        # Filter movies based on genre
        movies_df = movies_df[movies_df['genres'].str.contains(genre, case=False, na=False)]
    
    # Merge with movies_df to get the movie titles
    recommended_movies = popularity_df.merge(movies_df, on='movieId', how='left')['title'].head(num_recommendations)

    return recommended_movies

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Get user ratings using the movie_rater function
user_rating = movie_rater(movies_df, 4, 'Comedy')

# Add the new ratings to the original ratings DataFrame
user_ratings = pd.DataFrame(user_rating)
new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

# Define the reader
reader = Reader(rating_scale=(1, 5))

# Load the data from the DataFrame
new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

# Train a model using the new combined DataFrame
svd_ = SVD(n_factors=50, reg_all=0.05)
svd_.fit(new_data.build_full_trainset())

# Check if the user has provided any ratings
if len(user_rating) > 0:
    print("\nUser's rated movies:")
    for idx, rating in enumerate(user_rating):
        movie_title = movies_df.loc[movies_df['movieId'] == rating['movieId'], 'title'].values[0]
        print(f"Rating #{idx+1}: {movie_title} - Rating: {rating['rating']}")
else:
    # Generate personalized recommendations for unrated movies
    user_id = 1000
    user_unrated_movies = movies_df[~movies_df['movieId'].isin(new_ratings_df['movieId'])]
    predictions = []
    for movie_id in user_unrated_movies['movieId'].unique():
        predicted_rating = svd_.predict(user_id, movie_id).est
        predictions.append((movie_id, predicted_rating))

    # Order the personalized recommendations from highest to lowest rated
    ranked_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

    # Print the personalized recommendations for the user
    print("\nPersonalized movie recommendations:")
    n = 5
    for idx, rec in enumerate(ranked_predictions[:n]):
        title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
        print(f"Recommendation #{idx+1}: {title}")

# Print popular movie recommendations if no ratings were provided
if len(user_rating) == 0:
    print("\nNo ratings provided. Here are the top popular movies:")
    popular_movies = recommend_popular_movies(ratings_df, movies_df, genre='Comedy', num_recommendations=5)
    print(popular_movies)




      movieId                title                               genres
1394     1912  Out of Sight (1998)  Comedy|Crime|Drama|Romance|Thriller
      movieId            title  genres
6688    58156  Semi-Pro (2008)  Comedy
      movieId                      title        genres
2621     3506  North Dallas Forty (1979)  Comedy|Drama
      movieId                  title        genres
8970   137595  Magic Mike XXL (2015)  Comedy|Drama

User's rated movies:
Rating #1: Out of Sight (1998) - Rating: 4
Rating #2: Semi-Pro (2008) - Rating: 4
Rating #3: North Dallas Forty (1979) - Rating: 4
Rating #4: Magic Mike XXL (2015) - Rating: 4


In [82]:



import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see popular movies:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return []  # Return an empty list for popular movies
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': rating}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend personalized movies based on user ratings
def recommend_personalized_movies(ratings_df, movies_df, num_recommendations=5):
    # Load ratings data into surprise Dataset
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

    # Train the SVD model
    svd = SVD(n_factors=50, reg_all=0.05)
    trainset = data.build_full_trainset()
    svd.fit(trainset)

    # Get unrated movies for the user
    user_id = 1000
    user_unrated_movies = movies_df[~movies_df['movieId'].isin(ratings_df[ratings_df['userId'] == user_id]['movieId'])]

    # Predict ratings for unrated movies
    predictions = []
    for movie_id in user_unrated_movies['movieId'].unique():
        predicted_rating = svd.predict(user_id, movie_id).est
        predictions.append((movie_id, predicted_rating))

    # Order the predictions from highest to lowest rated
    ranked_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

    # Print the top movie recommendations
    n = num_recommendations
    for idx, rec in enumerate(ranked_predictions[:n]):
        title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
        print(f"Recommendation #{idx+1}: {title}")

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Get user ratings using the movie_rater function
user_rating = movie_rater(movies_df, 4, 'Comedy')

# Add the new ratings to the original ratings DataFrame
user_ratings = pd.DataFrame(user_rating)
new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

# Check if user provided ratings
if len(user_rating) > 0:
    # Generate movie recommendations based on user ratings
    recommend_personalized_movies(new_ratings_df, movies_df, num_recommendations=5)
else:
    # Generate popular movie recommendations if no ratings were provided
    popular_movies = recommend_popular_movies(ratings_df, movies_df, num_recommendations=5)
    print("\nNo ratings provided. Here are the top popular movies:")
    print(popular_movies)


      movieId                       title          genres
2132     2837  Bedrooms & Hallways (1998)  Comedy|Romance

No ratings provided. Here are the top popular movies:
0                                      Lamerica (1994)
1                 Heidi Fleiss: Hollywood Madam (1995)
2                                  Lesson Faust (1994)
3    Jonah Who Will Be 25 in the Year 2000 (Jonas q...
4                                  Belle époque (1992)
Name: title, dtype: object


In [90]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see personalized recommendations:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return None  # Indicates the user wants personalized recommendations
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': float(rating)}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Filter movies by genre (if provided)
    if genre:
        popular_movies = movies_df[movies_df['genres'].str.contains(genre)]
        popular_movies = popular_movies.merge(popularity_df, on='movieId', how='left')
    else:
        popular_movies = popularity_df.merge(movies_df, on='movieId', how='left')

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popular_movies.head(num_recommendations)

    # Return the recommended movies
    return top_movies['title']

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Prompt the user to enter their user ID
user_id = input('Enter your user ID: ')

# Convert user_id to int data type
user_id = int(user_id)

# Check if the user already exists in the ratings dataset
if user_id in ratings_df['userId'].unique():
    # Get user ratings using the movie_rater function
    user_rating = movie_rater(movies_df, 4, genre='Comedy')

    if user_rating is None:
        # User wants personalized recommendations
        # Generate movie recommendations based on collaborative filtering
        # Rest of the code for personalized recommendations...
        print("Personalized recommendations based on user's ratings")
        # ...
    else:
        # User has provided ratings
        # Add the new ratings to the original ratings DataFrame
        user_ratings = pd.DataFrame(user_rating)
        new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

        # Define the reader
        reader = Reader(rating_scale=(1, 5))

        # Load the data from the DataFrame
        new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

        # Train the SVD model with the updated ratings
        svd = SVD()
        svd.fit(new_data.build_full_trainset())

        # Predict ratings for unrated movies
        predictions = []
        for movie_id in movies_df['movieId'].unique():
            predicted_rating = svd.predict(user_id, movie_id).est
            predictions.append((movie_id, predicted_rating))

        # Sort the predicted ratings
        ranked_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

        # Print the top 5 movie recommendations for the user
        print("Top 5 movie recommendations based on your ratings:")
        for idx, rec in enumerate(ranked_predictions[:5]):
            title = movies_df.loc[movies_df['movieId'] == rec[0], 'title'].values[0]
            print(f"Recommendation #{idx+1}: {title}")
else:
    # New user without any ratings
    print("Popular movie recommendations:")
    genre = input("Enter a genre to get recommendations based on that (leave blank for all genres): ")
    recommendations = recommend_popular_movies(ratings_df, movies_df, genre)
    for idx, rec in enumerate(recommendations):
        print(f"Recommendation #{idx+1}: {rec}")




Popular movie recommendations:


In [113]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000  # Set a default userID for new users
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see personalized recommendations:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return None  # Indicates the user wants personalized recommendations
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': float(rating)}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Filter movies by genre (if provided)
    if genre:
        popular_movies = movies_df[movies_df['genres'].str.contains(genre)]
        popular_movies = popular_movies.merge(popularity_df, on='movieId', how='left')
    else:
        popular_movies = popularity_df.merge(movies_df, on='movieId', how='left')

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popular_movies.head(num_recommendations)

    # Return the recommended movies
    return top_movies['title']

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Prompt the user to enter their user ID
user_id = input('Enter your user ID: ')

# Convert user_id to int data type
user_id = int(user_id)

# Check if the user already exists in the ratings dataset
if user_id in ratings_df['userId'].unique():
    # Get user ratings using the movie_rater function
    user_rating = movie_rater(movies_df, 4, genre='Comedy')

    if user_rating is None:
        # User wants personalized recommendations
        print("Personalized recommendations based on user's ratings")
        
        # Extract the user's ratings from the ratings dataframe
        user_ratings = ratings_df[ratings_df['userId'] == user_id]

        # Group the ratings by movie and calculate the average rating for each movie
        movie_ratings = user_ratings.groupby('movieId')['rating'].mean().reset_index()

        # Merge movie ratings with movie metadata
        personalized_movies = movie_ratings.merge(movies_df, on='movieId', how='left')

        # Sort the movies based on the average rating
        personalized_movies = personalized_movies.sort_values(by='rating', ascending=False)

        # Get the top 5 movie recommendations
        recommendations = personalized_movies['title'].head(5)

        # Print the recommendations
        for idx, rec in enumerate(recommendations):
            print(f"Recommendation #{idx+1}: {rec}")

    else:
        # User has provided ratings
        # Add the new ratings to the original ratings DataFrame
        user_ratings = pd.DataFrame(user_rating)
        new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

        # Define the reader
        reader = Reader(rating_scale=(1, 5))

        # Load the data from the DataFrame
        new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

        # Train the SVD model with the updated ratings
        svd = SVD()
        svd.fit(new_data.build_full_trainset())

        # Predict ratings for unrated movies
        predictions = []
        for movie_id in movies_df['movieId'].unique():
            predicted_rating = svd.predict(user_id, movie_id).est
            predictions.append((movie_id, predicted_rating))

        # Sort the predicted ratings
        ranked_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

        # Print the top 5 movie recommendations for the user
        print("Top 5 movie recommendations based on your ratings:")
        for idx, rec in enumerate(ranked_predictions[:5]):
            title = movies_df.loc[movies_df['movieId'] == rec[0], 'title'].values[0]
            print(f"Recommendation #{idx+1}: {title}")

else:
    # New user without any ratings
    print("Popular movie recommendations:")
    genre = input("Enter a genre to get recommendations based on that (leave blank for all genres): ")
    recommendations = recommend_popular_movies(ratings_df, movies_df, genre, num_recommendations=5)
    for idx, rec in enumerate(recommendations):
        print(f"Recommendation #{idx+1}: {rec}")





      movieId                    title                 genres
4547     6753  Secondhand Lions (2003)  Children|Comedy|Drama
Personalized recommendations based on user's ratings
Recommendation #1: Toy Story (1995)
Recommendation #2: Cliffhanger (1993)
Recommendation #3: Three Musketeers, The (1993)
Recommendation #4: Robin Hood: Men in Tights (1993)
Recommendation #5: RoboCop 3 (1993)


In [108]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000  # Set a default userID for new users
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see personalized recommendations:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return None  # Indicates the user wants personalized recommendations
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': float(rating)}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Filter movies by genre (if provided)
    if genre:
        popular_movies = movies_df[movies_df['genres'].str.contains(genre)]
        popular_movies = popular_movies.merge(popularity_df, on='movieId', how='left')
    else:
        popular_movies = popularity_df.merge(movies_df, on='movieId', how='left')

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popular_movies.head(num_recommendations)

    # Return the recommended movies
    return top_movies['title']

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Prompt the user to enter their user ID
user_id = input('Enter your user ID: ')

# Convert user_id to int data type
user_id = int(user_id)

# Check if the user already exists in the ratings dataset
if user_id in ratings_df['userId'].unique():
    # Get user ratings using the movie_rater function
    user_rating = movie_rater(movies_df, 4, genre='Comedy')

    if user_rating is None:
        # User wants personalized recommendations
        print("Personalized recommendations based on user's ratings")
        
        # Rest of the code for personalized recommendations...
        # Generate movie recommendations based on collaborative filtering or other personalized recommendation algorithms
        # ...
        
    else:
        # User has provided ratings
        # Add the new ratings to the original ratings DataFrame
        user_ratings = pd.DataFrame(user_rating)
        new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

        # Define the reader
        reader = Reader(rating_scale=(1, 5))

        # Load the data from the DataFrame
        new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

        # Train the SVD model with the updated ratings
        svd = SVD()
        svd.fit(new_data.build_full_trainset())

        # Predict ratings for unrated movies
        predictions = []
        for movie_id in movies_df['movieId'].unique():
            predicted_rating = svd.predict(user_id, movie_id).est
            predictions.append((movie_id, predicted_rating))

        # Sort the predicted ratings
        ranked_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

        # Print the top 5 movie recommendations for the user
        print("Top 5 movie recommendations based on your ratings:")
        for idx, rec in enumerate(ranked_predictions[:5]):
            title = movies_df.loc[movies_df['movieId'] == rec[0], 'title'].values[0]
            print(f"Recommendation #{idx+1}: {title}")

else:
    # New user without any ratings
    print("Popular movie recommendations:")
    genre = input("Enter a genre to get recommendations based on that (leave blank for all genres): ")
    recommendations = recommend_popular_movies(ratings_df, movies_df, genre)
    for idx, rec in enumerate(recommendations):
        print(f"Recommendation #{idx+1}: {rec}")



     movieId              title  genres
212      248  Houseguest (1994)  Comedy
Personalized recommendations based on user's ratings


In [102]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000  # Set a default userID for new users
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see personalized recommendations:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return 'escape'  # Indicates the user wants personalized recommendations
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': float(rating)}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Filter movies by genre (if provided)
    if genre:
        popular_movies = movies_df[movies_df['genres'].str.contains(genre)]
        popular_movies = popular_movies.merge(popularity_df, on='movieId', how='left')
    else:
        popular_movies = popularity_df.merge(movies_df, on='movieId', how='left')

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popular_movies.head(num_recommendations)

    # Return the recommended movies
    return top_movies['title']

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Prompt the user to enter their user ID
user_id = input('Enter your user ID: ')

# Convert user_id to int data type
user_id = int(user_id)

# Check if the user already exists in the ratings dataset
if user_id in ratings_df['userId'].unique():
    # Get user ratings using the movie_rater function
    user_rating = movie_rater(movies_df, 4, genre='Comedy')

    if user_rating == 'escape':
        # User wants personalized recommendations
        # Generate movie recommendations based on collaborative filtering
        print("Personalized recommendations based on user's ratings")
        # ...
    else:
        # User has provided ratings
        # Add the new ratings to the original ratings DataFrame
        user_ratings = pd.DataFrame(user_rating)
        new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

        # Define the reader
        reader = Reader(rating_scale=(1, 5))

        # Load the data from the DataFrame
        new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

        # Train the SVD model with the updated ratings
        svd = SVD()
        svd.fit(new_data.build_full_trainset())

        # Predict ratings for unrated movies
        predictions = []
        for movie_id in movies_df['movieId'].unique():
            predicted_rating = svd.predict(user_id, movie_id).est
            predictions.append((movie_id, predicted_rating))

        # Sort the predicted ratings
        ranked_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

        # Print the top 5 movie recommendations for the user
        print("Top 5 movie recommendations based on your ratings:")
        for idx, rec in enumerate(ranked_predictions[:5]):
            title = movies_df.loc[movies_df['movieId'] == rec[0], 'title'].values[0]
            print(f"Recommendation #{idx+1}: {title}")

else:
    # New user without any ratings
    print("Popular movie recommendations:")
    genre = input("Enter a genre to get recommendations based on that (leave blank for all genres): ")
    recommendations = recommend_popular_movies(ratings_df, movies_df, genre)
    if recommendations.empty:
        print("No recommendations found.")
    else:
        for idx, rec in enumerate(recommendations):
            print(f"Recommendation #{idx+1}: {rec}")



      movieId                                title  genres
2358     3120  Distinguished Gentleman, The (1992)  Comedy
Personalized recommendations based on user's ratings


In [73]:
import pandas as pd

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see popular movies:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return recommend_popular_movies(df, df_movies)
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': rating}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Get user ratings using the movie_rater function
user_rating = movie_rater(df_movies, 4, 'Comedy')

# Add the new ratings to the original ratings DataFrame
user_ratings = pd.DataFrame(user_rating)
new_ratings_df = pd.concat([new_df, user_ratings], axis=0)
new_data = Dataset.load_from_df(new_ratings_df, reader)

# Train a model using the new combined DataFrame
svd_ = SVD(n_factors=50, reg_all=0.05)
svd_.fit(new_data.build_full_trainset())

# Generate movie recommendations for the user
list_of_movies = []
for m_id in new_df['movieId'].unique():
    list_of_movies.append((m_id, svd_.predict(1000, m_id)[3]))

# Order the recommendations from highest to lowest rated
ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)

# Print the top 5 movie recommendations for the user
n = 5
for idx, rec in enumerate(ranked_movies):
    title = df_movies.loc[df_movies['movieId'] == int(rec[0])]['title']
    print('Recommendation #', idx+1, ':', title, '\n')

def recommend_popular_movies(ratings_df, movies_df, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popularity_df.head(num_recommendations)

    # Merge with movies_df to get the movie titles
    recommended_movies = top_movies.merge(movies_df, on='movieId', how='left')['title']

    return recommended_movies

# Example usage
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

recommended_movies = recommend_popular_movies(ratings_df, movies_df, num_recommendations=5)
print("Recommended movies:")
print(recommended_movies)


      movieId                        title        genres
7539    84950  Take Me Home Tonight (2011)  Comedy|Drama


ValueError: too many values to unpack (expected 3)

In [None]:
def recommend_popular_movies(ratings_df, movies_df, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popularity_df.head(num_recommendations)

    # Merge with movies_df to get the movie titles
    recommended_movies = top_movies.merge(movies_df, on='movieId', how='left')['title']

    return recommended_movies

# Example usage
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

recommended_movies = recommend_popular_movies(ratings_df, movies_df, num_recommendations=5)
print("Recommended movies:")
print(recommended_movies)

Recommended movies:
0                                      Lamerica (1994)
1                 Heidi Fleiss: Hollywood Madam (1995)
2                                  Lesson Faust (1994)
3    Jonah Who Will Be 25 in the Year 2000 (Jonas q...
4                                  Belle époque (1992)
Name: title, dtype: object


In [61]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see popular movies:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return recommend_popular_movies(ratings_df, movies_df, genre=genre, num_recommendations=5)
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': rating}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre=None, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    if genre:
        # Filter movies based on genre
        movies_df = movies_df[movies_df['genres'].str.contains(genre, case=False, na=False)]
    
    # Merge with movies_df to get the movie titles
    recommended_movies = popularity_df.merge(movies_df, on='movieId', how='left')['title'].head(num_recommendations)

    return recommended_movies

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Get user ratings using the movie_rater function
user_rating = movie_rater(movies_df, 4, 'Comedy')

# Add the new ratings to the original ratings DataFrame
user_ratings = pd.DataFrame(user_rating)
new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

# Define the reader
reader = Reader(rating_scale=(1, 5))

# Load the data from the DataFrame
new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

# Train a model using the new combined DataFrame
svd_ = SVD(n_factors=50, reg_all=0.05)
svd_.fit(new_data.build_full_trainset())

# Generate movie recommendations for the user
list_of_movies = []
for m_id in movies_df['movieId'].unique():
    list_of_movies.append((m_id, svd_.predict(1000, m_id)[3]))

# Order the recommendations from highest to lowest rated
ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)

# Print the top 5 movie recommendations for the user
n = 5
for idx, rec in enumerate(ranked_movies[:n]):
    title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
    print('Recommendation #', idx+1, ':', title)

# Print popular movie recommendations if no ratings were provided
if len(user_rating) == 0:
    print("\nNo ratings provided. Here are the top popular movies:")
    recommended_movies = recommend_popular_movies(new_ratings_df, movies_df, genre='Comedy', num_recommendations=5)
    print(recommended_movies)



     movieId                       title  \
899     1197  Princess Bride, The (1987)   

                                      genres  
899  Action|Adventure|Comedy|Fantasy|Romance  
Recommendation # 1 : Toy Story (1995)
Recommendation # 2 : Jumanji (1995)
Recommendation # 3 : Grumpier Old Men (1995)
Recommendation # 4 : Waiting to Exhale (1995)
Recommendation # 5 : Father of the Bride Part II (1995)


In [None]:
# Print the top 5 movie recommendations for the user
n = 5
for idx, rec in enumerate(ranked_movies[:n]):
    title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
    print('Recommendation #', idx+1, ':', title)



In [54]:
# Print popular movie recommendations if no ratings were provided
if len(user_rating) == 1:
    print("\nNo ratings provided. Here are the top popular movies:")
    recommended_movies = recommend_popular_movies(new_ratings_df, movies_df, genre='Comedy', num_recommendations=5)
    print(recommended_movies)


In [49]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see popular movies:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return recommend_popular_movies(ratings_df, movies_df, num_recommendations=5)
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': rating}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popularity_df.head(num_recommendations)

    # Merge with movies_df to get the movie titles
    recommended_movies = top_movies.merge(movies_df, on='movieId', how='left')['title']

    return recommended_movies

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Get user ratings using the movie_rater function
user_rating = movie_rater(movies_df, 4, 'Comedy')

# Add the new ratings to the original ratings DataFrame
user_ratings = pd.DataFrame(user_rating)
new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

# Define the reader
reader = Reader(rating_scale=(1, 5))

# Load the data from the DataFrame
new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

# Train a model using the new combined DataFrame
svd_ = SVD(n_factors=50, reg_all=0.05)
svd_.fit(new_data.build_full_trainset())

# Generate movie recommendations for the user
list_of_movies = []
for m_id in movies_df['movieId'].unique():
    list_of_movies.append((m_id, svd_.predict(1000, m_id)[3]))

# Order the recommendations from highest to lowest rated
ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)

# Print the top 5 movie recommendations for the user
n = 5
for idx, rec in enumerate(ranked_movies[:n]):
    title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
    print('Recommendation #', idx+1, ':', title)

# Print the top 5 movie recommendations for the user
n = 5
for idx, rec in enumerate(ranked_movies[:n]):
    title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
    print('Recommendation #', idx+1, ':', title)


      movieId                        title  genres
3131     4214  Revenge of the Nerds (1984)  Comedy
Recommendation # 1 : Toy Story (1995)
Recommendation # 2 : Jumanji (1995)
Recommendation # 3 : Grumpier Old Men (1995)
Recommendation # 4 : Waiting to Exhale (1995)
Recommendation # 5 : Father of the Bride Part II (1995)


In [None]:
# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen:\n')
        if rating == 'n':
            continue
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': rating}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Get user ratings using the movie_rater function
user_rating = movie_rater(df_movies, 4, 'Comedy')

In [None]:
# Add the new ratings to the original ratings DataFrame
user_ratings = pd.DataFrame(user_rating)
new_ratings_df = pd.concat([new_df, user_ratings], axis=0)
new_data = Dataset.load_from_df(new_ratings_df, reader)

In [None]:
# Generate movie recommendations for the user
list_of_movies = []
for m_id in new_df['movieId'].unique():
    list_of_movies.append((m_id, svd.predict(1000, m_id)[3]))
# Order the recommendations from highest to lowest rated
ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)

In [None]:
# Print the top 5 movie recommendations for the user
n = 5
for idx, rec in enumerate(ranked_movies):
    title = df_movies.loc[df_movies['movieId'] == int(rec[0])]['title']
    print('Recommendation #', idx+1, ':', title, '\n')
    n -= 1
    if n == 0:
        break

In [72]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see popular movies:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            rating_list = []
            break
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': rating}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre=None, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    if genre:
        # Filter movies based on genre
        movies_df = movies_df[movies_df['genres'].str.contains(genre, case=False, na=False)]

    # Merge with movies_df to get the movie titles
    recommended_movies = popularity_df.merge(movies_df, on='movieId', how='left')['title'].head(num_recommendations)

    return recommended_movies

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Get user ratings using the movie_rater function
user_rating = movie_rater(movies_df, 4, 'Comedy')

# Check if the user rating list is empty
if len(user_rating) == 0:
    print("\nNo ratings provided. Here are the top popular movies:")
    popular_movies = recommend_popular_movies(ratings_df, movies_df, genre='Comedy', num_recommendations=5)
    print(popular_movies)
else:
    # Add the new ratings to the original ratings DataFrame
    user_ratings = pd.DataFrame(user_rating)
    new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

    # Define the reader
    reader = Reader(rating_scale=(1, 5))

    # Load the data from the DataFrame
    new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

    # Train a model using the new combined DataFrame
    svd_ = SVD(n_factors=50, reg_all=0.05)
    svd_.fit(new_data.build_full_trainset())

    # Generate movie recommendations for the user
    list_of_movies = []
    for m_id in movies_df['movieId'].unique():
        list_of_movies.append((m_id, svd_.predict(1000, m_id)[3]))

    # Order the recommendations from highest to lowest rated
    ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)
# Print the top 5 movie recommendations for the user
    n = 5
    for idx, rec in enumerate(ranked_movies[:n]):
        title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
        print('Recommendation #', idx+1, ':', title)
    

      movieId                title          genres
7248    74154  When in Rome (2010)  Comedy|Romance
      movieId              title          genres
9015   140301  The Escort (2015)  Comedy|Romance
      movieId                       title  genres
3163     4255  Freddy Got Fingered (2001)  Comedy
      movieId                                              title  \
6299    48082  Science of Sleep, The (La science des rêves) (...   

                            genres  
6299  Comedy|Drama|Fantasy|Romance  
Recommendation # 1 : Shawshank Redemption, The (1994)
Recommendation # 2 : Lawrence of Arabia (1962)
Recommendation # 3 : Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
Recommendation # 4 : Usual Suspects, The (1995)
Recommendation # 5 : Princess Bride, The (1987)


In [71]:
  # Print the top 5 movie recommendations for the user
    n = 5
    for idx, rec in enumerate(ranked_movies[:n]):
        title = movies_df.loc[movies_df['movieId'] == int(rec[0]), 'title'].values[0]
        print('Recommendation #', idx+1, ':', title)

IndentationError: unexpected indent (<ipython-input-71-f5c2b906584f>, line 2)

In [114]:
import pandas as pd
from surprise import Dataset, Reader, SVD

# Function to get user ratings for movies
def movie_rater(movie_df, num, genre=None):
    userID = 1000  # Set a default userID for new users
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen, or type "escape" to see personalized recommendations:\n')
        if rating == 'n':
            continue
        elif rating.lower() == 'escape':
            return None  # Indicates the user wants personalized recommendations
        else:
            rating_one_movie = {'userId': userID, 'movieId': movie['movieId'].values[0], 'rating': float(rating)}
            rating_list.append(rating_one_movie)
            num -= 1
    return rating_list

# Function to recommend popular movies
def recommend_popular_movies(ratings_df, movies_df, genre, num_recommendations=5):
    # Calculate average ratings and number of ratings for each movie
    average_ratings = ratings_df.groupby('movieId')['rating'].mean()
    num_ratings = ratings_df.groupby('movieId')['rating'].count()

    # Create a DataFrame with movie popularity metrics
    popularity_df = pd.DataFrame({'average_rating': average_ratings, 'num_ratings': num_ratings})

    # Sort movies based on popularity metrics (e.g., average rating and number of ratings)
    popularity_df = popularity_df.sort_values(by=['average_rating', 'num_ratings'], ascending=False)

    # Filter movies by genre (if provided)
    if genre:
        popular_movies = movies_df[movies_df['genres'].str.contains(genre)]
        popular_movies = popular_movies.merge(popularity_df, on='movieId', how='left')
    else:
        popular_movies = popularity_df.merge(movies_df, on='movieId', how='left')

    # Get the top-rated or most popular movies from the sorted DataFrame
    top_movies = popular_movies.head(num_recommendations)

    # Return the recommended movies
    return top_movies['title']

# Load ratings and movies data
ratings_df = pd.read_csv('Data/ratings.csv')
movies_df = pd.read_csv('Data/movies.csv')

# Prompt the user to enter their user ID
user_id = input('Enter your user ID: ')

# Convert user_id to int data type
user_id = int(user_id)

# Check if the user already exists in the ratings dataset
if user_id in ratings_df['userId'].unique():
    # Get user ratings using the movie_rater function
    user_rating = movie_rater(movies_df, 4, genre='Comedy')

    if user_rating is None:
        # User wants personalized recommendations
        print("Personalized recommendations based on user's ratings")
        
        # Extract the user's ratings from the ratings dataframe
        user_ratings = ratings_df[ratings_df['userId'] == user_id]

        # Group the ratings by movie and calculate the average rating for each movie
        movie_ratings = user_ratings.groupby('movieId')['rating'].mean().reset_index()

        # Merge movie ratings with movie metadata
        personalized_movies = movie_ratings.merge(movies_df, on='movieId', how='left')

        # Sort the movies based on the average rating
        personalized_movies = personalized_movies.sort_values(by='rating', ascending=False)

        # Get the top 5 movie recommendations
        recommendations = personalized_movies['title'].head(5)

        # Print the recommendations
        for idx, rec in enumerate(recommendations):
            print(f"Recommendation #{idx+1}: {rec}")

    else:
        # User has provided ratings
        # Add the new ratings to the original ratings DataFrame
        user_ratings = pd.DataFrame(user_rating)
        new_ratings_df = pd.concat([ratings_df, user_ratings], ignore_index=True)

        # Define the reader
        reader = Reader(rating_scale=(1, 5))

        # Load the data from the DataFrame
        new_data = Dataset.load_from_df(new_ratings_df[['userId', 'movieId', 'rating']], reader)

        # Train the SVD model with the updated ratings
        svd = SVD()
        svd.fit(new_data.build_full_trainset())

        # Predict ratings for unrated movies
        predictions = []
        for movie_id in movies_df['movieId'].unique():
            predicted_rating = svd.predict(user_id, movie_id).est
            predictions.append((movie_id, predicted_rating))

        # Sort the predicted ratings
        ranked_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

        # Print the top 5 movie recommendations for the user
        print("Top 5 movie recommendations based on your ratings:")
        for idx, rec in enumerate(ranked_predictions[:5]):
            title = movies_df.loc[movies_df['movieId'] == rec[0], 'title'].values[0]
            print(f"Recommendation #{idx+1}: {title}")

else:
    # New user without any ratings
    print("Popular movie recommendations:")
    genre = input("Enter a genre to get recommendations based on that (leave blank for all genres): ")
    recommendations = recommend_popular_movies(ratings_df, movies_df, genre, num_recommendations=5)
    for idx, rec in enumerate(recommendations):
        print(f"Recommendation #{idx+1}: {rec}")

Popular movie recommendations:
Recommendation #1: Toy Story (1995)
Recommendation #2: Grumpier Old Men (1995)
Recommendation #3: Waiting to Exhale (1995)
Recommendation #4: Father of the Bride Part II (1995)
Recommendation #5: Sabrina (1995)
