In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")

# Step 1: Define Hardcoded Movie Data
# Create a list of movies with their details

In [2]:
movies_data = [
    {"movieId": "Movie_1", "title": "The Shawshank Redemption", "genres": "Drama"},
    {"movieId": "Movie_2", "title": "The Godfather", "genres": "Crime|Drama"},
    {"movieId": "Movie_3", "title": "The Dark Knight", "genres": "Action|Adventure"},
    {"movieId": "Movie_4", "title": "Pulp Fiction", "genres": "Crime|Drama"},
    {"movieId": "Movie_5", "title": "Forrest Gump", "genres": "Drama|Romance"},
    {"movieId": "Movie_6", "title": "Inception", "genres": "Action|Sci-Fi"},
    {"movieId": "Movie_7", "title": "The Matrix", "genres": "Action|Sci-Fi"},
    {"movieId": "Movie_8", "title": "Titanic", "genres": "Drama|Romance"},
    {"movieId": "Movie_9", "title": "Avatar", "genres": "Action|Adventure|Fantasy"},
    {"movieId": "Movie_10", "title": "Gladiator", "genres": "Action|Adventure|Drama"},
    {"movieId": "Movie_11", "title": "Jurassic Park", "genres": "Action|Adventure|Sci-Fi"},
    {"movieId": "Movie_12", "title": "Star Wars", "genres": "Action|Adventure|Sci-Fi"},
    {"movieId": "Movie_13", "title": "The Lion King", "genres": "Animation|Adventure|Drama"},
    {"movieId": "Movie_14", "title": "Toy Story", "genres": "Animation|Comedy|Family"},
    {"movieId": "Movie_15", "title": "Finding Nemo", "genres": "Animation|Adventure|Comedy"},
]

# Convert the list of movies into a DataFrame

In [3]:
movies = pd.DataFrame(movies_data)

# Create synthetic ratings data for users

In [4]:
np.random.seed(42)

# Synthetic user IDs

In [5]:
user_ids = [f"User_{i}" for i in range(1, 51)]

# Create a list of ratings

In [6]:
ratings_data = []
for user_id in user_ids:
    for movie in movies_data:
        if np.random.rand() < 0.6:  # Randomly assign ratings to simulate sparsity
            rating = np.random.randint(1, 6)
            ratings_data.append([user_id, movie["movieId"], rating])

# Convert the list of ratings into a DataFrame

In [7]:
ratings = pd.DataFrame(ratings_data, columns=["userId", "movieId", "rating"])

In [8]:
# Display sample data
print("Sample Movie Data:")
print(movies.head())
print("\nSample Ratings Data:")
print(ratings.head())

Sample Movie Data:
   movieId                     title            genres
0  Movie_1  The Shawshank Redemption             Drama
1  Movie_2             The Godfather       Crime|Drama
2  Movie_3           The Dark Knight  Action|Adventure
3  Movie_4              Pulp Fiction       Crime|Drama
4  Movie_5              Forrest Gump     Drama|Romance

Sample Ratings Data:
   userId  movieId  rating
0  User_1  Movie_1       5
1  User_1  Movie_2       5
2  User_1  Movie_3       2
3  User_1  Movie_4       3
4  User_1  Movie_5       5


In [9]:
# Preprocess ratings data
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [10]:

# Train SVD model
svd = SVD()
svd.fit(trainset)

# Evaluate the model
predictions = svd.test(testset)
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)
print(f"\nCollaborative Filtering RMSE: {rmse}")
print(f"Collaborative Filtering MAE: {mae}")

RMSE: 1.4093
MAE:  1.2271

Collaborative Filtering RMSE: 1.4092911080612185
Collaborative Filtering MAE: 1.22711193252963


# Function to recommend movies based on collaborative filtering

In [11]:
def get_collaborative_recommendations(user_id, n_recommendations=10):
    # Get list of all movieIds
    all_movie_ids = movies['movieId'].unique()

    # Predict ratings for all movies not rated by the user
    user_movies = ratings[ratings['userId'] == user_id]['movieId']
    unrated_movies = [movie_id for movie_id in all_movie_ids if movie_id not in user_movies]

    predictions = [(movie_id, svd.predict(user_id, movie_id).est) for movie_id in unrated_movies]

    # Sort predictions by estimated rating
    predictions.sort(key=lambda x: x[1], reverse=True)

    # Get top N recommendations
    top_recommendations = predictions[:n_recommendations]

    # Map movieIds to movie titles
    recommended_movies = []
    for movie_id, rating in top_recommendations:
        title = movies[movies['movieId'] == movie_id]['title'].iloc[0]
        recommended_movies.append((title, rating))

    return recommended_movies

In [12]:
# Preprocess movies data
movies['genres'] = movies['genres'].str.replace('|', ' ')
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute cosine similarity matrix

In [13]:
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to recommend movies based on content-based filtering

In [14]:
def get_content_based_recommendations(movie_title, n_recommendations=10):
    # Get index of the movie
    idx = movies[movies['title'] == movie_title].index[0]

    # Get similarity scores for the movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort movies by similarity score
    sim_scores.sort(key=lambda x: x[1], reverse=True)

    # Get top N similar movies
    top_indices = [i[0] for i in sim_scores[1:n_recommendations+1]]

    # Get movie titles
    recommended_movies = movies.iloc[top_indices]['title'].tolist()

    return recommended_movies

# Combine collaborative and content-based filtering

In [15]:
def get_hybrid_recommendations(user_id, movie_title, n_recommendations=10):
    # Get collaborative filtering recommendations
    collab_recs = get_collaborative_recommendations(user_id, n_recommendations=20)

    # Get content-based filtering recommendations
    content_recs = get_content_based_recommendations(movie_title, n_recommendations=20)

    # Combine recommendations
    combined_recs = list(set([rec[0] for rec in collab_recs] + content_recs))

    # Limit to top N recommendations
    return combined_recs[:n_recommendations]

# Feature 1: Cold Start Problem Handling

In [16]:
def handle_cold_start(user_id, n_recommendations=10):
    # Check if the user has rated any movies
    if user_id not in ratings['userId'].unique():
        # Recommend popular movies
        popular_movies = ratings.groupby('movieId')['rating'].count().sort_values(ascending=False).head(n_recommendations)
        recommended_movies = movies[movies['movieId'].isin(popular_movies.index)]['title'].tolist()
        return recommended_movies
    else:
        # Use collaborative filtering for existing users
        return [rec[0] for rec in get_collaborative_recommendations(user_id, n_recommendations)]

# Feature 2: Genre-Based Recommendations

In [17]:
def get_genre_based_recommendations(genre, n_recommendations=10):
    genre_movies = movies[movies['genres'].str.contains(genre)]
    genre_movies = genre_movies.sample(frac=1).reset_index(drop=True)  # Shuffle
    return genre_movies['title'].head(n_recommendations).tolist()

# Feature 3: User Similarity Analysis

In [18]:
def find_similar_users(user_id, n_users=5):
    user_ratings = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    user_similarity = cosine_similarity(user_ratings)
    user_similarity_df = pd.DataFrame(user_similarity, index=user_ratings.index, columns=user_ratings.index)

    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:n_users+1]
    return similar_users.tolist()

# Feature 4: Visualization of Recommendations

In [19]:
def visualize_recommendations(recommendations):
    titles, ratings = zip(*recommendations)
    plt.figure(figsize=(10, 6))
    plt.barh(titles, ratings, color='skyblue')
    plt.xlabel('Predicted Rating')
    plt.title('Top Recommendations')
    plt.gca().invert_yaxis()
    plt.show()

 Feature 5: Save Recommendations to CSV

In [20]:
def save_recommendations_to_csv(recommendations, filename='recommendations.csv'):
    df = pd.DataFrame(recommendations, columns=['Title', 'Predicted Rating'])
    df.to_csv(filename, index=False)
    print(f"\nRecommendations saved to {filename}.")

# Feature 6: Batch Recommendations for Multiple Users

In [21]:
def batch_recommendations(user_ids, n_recommendations=10):
    results = {}
    for user_id in user_ids:
        recommendations = get_collaborative_recommendations(user_id, n_recommendations)
        results[user_id] = recommendations
    return results

# Feature 7: Advanced Evaluation Metrics

In [22]:
def evaluate_model(model, testset):
    predictions = model.test(testset)
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)
    precision_at_k = calculate_precision_at_k(predictions, k=10)
    recall_at_k = calculate_recall_at_k(predictions, k=10)
    return {
        'RMSE': rmse,
        'MAE': mae,
        'Precision@K': precision_at_k,
        'Recall@K': recall_at_k
    }

def calculate_precision_at_k(predictions, k=10, threshold=3.5):
    user_est_true = {}
    for uid, _, true_r, est, _ in predictions:
        if uid not in user_est_true:
            user_est_true[uid] = []
        user_est_true[uid].append((est, true_r))

    precisions = []
    for uid, user_ratings in user_est_true.items():
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        top_k = user_ratings[:k]
        relevant = sum((true_r >= threshold) for (_, true_r) in top_k)
        precisions.append(relevant / k)

    return np.mean(precisions)

def calculate_recall_at_k(predictions, k=10, threshold=3.5):
    user_est_true = {}
    for uid, _, true_r, est, _ in predictions:
        if uid not in user_est_true:
            user_est_true[uid] = []
        user_est_true[uid].append((est, true_r))

    recalls = []
    for uid, user_ratings in user_est_true.items():
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        top_k = user_ratings[:k]
        relevant = sum((true_r >= threshold) for (_, true_r) in user_ratings)
        if relevant > 0:
            recall = sum((true_r >= threshold) for (_, true_r) in top_k) / relevant
            recalls.append(recall)

    return np.mean(recalls)


# Additional Helper Functions

In [23]:
def get_movie_details(movie_title):
    """Get details of a specific movie."""
    movie = movies[movies['title'] == movie_title]
    if not movie.empty:
        return movie.to_dict(orient='records')[0]
    return None

def get_user_profile(user_id):
    """Get profile of a specific user."""
    user_ratings = ratings[ratings['userId'] == user_id]
    if not user_ratings.empty:
        return user_ratings.merge(movies, on='movieId').to_dict(orient='records')
    return None

def get_top_rated_movies_by_user(user_id, n_movies=5):
    """Get top-rated movies by a specific user."""
    user_ratings = ratings[ratings['userId'] == user_id]
    top_movies = user_ratings.sort_values(by='rating', ascending=False).head(n_movies)
    return top_movies.merge(movies, on='movieId')['title'].tolist()

def get_least_rated_movies_by_user(user_id, n_movies=5):
    """Get least-rated movies by a specific user."""
    user_ratings = ratings[ratings['userId'] == user_id]
    least_movies = user_ratings.sort_values(by='rating', ascending=True).head(n_movies)
    return least_movies.merge(movies, on='movieId')['title'].tolist()


# Main function for Bangla Input/Output

In [24]:
def main():
    print("\nWelcome! This system will provide movie recommendations.")
    print("Please choose from the following options:")
    print("1. Get recommendations using Collaborative Filtering")
    print("2. Get recommendations using Content-Based Filtering")
    print("3. Get recommendations using Hybrid Filtering")
    print("4. Get recommendations based on Genre")
    print("5. Handle Cold Start Problem")
    print("6. Analyze User Similarity")
    print("7. Get Batch Recommendations")
    print("8. Evaluate Model")
    choice = input("Enter your choice (1/2/3/4/5/6/7/8): ")

    if choice == "1":
        user_id = input("Enter your User ID (e.g., User_1): ")
        n_recommendations = int(input("How many recommendations do you want? (Default 10): ") or 10)
        recommendations = get_collaborative_recommendations(user_id, n_recommendations)
        print("\nYour Top Recommendations (Collaborative Filtering):")
        for title, rating in recommendations:
            print(f"{title} (Predicted Rating: {rating:.2f})")
        visualize_recommendations(recommendations)

    elif choice == "2":
        movie_title = input("Enter your favorite movie title: ")
        n_recommendations = int(input("How many recommendations do you want? (Default 10): ") or 10)
        recommendations = get_content_based_recommendations(movie_title, n_recommendations)
        print("\nYour Top Recommendations (Content-Based Filtering):")
        for title in recommendations:
            print(title)

    elif choice == "3":
        user_id = input("Enter your User ID (e.g., User_1): ")
        movie_title = input("Enter your favorite movie title: ")
        n_recommendations = int(input("How many recommendations do you want? (Default 10): ") or 10)
        recommendations = get_hybrid_recommendations(user_id, movie_title, n_recommendations)
        print("\nYour Top Recommendations (Hybrid Filtering):")
        for title in recommendations:
            print(title)

    elif choice == "4":
        genre = input("Enter your favorite genre (e.g., Action): ")
        n_recommendations = int(input("How many recommendations do you want? (Default 10): ") or 10)
        recommendations = get_genre_based_recommendations(genre, n_recommendations)
        print("\nYour Top Recommendations (Genre-Based):")
        for title in recommendations:
            print(title)

    elif choice == "5":
        user_id = input("Enter your User ID (e.g., User_1): ")
        n_recommendations = int(input("How many recommendations do you want? (Default 10): ") or 10)
        recommendations = handle_cold_start(user_id, n_recommendations)
        print("\nYour Top Recommendations (Cold Start Solution):")
        for title in recommendations:
            print(title)

    elif choice == "6":
        user_id = input("Enter your User ID (e.g., User_1): ")
        n_users = int(input("How many similar users do you want? (Default 5): ") or 5)
        similar_users = find_similar_users(user_id, n_users)
        print("\nYour Similar Users:")
        for user in similar_users:
            print(user)

    elif choice == "7":
        user_ids = input("Enter User IDs (comma-separated, e.g., User_1,User_2): ").split(",")
        n_recommendations = int(input("How many recommendations do you want? (Default 10): ") or 10)
        batch_results = batch_recommendations(user_ids, n_recommendations)
        print("\nBatch Recommendations:")
        for user_id, recs in batch_results.items():
            print(f"\nUser {user_id}:")
            for title, rating in recs:
                print(f"{title} (Predicted Rating: {rating:.2f})")

    elif choice == "8":
        evaluation_results = evaluate_model(svd, testset)
        print("\nModel Evaluation Results:")
        for metric, value in evaluation_results.items():
            print(f"{metric}: {value:.4f}")

    else:
        print("Invalid option! Please try again.")


In [25]:
if __name__ == "__main__":
    main()


Welcome! This system will provide movie recommendations.
Please choose from the following options:
1. Get recommendations using Collaborative Filtering
2. Get recommendations using Content-Based Filtering
3. Get recommendations using Hybrid Filtering
4. Get recommendations based on Genre
5. Handle Cold Start Problem
6. Analyze User Similarity
7. Get Batch Recommendations
8. Evaluate Model


StdinNotImplementedError: raw_input was called, but this frontend does not support input requests.