# In [8]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split


class RecommenderSystem:
    """
    Content-based and collaborative-filtering movie
    recommendations built from CSV snapshots.

    ``movies.csv`` and ``ratings.csv`` are read from ``data_path``.
    The methods use the ``movie_id``, ``title`` and ``genres`` columns of
    the movies table and the ``user_id``, ``movie_id`` and ``rating``
    columns of the ratings table.
    """

    def __init__(self, data_path=""):
        # Initialize with path to CSV snapshots
        self.data_path = data_path
        self.movies = self.load_movies()
        self.ratings = self.load_ratings()

    def _load_csv(self, filename, label):
        """
        Read one snapshot CSV from ``data_path``, best effort.

        A missing file is reported on stdout and an empty DataFrame is
        returned instead of raising, so construction never fails on it.
        """

        # os.path.join copes with "", "data" and "data/" alike, whereas
        # f"{data_path}/{filename}" would turn the default "" into a path
        # at the filesystem root.
        csv_path = os.path.join(self.data_path or "", filename)
        try:
            return pd.read_csv(csv_path)
        except FileNotFoundError as e:
            print(f"Error loading {label} data: {e}")
            return pd.DataFrame()

    def load_movies(self):
        """
        Load movie data from the snapshot CSV file.

        Returns an empty DataFrame when ``movies.csv`` does not exist.
        """

        return self._load_csv("movies.csv", "movies")

    def load_ratings(self):
        """
        Load ratings data from the snapshot CSV file.

        Returns an empty DataFrame when ``ratings.csv`` does not exist.
        """

        return self._load_csv("ratings.csv", "ratings")

    def content_based_filtering(self, movie_title, top_n=10):
        """
        Content-based filtering recommendation
        based on movie genres.

        Returns up to ``top_n`` titles whose TF-IDF genre vectors are most
        cosine-similar to those of ``movie_title`` (first exact title
        match). The queried movie itself is never part of the result.
        If the title is unknown, a one-element list holding a
        "not found" message is returned.
        """

        not_found = [f"Movie titled '{movie_title}' not found in the database."]
        if self.movies.empty:
            # Nothing was loaded, so no title can match.
            return not_found

        # Resolve the title to a row *position* before doing any heavy work;
        # positions stay valid even if the frame's index is not 0..n-1.
        positions = (self.movies["title"] == movie_title).to_numpy().nonzero()[0]
        if len(positions) == 0:
            return not_found
        query_pos = positions[0]

        # Missing genres are treated as "no genres" rather than crashing.
        genres = self.movies["genres"].fillna("")
        tfidf_matrix = TfidfVectorizer(stop_words="english").fit_transform(genres)

        # Only the query row of the similarity matrix is needed.
        similarities = cosine_similarity(tfidf_matrix[query_pos], tfidf_matrix).ravel()

        # Stable descending order keeps ties in table order. The query row is
        # removed by position: identical genre strings tie at the top score,
        # so "skip the first hit" could drop a different movie.
        order = (-similarities).argsort(kind="stable")
        neighbours = [pos for pos in order if pos != query_pos][:top_n]
        return self.movies["title"].iloc[neighbours].tolist()

    def collaborative_filtering(self, test_size=0.25, random_state=None):
        """
        Train a collaborative filtering model
        using SVD from the Surprise library.

        ``test_size`` is the share of ratings held out for testing and
        ``random_state`` seeds both the split and the SVD initialisation
        (``None`` keeps the run unseeded).

        Returns ``(svd, predictions)``: the fitted model and its
        predictions on the held-out ratings.

        Raises KeyError if the ratings table lacks ``user_id``,
        ``movie_id`` or ``rating`` (e.g. when it is empty).
        """

        # NOTE(review): assumes ratings lie in 1..5 - confirm against the snapshot.
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(
            self.ratings[["user_id", "movie_id", "rating"]], reader
        )

        # Split data into training and testing sets
        trainset, testset = train_test_split(
            data, test_size=test_size, random_state=random_state
        )
        svd = SVD(random_state=random_state)
        svd.fit(trainset)

        # Test the model
        predictions = svd.test(testset)
        return svd, predictions

    def recommend_movies(self, user_id, svd_model, top_n=10):
        """
        Recommend top-n movies for a given
        user using collaborative filtering.

        Every movie that appears in the ratings table but was not rated
        by ``user_id`` is scored with ``svd_model.predict(...).est``.
        Titles are returned best prediction first; ids without an entry
        in the movies table are skipped.
        """

        by_user = self.ratings["user_id"] == user_id
        already_rated = set(self.ratings.loc[by_user, "movie_id"])
        # Sorted so that equal predictions resolve in a reproducible order.
        candidates = sorted(set(self.ratings["movie_id"].unique()) - already_rated)

        scored = [
            (movie_id, svd_model.predict(user_id, movie_id).est)
            for movie_id in candidates
        ]

        # Sort movies by predicted rating and select the top recommendations
        scored.sort(key=lambda pair: pair[1], reverse=True)
        ranked_ids = [movie_id for movie_id, _ in scored[:top_n]]

        # Look titles up id by id: filtering the movies table with isin()
        # would hand them back in table order and lose the ranking.
        titles_by_id = self.movies.drop_duplicates(subset="movie_id").set_index(
            "movie_id"
        )["title"]
        return [
            titles_by_id.loc[movie_id]
            for movie_id in ranked_ids
            if movie_id in titles_by_id.index
        ]

# In [None]:
class Command:
    """
    This command runs the recommender system.

    Construction loads the CSV snapshots and trains the collaborative
    filtering model; ``run`` prints both kinds of recommendations.
    """

    def __init__(self, data_path="data/"):
        # Initialize the recommender and run collaborative filtering
        self.recommender = RecommenderSystem(data_path)
        self.svd_model, self.predictions = self.recommender.collaborative_filtering()

    def run(self, movie_title="Toy Story (1995)", user_id=1, top_n=10):
        """
        Print content-based and collaborative recommendations to stdout.

        ``movie_title`` seeds the content-based query, ``user_id`` selects
        the user for collaborative filtering and ``top_n`` caps the length
        of both lists. The defaults reproduce a plain ``run()`` call.
        """

        # Content-based recommendations
        content_recommendations = self.recommender.content_based_filtering(
            movie_title, top_n=top_n
        )
        print("Content-based Recommendations:", ", ".join(content_recommendations))
        print(pd.DataFrame(content_recommendations, columns=["Recommendations"]))

        # Collaborative filtering recommendations
        collab_recommendations = self.recommender.recommend_movies(
            user_id, self.svd_model, top_n=top_n
        )
        print("\nCollaborative Filtering Recommendations:")
        print(pd.DataFrame(collab_recommendations, columns=["Recommendations"]))


if __name__ == "__main__":
    # Script entry point: build the command on the data/ snapshots
    # (which trains the model) and print both recommendation lists.
    Command("data/").run()