In [1]:
import os

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sqlalchemy import create_engine
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split


class RecommenderSystem:
    """
    Content-based and collaborative-filtering movie recommendations
    backed by direct PostgreSQL queries.

    On construction the ``core_movie`` and ``core_ratings`` tables are
    loaded into the ``movies`` and ``ratings`` DataFrames; every
    recommendation method works on those in-memory frames.
    """

    # Name of the environment variable consulted for the connection string.
    DB_URI_ENV_VAR = "RECOMMENDER_DB_URI"

    # Fallback kept only so that a bare ``RecommenderSystem()`` behaves as
    # it did before the URI became configurable.
    # NOTE(review): this embeds a password in source; prefer passing
    # ``db_uri`` or setting RECOMMENDER_DB_URI.
    DEFAULT_DB_URI = "postgresql://timeless:password@localhost:5432/devdb"

    def __init__(self, db_uri=None):
        """
        Connect to PostgreSQL and load the movie and rating tables.

        Parameters
        ----------
        db_uri : str, optional
            SQLAlchemy connection URI. When omitted, the value of the
            ``RECOMMENDER_DB_URI`` environment variable is used, and
            ``DEFAULT_DB_URI`` if that variable is not set.
        """
        if db_uri is None:
            db_uri = os.environ.get(self.DB_URI_ENV_VAR, self.DEFAULT_DB_URI)

        self.engine = create_engine(db_uri)
        self.movies = self.load_movies()
        self.ratings = self.load_ratings()

    def load_movies(self):
        """
        Load ``movie_id``, ``title`` and ``genres`` from ``core_movie``
        into a DataFrame.
        """
        query = "SELECT movie_id, title, genres FROM core_movie;"
        return pd.read_sql_query(query, self.engine)

    def load_ratings(self):
        """
        Load ``user_id``, ``movie_id`` and ``rating`` from ``core_ratings``
        into a DataFrame.
        """
        query = "SELECT user_id, movie_id, rating FROM core_ratings;"
        return pd.read_sql_query(query, self.engine)

    def content_based_filtering(self, movie_title, top_n=10):
        """
        Recommend movies whose genres are most similar to ``movie_title``.

        Genres are vectorised with TF-IDF and compared by cosine
        similarity.

        Parameters
        ----------
        movie_title : str
            Exact title to look up in ``self.movies["title"]``. If several
            rows share the title, the first one is used.
        top_n : int, default 10
            Maximum number of recommendations.

        Returns
        -------
        pandas.Series or str
            Titles of the most similar movies, best match first (the
            queried row itself is never included). If the title is not
            present, a human-readable "not found" message is returned
            instead.
        """
        # Resolve the title first so an unknown title costs no TF-IDF work.
        title_mask = (self.movies["title"] == movie_title).to_numpy()
        if not title_mask.any():
            return f"Movie titled '{movie_title}' not found in the database."

        # Positional (not label) index of the first match: the TF-IDF rows
        # are positional, so this stays correct for any DataFrame index.
        idx = int(title_mask.argmax())

        # Missing genres would make the vectorizer raise; treat as empty.
        genres = self.movies["genres"].fillna("")
        tfidf = TfidfVectorizer(stop_words="english")
        tfidf_matrix = tfidf.fit_transform(genres)

        # Only the query row is needed, so avoid the full N x N matrix.
        sim_row = cosine_similarity(
            tfidf_matrix[idx : idx + 1], tfidf_matrix
        ).ravel()

        # Exclude the query row explicitly: with genre-only features many
        # movies tie at similarity 1.0, so the query is not guaranteed to
        # sort first. ``sorted`` is stable, so ties keep table order.
        ranked_positions = sorted(
            (pos for pos in range(len(sim_row)) if pos != idx),
            key=lambda pos: sim_row[pos],
            reverse=True,
        )
        return self.movies["title"].iloc[ranked_positions[:top_n]]

    def collaborative_filtering(
        self, test_size=0.25, random_state=None, rating_scale=(1, 5)
    ):
        """
        Train an SVD collaborative-filtering model with the Surprise
        library and evaluate it on a held-out split.

        Parameters
        ----------
        test_size : float or int, default 0.25
            Passed to Surprise's ``train_test_split``.
        random_state : int or None, default None
            Seed forwarded to both the split and the SVD model. ``None``
            keeps the original unseeded behaviour; pass an int for
            reproducible results.
        rating_scale : tuple, default (1, 5)
            ``(min, max)`` rating scale given to ``Reader``.

        Returns
        -------
        tuple
            ``(svd, predictions)``: the fitted model and the result of
            ``svd.test`` on the held-out set.
        """
        reader = Reader(rating_scale=rating_scale)
        data = Dataset.load_from_df(
            self.ratings[["user_id", "movie_id", "rating"]], reader
        )

        trainset, testset = train_test_split(
            data, test_size=test_size, random_state=random_state
        )
        svd = SVD(random_state=random_state)
        svd.fit(trainset)

        predictions = svd.test(testset)
        return svd, predictions

    def recommend_movies(self, user_id, svd_model, top_n=10):
        """
        Recommend up to ``top_n`` movies the user has not rated yet.

        Parameters
        ----------
        user_id
            Identifier matched against ``self.ratings["user_id"]``.
        svd_model
            Object exposing ``predict(user_id, movie_id)`` whose result has
            an ``est`` attribute (e.g. the model returned by
            ``collaborative_filtering``).
        top_n : int, default 10
            Number of highest-scoring candidates to keep.

        Returns
        -------
        list of str
            Titles ordered from highest to lowest predicted rating.
            Candidates whose ``movie_id`` has no row in ``self.movies`` are
            omitted, so the list may be shorter than ``top_n``.
        """
        all_movie_ids = self.ratings["movie_id"].unique()
        rated_movie_ids = self.ratings.loc[
            self.ratings["user_id"] == user_id, "movie_id"
        ]
        movies_not_rated = set(all_movie_ids) - set(rated_movie_ids)

        predictions = [
            (movie_id, svd_model.predict(user_id, movie_id).est)
            for movie_id in movies_not_rated
        ]
        predictions.sort(key=lambda pair: pair[1], reverse=True)
        recommended_movie_ids = [movie_id for movie_id, _ in predictions[:top_n]]

        # Look titles up id-by-id so the predicted-rating order survives;
        # filtering the movies frame with ``isin`` would return them in
        # table order instead.
        title_by_id = dict(zip(self.movies["movie_id"], self.movies["title"]))
        return [
            title_by_id[movie_id]
            for movie_id in recommended_movie_ids
            if movie_id in title_by_id
        ]

In [2]:
from textwrap import fill
import pandas as pd


class Command:
    """
    Runs the recommender system and prints both kinds of recommendations.
    """

    def __init__(self):
        """
        Build the recommender and train the collaborative-filtering model.

        Stores the fitted model in ``svd_model`` and the held-out
        predictions in ``predictions``.
        """
        self.recommender = RecommenderSystem()
        self.svd_model, self.predictions = self.recommender.collaborative_filtering()

    def run(self, movie_title="Toy Story (1995)", user_id=1, top_n=10):
        """
        Print content-based and collaborative-filtering recommendations.

        Parameters
        ----------
        movie_title : str, default "Toy Story (1995)"
            Seed title for the content-based recommendations.
        user_id : default 1
            User for whom collaborative recommendations are produced.
        top_n : int, default 10
            Number of recommendations requested from each method.
        """
        content_recommendations = self.recommender.content_based_filtering(
            movie_title, top_n=top_n
        )
        print("Content-based Recommendations:")

        if isinstance(content_recommendations, str):
            # The recommender reports an unknown title as a plain message;
            # joining a str would interleave ", " between its characters.
            print(content_recommendations)
        else:
            # Wrap the comma-separated titles to 80 columns.
            print(fill(", ".join(content_recommendations), width=80))

        collab_recommendations = self.recommender.recommend_movies(
            user_id, self.svd_model, top_n=top_n
        )
        print("\nCollaborative Filtering Recommendations:")

        # Display as a DataFrame for better structure
        recommendations_df = pd.DataFrame(
            collab_recommendations, columns=["Recommendations"]
        )
        print(recommendations_df)


# Entry point: build the command (which loads data and trains the model)
# and print the recommendations.
if __name__ == "__main__":
    command = Command()
    command.run()

Content-based Recommendations:
DuckTales: The Movie - Treasure of the Lost Lamp (1990), Wild, The (2006), Tale
of Despereaux, The (2008), Asterix and the Vikings (Astérix et les Vikings)
(2006), Aladdin (1992), Boxtrolls, The (2014), Brother Bear 2 (2006), Moana
(2016), Here Comes the Grump (2018), Frozen II (2019)

Collaborative Filtering Recommendations:
                           Recommendations
0         Shawshank Redemption, The (1994)
1                        Streetwise (1984)
2  The Work of Director Spike Jonze (2003)
3                  Band of Brothers (2001)
4                                   Cosmos
5                  Twelve Angry Men (1954)
6    National Theatre Live: Fleabag (2019)
7                          The Mole (2020)
8                   Planet Earth II (2016)
9                      Planet Earth (2006)
