In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def create_and_load_data():
    """Create a small, synthetic movie dataset.

    Returns:
        A DataFrame with one row per movie and the columns 'MovieID',
        'Title' and 'Genres'. Genres are stored as a single
        space-separated string (e.g. 'Action Crime Drama') so a text
        vectorizer can tokenize them directly.
    """
    data = {
        'MovieID': [1, 2, 3, 4, 5, 6, 7],
        'Title': [
            'Toy Story',
            'Jumanji',
            'Heat',
            'Pulp Fiction',
            'Braveheart',
            'The Dark Knight',
            'Avatar',
        ],
        'Genres': [
            'Animation|Children|Comedy',
            'Adventure|Fantasy',
            'Action|Crime|Drama',
            'Crime|Drama',
            'Action|Drama|War',
            'Action|Crime|Drama',
            'Action|Adventure|Sci-Fi',
        ],
    }
    df = pd.DataFrame(data)
    # Replace the pipe separator with spaces for TF-IDF tokenization.
    # '|' is a regex metacharacter and the default of `regex` differs between
    # pandas versions, so request a literal replacement explicitly.
    df['Genres'] = df['Genres'].str.replace('|', ' ', regex=False)
    return df

def build_content_model(movies_df):
    """
    Compute the movie-to-movie similarity matrix used for content-based filtering.

    The 'Genres' text of every row in `movies_df` is vectorized with TF-IDF
    (English stop words removed), and the pairwise dot products of those
    vectors are returned. Entry [i][j] is the similarity between row i and
    row j of `movies_df`.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    genre_vectors = vectorizer.fit_transform(movies_df['Genres'])

    # The dot product is used as the cosine similarity here; this relies on
    # the TF-IDF rows being length-normalized (the vectorizer's default).
    return linear_kernel(genre_vectors, genre_vectors)

def get_recommendations(title, movies_df, cosine_sim, num_recommendations=5):
    """
    Recommend the movies most similar to `title` (content-based filtering).

    Args:
        title: Exact title to look up in movies_df['Title'].
        movies_df: DataFrame with a 'Title' column; row order must match the
            row/column order of `cosine_sim`.
        cosine_sim: Square similarity matrix where cosine_sim[i][j] is the
            similarity between rows i and j of `movies_df`.
        num_recommendations: Maximum number of titles to return.

    Returns:
        A list of up to `num_recommendations` titles, most similar first.
        The queried movie is never included. If `title` is not found, an
        error message is printed and an empty list is returned.
    """
    titles = movies_df['Title'].tolist()

    # Resolve the title to a row *position* (not an index label) so the
    # lookup stays aligned with cosine_sim even when the DataFrame has a
    # non-default index. With duplicate titles the first occurrence is used.
    try:
        target_pos = titles.index(title)
    except ValueError:
        print(f"Error: Movie '{title}' not found in the dataset.")
        return []

    # Exclude the queried movie explicitly. Dropping "the first sorted entry"
    # is wrong whenever another movie ties with it at the top score (e.g. two
    # movies with identical genres), because the stable sort may place the
    # other movie first and leave the queried movie in the results.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[target_pos])
        if pos != target_pos
    ]

    # Highest similarity first; ties keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    top = candidates[:max(num_recommendations, 0)]
    return [titles[pos] for pos, _ in top]

def main():
    """Run the interactive demo.

    Loads the sample dataset, builds the similarity matrix, asks the user for
    a movie title on stdin (matched case-insensitively against the dataset),
    and prints up to three recommendations with their genres.
    """
    print("--- Hybrid Movie Recommendation System (Simple Demo) ---")

    # Data preparation
    catalog = create_and_load_data()
    print("\n[INFO] Loaded Dataset:")
    print(catalog[['Title', 'Genres']])

    # Model building
    similarity = build_content_model(catalog)
    print("\n[INFO] Content-Based Model Built (Cosine Similarity Matrix).")

    # Show the user what can be picked
    print("\nAvailable movies to choose from:")
    print(", ".join(catalog['Title'].tolist()))

    def genres_of(name):
        # Genres string of the first row whose title equals `name`.
        return catalog.loc[catalog['Title'] == name, 'Genres'].iloc[0]

    query = input("\nEnter a movie title from the list to get recommendations: ").strip()

    # Case-insensitive exact match; the dataset's own spelling is used afterwards.
    hits = catalog.loc[catalog['Title'].str.lower() == query.lower(), 'Title']
    if hits.empty:
        print(f"\n[RESULT] Sorry, '{query}' was not found in our small database.")
        return
    chosen = hits.iloc[0]

    picks = get_recommendations(chosen, catalog, similarity, num_recommendations=3)

    print("\n--- TOP RECOMMENDATIONS ---")
    if not picks:
        print("No recommendations could be generated.")
        return

    print(f"Based on your selection: **{chosen}** (Genre: {genres_of(chosen)})")
    print("Your 3 recommended movies are:")
    for rank, movie in enumerate(picks, start=1):
        print(f"  {rank}. **{movie}** (Genres: {genres_of(movie)})")

# Run the demo only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()

--- Hybrid Movie Recommendation System (Simple Demo) ---

[INFO] Loaded Dataset:
             Title                     Genres
0        Toy Story  Animation Children Comedy
1          Jumanji          Adventure Fantasy
2             Heat         Action Crime Drama
3     Pulp Fiction                Crime Drama
4       Braveheart           Action Drama War
5  The Dark Knight         Action Crime Drama
6           Avatar    Action Adventure Sci-Fi

[INFO] Content-Based Model Built (Cosine Similarity Matrix).

Available movies to choose from:
Toy Story, Jumanji, Heat, Pulp Fiction, Braveheart, The Dark Knight, Avatar
