<a href="https://colab.research.google.com/github/om123-collab/playground-f4tod5ym/blob/master/AI_Projects_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

def create_content_based_recommender(df):
  """
  Build a genre-based (content-based) movie recommender.

  Each movie's 'genres' string is converted to a bag-of-words count vector
  and the cosine similarity between every pair of movies is computed once
  up front, so each later lookup only has to rank one row of that matrix.

  Args:
    df: Pandas DataFrame containing movie data with columns:
       - 'title': Movie title
       - 'genres': Movie genres (comma-separated string). Missing values
         are treated as "no genres".

  Returns:
    A function that takes a movie title as input and returns a list of
    recommended movie titles.
  """

  # A missing genre becomes an empty document (an all-zero vector) rather
  # than a NaN, which CountVectorizer rejects as an invalid document.
  genre_docs = ['' if pd.isna(g) else g for g in df['genres']]

  # Create a CountVectorizer object to convert text to numerical features
  cv = CountVectorizer(stop_words='english')

  # Create a document-term matrix (one row per movie, in df row order)
  vectorized_genres = cv.fit_transform(genre_docs)

  # Row/column i of this matrix corresponds to the i-th ROW of df (its
  # position), not to df's index labels.
  cosine_sim = cosine_similarity(vectorized_genres, vectorized_genres)

  def recommend_movies(title, cosine_sim=cosine_sim, df=df, k=6):
    """
    Recommends movies based on content similarity.

    Args:
      title: Title of the movie to recommend for. If several rows share
        the title, the first one is used.
      cosine_sim: Cosine similarity matrix, indexed by row position in df.
      df: Movie DataFrame.
      k: Maximum number of recommendations to return.

    Returns:
      A list of up to k recommended movie titles, most similar first. The
      queried movie itself is never included. Movies with equal similarity
      keep their DataFrame order.

    Raises:
      IndexError: If no row of df has the given title.
      ValueError: If k is negative.
    """

    if k < 0:
      raise ValueError(f"k must be non-negative, got {k}")

    # Locate the movie by row POSITION. The similarity matrix is positional,
    # so using index labels would pick the wrong row (or fail) whenever df
    # does not have a default 0..n-1 index.
    matches = (df['title'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
      # IndexError is kept for backward compatibility with the previous
      # failure mode, but now carries an explanatory message.
      raise IndexError(f"Movie title {title!r} not found in DataFrame")
    pos = int(matches[0])

    # Similarity of the queried movie to every movie
    sim_row = cosine_sim[pos]

    # Exclude the queried movie explicitly. Dropping "the first sorted
    # entry" is wrong when another movie has an identical genre vector and
    # sorts ahead of it.
    candidates = [i for i in range(len(sim_row)) if i != pos]

    # list.sort is stable, so ties stay in DataFrame order
    candidates.sort(key=lambda i: sim_row[i], reverse=True)

    # Return the top k recommended movie titles
    return df['title'].iloc[candidates[:k]].tolist()

  return recommend_movies

# Toy catalogue used to demonstrate the recommender (swap in real data).
# Each record pairs a movie title with its comma-separated genre string.
movie_data = pd.DataFrame(
    [
        ('The Shawshank Redemption', 'Drama'),
        ('The Godfather', 'Crime, Drama'),
        ('The Dark Knight', 'Action, Thriller'),
        ('12 Angry Men', 'Drama, Crime'),
        ('Pulp Fiction', 'Crime, Thriller'),
    ],
    columns=['title', 'genres'],
)

# Build the recommender once for this catalogue
recommend_movies_func = create_content_based_recommender(movie_data)

# Query it for a single title and show the result
recommendations = recommend_movies_func('The Dark Knight')
print(f"Recommended movies for 'The Dark Knight': {recommendations}")

Recommended movies for 'The Dark Knight': ['Pulp Fiction', 'The Shawshank Redemption', 'The Godfather', '12 Angry Men']
