In [46]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
movie_data = {
    'movie_id': ['movie1', 'movie2', 'movie3', 'movie4', 'movie5'],
    'title': ['The Avengers', 'Notting Hill', 'Interstellar', 'The Shawshank Redemption', 'The Conjuring'],
    'genre': [['action', 'adventure'], ['comedy', 'romance'], ['action', 'sci-fi'], ['drama', 'crime'], ['horror', 'thriller']],
    'director': ['Joss Whedon', 'Roger Michell', 'Christopher Nolan', 'Frank Darabont', 'James Wan'],
    'actor': [['Robert Downey Jr.', 'Chris Evans'], ['Julia Roberts', 'Hugh Grant'], ['Matthew McConaughey', 'Anne Hathaway'], ['Tim Robbins', 'Morgan Freeman'], ['Patrick Wilson', 'Vera Farmiga']],
    'rating': [4.5, 3.8, 4.2, 4.7, 3.9]
}
movies = pd.DataFrame(movie_data)
print(movies)


  movie_id                     title                genre           director  \
0   movie1              The Avengers  [action, adventure]        Joss Whedon   
1   movie2              Notting Hill    [comedy, romance]      Roger Michell   
2   movie3              Interstellar     [action, sci-fi]  Christopher Nolan   
3   movie4  The Shawshank Redemption       [drama, crime]     Frank Darabont   
4   movie5             The Conjuring   [horror, thriller]          James Wan   

                                  actor  rating  
0      [Robert Downey Jr., Chris Evans]     4.5  
1           [Julia Roberts, Hugh Grant]     3.8  
2  [Matthew McConaughey, Anne Hathaway]     4.2  
3         [Tim Robbins, Morgan Freeman]     4.7  
4        [Patrick Wilson, Vera Farmiga]     3.9  


In [47]:
def get_item_similarity(movie1, movie2):
    """
    Calculate the cosine similarity between two movies based on their features.
    
    Parameters:
    movie1 (str): The ID of the first movie.
    movie2 (str): The ID of the second movie.
    
    Returns:
    float: The cosine similarity between the two movies.
    """
    movie1_features = movies.loc[movies['movie_id'] == movie1, ['genre', 'director', 'actor']].values[0]
    movie2_features = movies.loc[movies['movie_id'] == movie2, ['genre', 'director', 'actor']].values[0]
    
    feature_values = list(set([item for sublist in movie1_features + movie2_features for item in sublist]))
    movie1_vector = [1 if feature in movie1_features else 0 for feature in feature_values]
    movie2_vector = [1 if feature in movie2_features else 0 for feature in feature_values]
    
    return cosine_similarity([movie1_vector], [movie2_vector])[0][0]


In [48]:
def get_content_based_recommendations(target_movie):
    """
    Get the top 3 movie recommendations based on content-based filtering.
    
    Parameters:
    target_movie (str): The ID of the target movie.
    
    Returns:
    list: A list of the titles of the top 3 recommended movies.
    """
    similarities = [(movie, get_item_similarity(target_movie, movie)) for movie in movies['movie_id'] if movie != target_movie]
    similarities.sort(key=lambda x: x[1], reverse=True)
    
    return [movies.loc[movies['movie_id'] == movie, 'title'].values[0] for movie in [movie for movie, _ in similarities[:3]]]


In [51]:

target_movie = 'movie1'
recommendations = get_content_based_recommendations(target_movie)

print(f"Content-Based Recommendations for '{movies.loc[movies['movie_id'] == target_movie, 'title'].values[0]}':")
for movie in recommendations:
    print(f"- {movie}")

Content-Based Recommendations for 'The Avengers':
- Notting Hill
- Interstellar
- The Shawshank Redemption
