In [None]:
# task 4 - Recommendation system

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

def content_based_recommendations(movie_title, movie_data, tfidf_matrix):
    """Recommend up to five movies whose content is most similar to a title.

    Args:
        movie_title: Title to look up in ``movie_data['title']``. When the
            title occurs more than once, the first occurrence is used.
        movie_data: DataFrame with a ``'title'`` column whose row order
            matches the row order of ``tfidf_matrix``.
        tfidf_matrix: 2-D feature matrix (sparse or dense), one row per movie.

    Returns:
        A Series of titles (a slice of ``movie_data['title']``) ordered from
        most to least similar, never containing the query movie itself.

    Raises:
        KeyError: If ``movie_title`` is not present in ``movie_data``.
    """
    titles = movie_data['title']

    # Resolve the title to a row *position* rather than an index label so the
    # lookup stays aligned with tfidf_matrix even for a non-default index.
    is_target = (titles == movie_title).to_numpy()
    if not is_target.any():
        raise KeyError(f"Movie title not found: {movie_title!r}")
    target = int(is_target.argmax())

    # Only the query row is needed; a two-sided slice keeps the input 2-D for
    # both sparse and dense matrices and avoids building the full N x N matrix.
    query_row = tfidf_matrix[target:target + 1]
    scores = pd.Series(cosine_similarity(query_row, tfidf_matrix).ravel())

    # Drop the query movie explicitly instead of assuming it sorts first:
    # a movie with identical features ties at 1.0 and could otherwise push
    # the query itself into the results. The stable sort keeps ties in
    # catalogue order.
    ranked = scores.drop(index=target).sort_values(ascending=False, kind='stable')
    top_positions = ranked.index[:5].tolist()
    return titles.iloc[top_positions]

def collaborative_filtering_recommendations(user_id, ratings, movie_data):
    """Recommend movies rated by the users most similar to ``user_id``.

    A user x movie rating matrix is built (missing ratings become 0), the
    nearest users by cosine distance are found with brute-force k-NN, and up
    to five movies those neighbours rated -- and the target user has not --
    are selected, giving priority to the closest neighbours.

    Args:
        user_id: Value from ``ratings['userId']`` identifying the target user.
            Ids do not have to be contiguous or start at 1.
        ratings: DataFrame with ``'userId'``, ``'movieId'`` and ``'rating'``
            columns, at most one row per (user, movie) pair.
        movie_data: DataFrame with ``'movieId'`` and ``'title'`` columns.

    Returns:
        A Series of titles taken from ``movie_data`` (in ``movie_data`` row
        order) for the selected movies; empty when nothing can be recommended.

    Raises:
        KeyError: If ``user_id`` has no rows in ``ratings``.
        ValueError: Raised by ``DataFrame.pivot`` when ``ratings`` contains
            duplicate (userId, movieId) pairs.
    """
    user_movie_ratings = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    if user_id not in user_movie_ratings.index:
        raise KeyError(f"User not found in ratings: {user_id!r}")

    # Translate the user id into its row position in the pivot table; ids are
    # labels, not offsets, so "user_id - 1" is only right for ids 1..N.
    user_pos = user_movie_ratings.index.get_loc(user_id)

    sparse_matrix = csr_matrix(user_movie_ratings.to_numpy())
    model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
    model_knn.fit(sparse_matrix)

    n_neighbors = min(5, len(user_movie_ratings))  # cannot exceed the number of users
    _, indices = model_knn.kneighbors(sparse_matrix[user_pos], n_neighbors=n_neighbors)

    already_rated = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])

    # A dict is used as an insertion-ordered set: neighbours come back
    # nearest-first, so the first five distinct movies are the ones backed by
    # the most similar users (a plain set would make the choice arbitrary).
    candidates = {}
    for pos in indices[0]:
        if pos == user_pos:
            continue  # the query user is normally its own nearest neighbour
        neighbor_id = user_movie_ratings.index[pos]
        for movie_id in ratings.loc[ratings['userId'] == neighbor_id, 'movieId']:
            if movie_id not in already_rated:
                candidates.setdefault(movie_id, None)

    recommended_movies = list(candidates)[:5]
    return movie_data[movie_data['movieId'].isin(recommended_movies)]['title']

if __name__ == "__main__":
    # Small in-memory demo: a five-movie catalogue described by genre text.
    movies = pd.DataFrame(
        [
            (1, "Movie A", "Action Adventure"),
            (2, "Movie B", "Romance Drama"),
            (3, "Movie C", "Sci-Fi"),
            (4, "Movie D", "Action Thriller"),
            (5, "Movie E", "Comedy"),
        ],
        columns=['movieId', 'title', 'genres'],
    )

    # Each of the five users rates two movies; rows are (userId, movieId, rating).
    ratings = pd.DataFrame(
        [
            (1, 1, 5), (1, 2, 4),
            (2, 2, 4), (2, 3, 5),
            (3, 3, 3), (3, 4, 4),
            (4, 4, 5), (4, 5, 3),
            (5, 5, 4), (5, 1, 2),
        ],
        columns=['userId', 'movieId', 'rating'],
    )

    # TF-IDF features over the genre strings, one row per movie.
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(movies['genres'])

    print("Content-Based Recommendations for 'Movie A':")
    similar_titles = content_based_recommendations("Movie A", movies, tfidf_matrix)
    print(similar_titles)

    print("Collaborative Filtering Recommendations for User 1:")
    neighbour_titles = collaborative_filtering_recommendations(1, ratings, movies)
    print(neighbour_titles)
