In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the MovieLens dataset 
movies_df = pd.read_csv('movies.csv')

# Display the first few rows of the dataset
print("Sample of the MovieLens dataset:")
print(movies_df.head())

# Function to create a content-based recommendation system
def content_based_recommendation(user_preferences, movies_df):
    # Combine relevant features into a single column
    movies_df['Features'] = movies_df['genres']

    # Create a TF-IDF vectorizer
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df['Features'])

    # Calculate the cosine similarity between movies
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Get indices of movies
    indices = pd.Series(movies_df.index, index=movies_df['title'])
    genre_indices = pd.Series(movies_df.index, index=movies_df['genres'])

    # Function to get movie names based on a given genre
    def get_names_by_genre(genre):
        try:
            idx = genre_indices[genre]
            sim_scores = sorted(list(enumerate(cosine_sim[idx].flatten())), key=lambda x: x[1], reverse=True)
        
            # Filter out invalid indices
            valid_indices = [i[0] for i in sim_scores if i[0] < len(movies_df)]
        
            movie_names = movies_df['title'].iloc[valid_indices]
            return movie_names
        except KeyError:
            print(f"No movies found for the genre '{genre}'.")
            return pd.Series()


    # Get movie names based on user preferences
    user_recommendations = pd.Series()
    for preference in user_preferences:
        user_recommendations = user_recommendations.append(get_names_by_genre(preference))

    return user_recommendations.unique()

# Get user preferences dynamically
user_preferences = input("Enter your movie preferences (comma-separated genres): ").split(',')

# Get content-based recommendations for the user
recommendations = content_based_recommendation(user_preferences, movies_df)

# Display a subset of recommendations
if len(recommendations) > 10:
    print("Recommended Movie Indices:")
    print(recommendations[:10])
else:
    print("Recommended Movie Indices:")
    print(recommendations)
        


Sample of the MovieLens dataset:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
Enter your movie preferences (comma-separated genres): Comedy,Drama


  user_recommendations = pd.Series()
  user_recommendations = user_recommendations.append(get_names_by_genre(preference))


Recommended Movie Indices:
['Father of the Bride Part II (1995)' 'Four Rooms (1995)'
 'Ace Ventura: When Nature Calls (1995)' 'Bio-Dome (1996)' 'Friday (1995)'
 'Black Sheep (1996)' 'Mr. Wrong (1996)' 'Happy Gilmore (1996)'
 'Steal Big, Steal Little (1995)' 'Flirting With Disaster (1996)']


  user_recommendations = user_recommendations.append(get_names_by_genre(preference))
