In [1]:
import ast

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load the movie table from tmdb_5000_movies.csv and replace missing
# 'overview' / 'genres' cells with empty strings so both columns can be
# handled as plain text afterwards.
movies_df = pd.read_csv('tmdb_5000_movies.csv').fillna(
    {'overview': '', 'genres': ''}
)

def combine_features(row):
    """Build the text that describes one movie: its overview plus its genre names.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must provide ``row['overview']`` (str) and ``row['genres']`` (str).
        ``genres`` is expected to be the textual form of a list of dicts that
        each carry a ``'name'`` key; a blank string means "no genres".

    Returns
    -------
    str
        ``overview``, a single space, then the genre names joined by spaces.

    Raises
    ------
    ValueError, SyntaxError
        If a non-blank ``genres`` string is not a valid Python/JSON-style literal.
    """
    raw_genres = row['genres']
    # ast.literal_eval only accepts literals, so text coming from the CSV can
    # never be executed as code (which eval() would happily do).
    # Blank cells (what fillna('') leaves behind for missing genres) are
    # treated as an empty genre list instead of raising SyntaxError.
    genres = ast.literal_eval(raw_genres) if raw_genres.strip() else []
    return row['overview'] + ' ' + ' '.join(genre['name'] for genre in genres)

# Row-wise: store each movie's overview + genre-name text in a new column.
movies_df['combined_features'] = movies_df.apply(combine_features, axis='columns')


In [3]:
# TF-IDF vectorizer configured to drop English stop words.
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary from the combined overview+genre text and vectorize
# every movie in one step; row order follows movies_df.
tfidf_matrix = tfidf.fit_transform(movies_df['combined_features'])

In [4]:
# Pairwise cosine similarity between every pair of movies; with a single
# argument the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

In [5]:
def get_recommendations(movie_list, cosine_sim=cosine_sim, top_n=10):
    """Recommend movies similar to the ones in ``movie_list``.

    The similarity rows of every recognized title are averaged into one score
    per catalog movie, and the best-scoring movies that the user did not list
    are returned.

    Parameters
    ----------
    movie_list : iterable of str
        Titles to base the recommendation on. Titles that do not appear in
        ``movies_df['title']`` are ignored; if a title appears several times
        in the catalog, its first occurrence is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix, indexed by row position in ``movies_df``.
        Defaults to the notebook-level ``cosine_sim``.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Titles of the recommended movies, best match first. Empty when none
        of the given titles is found in the catalog.
    """
    titles = movies_df['title']

    # Resolve titles to row *positions* (not index labels) because both
    # cosine_sim[...] and .iloc[...] below are positional.
    seen = []
    for title in movie_list:
        positions = (titles == title).to_numpy().nonzero()[0]
        if len(positions):
            seen.append(int(positions[0]))

    # Nothing recognized: return an empty result instead of dividing by zero.
    if not seen:
        return titles.iloc[[]]

    mean_scores = sum(cosine_sim[pos] for pos in seen) / len(seen)

    # Explicitly exclude what the user already watched rather than assuming
    # the single top-ranked row is the input movie (wrong for several inputs).
    excluded = set(seen)
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(mean_scores) if pos not in excluded),
        key=lambda pair: pair[1],
        reverse=True,
    )
    best_positions = [pos for pos, _ in ranked[:max(top_n, 0)]]
    return titles.iloc[best_positions]


In [6]:
# Read a comma-separated list of titles and trim whitespace around each one.
user_input = input("Enter the movies you have seen, separated by commas: ")
user_movies = list(map(str.strip, user_input.split(',')))

# Compute recommendations for the entered titles.
recommended_movies = get_recommendations(user_movies)

# Show the result.
print("Recommendations for you:")
print(recommended_movies)

Enter the movies you have seen, separated by commas:  Avatar


Recommendations for you:
3604                       Apollo 18
4401             The Helix... Loaded
634                       The Matrix
2130                    The American
1341            The Inhabited Island
529                 Tears of the Sun
311     The Adventures of Pluto Nash
942                 The Book of Life
1610                           Hanna
2628             Blood and Chocolate
Name: title, dtype: object
