In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the movie dataset and replace every missing value with an empty
# string in one step, so no NaN is left in any column.
df = pd.read_csv("../Data/movie_dataset.csv").fillna('')

# Combine important features into a single column
def combine_features(row):
    """Return one space-separated string built from a movie's text fields.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing string
            values under the keys 'keywords', 'cast', 'genres' and
            'director'.

    Returns:
        The four values joined by single spaces, in that order.
    """
    feature_columns = ('keywords', 'cast', 'genres', 'director')
    return " ".join(row[column] for column in feature_columns)

# Build one text blob per movie by applying combine_features to each row
df["combined_features"] = df.apply(combine_features, axis="columns")

# Turn every blob into a vector of token counts
vectorizer = CountVectorizer()
count_matrix = vectorizer.fit_transform(df["combined_features"])

# Pairwise cosine similarity between all rows of the count matrix
cosine_sim = cosine_similarity(count_matrix)

# Function to get movie recommendations
def get_recommendations(movie_title, top_n=5):
    """Print the titles of the movies most similar to ``movie_title``.

    Similarity scores are read from the module-level ``cosine_sim`` matrix,
    which is indexed by row position in ``df``.

    Args:
        movie_title: Exact title to look up in ``df["title"]``. If several
            rows share the title, the first one is used.
        top_n: Maximum number of recommendations to print. Defaults to 5.

    Raises:
        ValueError: If ``top_n`` is negative.
        IndexError: If no row of ``df`` has the given title.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    title_matches = (df["title"] == movie_title).to_numpy()
    if not title_matches.any():
        raise IndexError(f"Movie title {movie_title!r} not found in the dataset")

    # Use the row *position* (not the index label): cosine_sim is a plain
    # array addressed positionally. argmax on a boolean mask yields the
    # position of the first True.
    movie_pos = int(title_matches.argmax())
    scores = cosine_sim[movie_pos]

    # Exclude the queried movie explicitly instead of assuming it sorts
    # first; that assumption breaks when another row ties at the top score
    # or when the movie's own feature vector is empty.
    candidates = (pos for pos in range(len(scores)) if pos != movie_pos)
    best_matches = sorted(candidates, key=lambda pos: scores[pos], reverse=True)[:top_n]

    print(f"Top {top_n} recommendations for '{movie_title}':")
    titles = df["title"]
    for pos in best_matches:
        print(titles.iloc[pos])

# Example: print the recommendations for "Avatar"
get_recommendations(movie_title="Avatar")

Top 5 recommendations for 'Avatar':
Guardians of the Galaxy
Aliens
Star Wars: Clone Wars: Volume 1
Star Trek Into Darkness
Star Trek Beyond
