In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the MovieLens "latest-small" tables (paths are relative to the
# current working directory).
movies = pd.read_csv('ml-latest-small/movies.csv')
ratings = pd.read_csv('ml-latest-small/ratings.csv')

# Normalise the genre strings: lowercase them and strip literal spaces so the
# same genre is never counted twice because of capitalisation or spacing.
# Rows with a missing genre become an empty string (no tokens at all).
movies['genres'] = (
    movies['genres']
    .str.lower()
    .str.replace(' ', '', regex=False)
    .fillna('')
)

# TF-IDF vectorization.
# MovieLens stores genres as a '|'-separated list (e.g. 'action|sci-fi'), so
# every run of non-'|' characters is treated as exactly one token.  The
# default token pattern splits on any non-word character, which would break
# hyphenated genres such as 'sci-fi' and 'film-noir' into two terms each and
# silently give those genres double weight in the similarity.
tfidf = TfidfVectorizer(stop_words='english', token_pattern=r'[^|]+')
# Learn the genre vocabulary and build the (n_movies x n_genres) matrix.
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute cosine similarity.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is already the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Define a function to get recommendations based on a given movie title
def get_recommendations(title, cosine_sim=cosine_sim, movies=movies, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``movies['title']``.  If several rows share
        the title, the first one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row/column ``i`` must describe the
        ``i``-th row (by position) of ``movies``.
    movies : pandas.DataFrame
        Frame with a ``title`` column.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Titles ordered by decreasing similarity; movies with equal scores
        keep their row order.  The queried movie itself is never included.

    Raises
    ------
    IndexError
        If no row of ``movies`` has the requested title.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n!r}")

    # Work with the row *position*, not the index label: both cosine_sim and
    # the final .iloc lookup are positional, so this stays correct even when
    # `movies` does not carry a default 0..n-1 index.
    positions = (movies['title'] == title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"no movie titled {title!r} in movies['title']")
    idx = int(positions[0])

    # Drop the queried movie explicitly.  Skipping "the first sorted entry"
    # is not reliable: many movies share an identical genre set and tie at
    # the top score, so the query is not guaranteed to be sorted first.
    scored = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # list.sort is stable (also with reverse=True), so ties keep row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in scored[:top_n]]
    return movies['title'].iloc[movie_indices]

# Example usage of the get_recommendations function: look up the movies most
# similar to 'Toy Story (1995)' using the default similarity matrix and movie
# table. As the last expression of the notebook cell, the returned Series of
# titles is displayed as the cell output.
get_recommendations('Toy Story (1995)')

1706                                          Antz (1998)
2355                                   Toy Story 2 (1999)
2809       Adventures of Rocky and Bullwinkle, The (2000)
3000                     Emperor's New Groove, The (2000)
3568                                Monsters, Inc. (2001)
6194                                     Wild, The (2006)
6486                               Shrek the Third (2007)
6948                       Tale of Despereaux, The (2008)
7760    Asterix and the Vikings (Astérix et les Viking...
8219                                         Turbo (2013)
Name: title, dtype: object