In [125]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

user_reviews = pd.read_csv('user_reviews.csv', index_col=1)
movie_genres = pd.read_csv('movie_genres.csv', index_col=1).drop('Unnamed: 0', axis=1)

Get put rating and categories in a list

In [126]:
genre_dict = movie_genres.copy().iloc[:, 1:].apply(lambda row: row.tolist(), axis=1).to_dict()
movie_genres_list = movie_genres.copy().iloc[:, 1:].apply(lambda row: row.tolist(), axis=1).to_list()
ratings_dict = user_reviews.copy().T.to_dict()
print(genre_dict['The Net'])
print(ratings_dict['Alana'])

[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0]
{'Unnamed: 0': 10.0, 'The Net': 0.0, "Happily N'Ever After": 2.0, 'Tomorrowland': 0.0, 'American Hero': 0.0, 'Das Boot': 0.0, 'Final Destination 3': 0.0, 'Licence to Kill': 0.0, 'The Hundred-Foot Journey': 0.0, 'The Matrix': 0.0, 'Creature': 0.0, 'The Basket': 0.0, 'Star Trek: The Motion Picture': 0.0, 'The Hunger Games': 0.0, 'The Hurt Locker': 0.0, 'Flatliners': 0.0, 'The Blues Brothers': 0.0, 'The Last Exorcism': 0.0, 'Event Horizon': 0.0, 'Vicky Cristina Barcelona': 0.0, 'The Woman in Black': 0.0, 'Galaxy Quest': 0.0, 'Troy': 0.0, 'The Messengers': 0.0, 'Heaven Is for Real': 0.0, 'The Iceman': 0.0, 'Me and Orson Welles': 0.0, 'The Blair Witch Project': 0.0, 'Seven Samurai': 0.0, 'Con Air': 0.0, 'Little Children': 0.0, 'Novocaine': 0.0, 'Supercross': 0.0, 'Pathology': 0.0, 'Cutthroat Island': 0.0, 'Black Book': 0.0, 'Nixon': 0.0, 'Whale Rider': 0.0, 'Bucky Larson: Born to Be a Star': 0.0, 'Southpaw': 0.0, 'The 

We have a dictonary of users and their ratings, and movie titles with their categories.

We now start to use KNN to classify the similarity of the movies using the user ratings, for now we disregard the categories

In [127]:
movie_names = list(genre_dict.keys())
movie_genres_matrix = np.array(movie_genres)
movie_ratings_matrix = np.array(user_reviews)

# Create the KNN model
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=6, n_jobs=-1)

# Fit the model on the combined feature matrix
knn.fit(movie_genres_matrix)

Similar movies by movie category using KNN

In [128]:
def find_similar_movies(movie_name, movie_names, features_matrix, knn_model):
    # Check if the movie is in our database
    if movie_name not in movie_names:
        return "Movie not found."

    # Find the index of the movie
    movie_index = movie_names.index(movie_name)

    # Get the feature vector for the selected movie
    movie_features = features_matrix[movie_index].reshape(1, -1)

    # Find the 5 nearest neighbors (or however many neighbors you want)
    distances, indices = knn_model.kneighbors(movie_features)

    # Retrieve the names of the nearest neighbors
    similar_movies = [movie_names[index] for index in indices[0]]

    return similar_movies

# KNN Gernre similarity
print(find_similar_movies('Harry Potter and the Order of the Phoenix', movie_names, movie_genres_matrix, knn)[1:])

['Harry Potter and the Prisoner of Azkaban', 'Harry Potter and the Chamber of Secrets', 'Harry Potter and the Half-Blood Prince', 'The Spiderwick Chronicles', 'Alice in Wonderland']


User profile work

In [129]:
def user_profile(reviews_df, genres_df, user_idx) -> pd.Series:

    reviews = reviews_df.iloc[user_idx, 2:]

    rated_movies = reviews[reviews != 0]
    unrated_movies = reviews[reviews == 0]

    genres_rated_movies = genres_df.loc[rated_movies.index]
    genres_unrated_movies = genres_df.loc[unrated_movies.index]
    
    profile = genres_rated_movies.T.dot(rated_movies)
    profile /= profile.sum() #normalize

    return profile

In [130]:
def find_movie_from_profile(user_profile, features_matrix, knn_model):

    user_profile = np.array(user_profile).reshape(1, -1)

    # Find the 5 nearest neighbors (or however many neighbors you want)
    distances, indices = knn_model.kneighbors(user_profile)

    # Retrieve the names of the nearest neighbors
    similar_movies = [movie_names[index] for index in indices[0]]

    return similar_movies

User1 = user_profile(user_reviews, movie_genres, 0).to_list()
#print(User1)

print(find_movie_from_profile(User1, movie_genres_matrix, knn)[1:])

["Perrier's Bounty", 'Crouching Tiger, Hidden Dragon', 'Top Gun', 'The Good Thief', 'True Romance']
