In [1]:
import pickle
import pandas as pd
import numpy as np

In [2]:
# Load the product feature matrix from its pickle file.
# The file is opened in a `with` block so the handle is closed as soon as
# loading finishes, rather than being left open for the life of the kernel.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open("product_features.dat", "rb") as features_file:
    M = pickle.load(features_file)

# Swap the rows and columns of product_features just so it's easier to work with
M = np.transpose(M)

In [3]:
# Read the movie list from CSV, using the 'movie_id' column as the row index
movies_df = pd.read_csv(
    filepath_or_buffer='movies.csv',
    index_col='movie_id',
)

In [4]:
# The movie we want recommendations for. Here we look for movies similar to movie #5.
movie_id = 5

# Look up the chosen movie's row in the movie list by its movie_id index label
movie_information = movies_df.loc[movie_id, :]

print("We are finding movies similar to this movie:")
movie_information

We are finding movies similar to this movie:


title    The Big City Judge 2
genre             legal drama
Name: 5, dtype: object

In [5]:
# Pull the chosen movie's feature row out of M (the matrix-factorization output).
# NOTE(review): the "- 1" presumably converts a 1-based movie_id into a 0-based
# row position, which assumes ids run 1..N in M's row order -- confirm.
current_movie_features = M[movie_id - 1, :]

print("The attributes for this movie are:")
current_movie_features

The attributes for this movie are:


array([ 1.01290407, -0.84690299, -0.75039295,  0.63562955, -0.97885446,
       -1.6712716 , -0.95825561,  0.28137912, -0.30292413, -0.05226884,
       -0.10080464, -0.24149324, -0.03170635, -0.83671211,  1.0796329 ])

In [8]:
# Rank every movie by how far its features are from the chosen movie's features
# (sum of absolute per-feature differences; a smaller score means more similar).

# Step 1: per-feature gap between each row of M and the chosen movie's features
difference = M - current_movie_features

# Step 2: drop the sign so positive and negative gaps cannot cancel each other out
absolute_difference = np.absolute(difference)

# Step 3: add up each movie's gaps into a single total 'difference score'
total_difference = absolute_difference.sum(axis=1)

# Step 4: attach the scores to the movie list as a new column
movies_df['difference_score'] = total_difference

# Step 5: order the movie list from least different to most different
sorted_movie_list = movies_df.sort_values(by='difference_score')

# Step 6: show the first five rows. The chosen movie is part of the list too,
# with a difference score of 0 against itself.
print("The five most similar movies are:")
sorted_movie_list.head(5)

The five most similar movies are:


Unnamed: 0_level_0,title,genre,difference_score
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5,The Big City Judge 2,legal drama,0.0
10,Surrounded by Zombies 1,"horror, zombie fiction",2.775594
8,Sci-Fi Murder Detectives,"supernatural, mystery",3.495761
9,Biker Gangs,"crime drama, action",3.515093
3,The Sheriff 2,"crime drama, western",4.038468
