In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell runs, so edits to the imported recommender
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Remember the directory the kernel started in the first time this cell runs.
# A bare os.chdir("../") is not idempotent: every re-execution of the cell
# would climb one more level and break the relative paths used further down.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()

# Work from the parent of the notebook's start directory (same destination as
# the original os.chdir("../") on a fresh kernel).
os.chdir(os.path.join(_NOTEBOOK_START_DIR, "../"))

In [3]:
from ease_recommender import *
from npmi_recommender import *

import pickle as p

In [4]:
print("loading cache data...")

# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# only load caches that were produced by this project.
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector.
with open("cached_data/movie_lens_preprocessed.p", "rb") as cache_file:
    D = p.load(cache_file)

# Row indices, column indices and values that are later combined into the
# sparse user x movie matrix.
row = D["userId"]
col = D["movieId"]
data = D["rating"]

# Movie metadata table (exposes a `title` column used for lookups below).
movies = D["movies"]

print("done")

loading cache data...
done


In [5]:
def find_match_using_terms(terms, movies=movies, case_insensitive=False):
    """Return the position of the single movie whose title contains every term.

    Parameters
    ----------
    terms : list, tuple or set of str
        Patterns that must ALL occur in the title. Each one is passed to
        ``Series.str.contains`` and is therefore interpreted as a regular
        expression (so e.g. parentheses need escaping).
    movies : object with a ``title`` string column
        Table to search. Defaults to the notebook-level ``movies`` table as it
        was bound when this cell was executed.
    case_insensitive : bool, default False
        When True, titles and terms are compared ignoring case.

    Returns
    -------
    numpy integer
        Zero-based row position (not index label) of the unique match.

    Raises
    ------
    AssertionError
        If ``terms`` is not a list, tuple or set.
    Exception
        If more than one title matches; the matching titles are attached.
    IndexError
        If no title matches.
    """
    assert isinstance(terms, (list, tuple, set))

    title = movies.title

    # Start from "everything matches" as a real boolean vector. The previous
    # scalar ``True`` seed made an empty ``terms`` collapse to position 0.
    matches = np.ones(len(title), dtype=bool)
    for term in terms:
        # ``case=`` makes the comparison ignore case on both sides; lowering
        # only the titles meant terms with capitals could never match.
        # ``na=False`` treats missing titles as non-matching.
        hit = title.str.contains(term, case=not case_insensitive, na=False)
        matches &= hit.to_numpy(dtype=bool)

    # Positions (not labels) of the rows that satisfied every term.
    matches = np.flatnonzero(matches)

    if len(matches) > 1:
        # Positions must be resolved with iloc; loc would treat them as labels.
        raise Exception("Multiple matches found, filter down to a single match", title.iloc[matches].tolist())

    if len(matches) == 0:
        # Same exception type the bare ``matches[0]`` used to raise, but with
        # a message that tells the user what went wrong.
        raise IndexError("No matches found for terms", list(terms))

    return matches[0]

In [6]:
# Build the sparse interaction matrix from the cached triples: `row` holds the
# userId values and `col` the movieId values loaded above. Casting the ratings
# to bool and then to int64 drops the rating magnitude, so each non-zero
# rating is stored as a binary interaction flag.
# NOTE(review): `csr_matrix` and `np` are not imported explicitly in this
# notebook; presumably they arrive via the star imports above - confirm.
mat = csr_matrix((data.astype(bool), (row, col))).astype(np.int64)

In [7]:
# Query item "a": look up the movie by title fragments. The commented-out
# lines are alternative query movies that can be swapped in.
a = find_match_using_terms(["Sense and Sensibility", "1995"])
# a = find_match_using_terms(["Knives Out", "2019"])
# a = find_match_using_terms(["Witness", "1957"])

# Display the matched row to confirm the intended movie was selected.
movies.loc[a]

title         Sense and Sensibility (1995)
genres                       Drama|Romance
imdbId                              114388
tmdbId                              4584.0
avg_rating                        7.891793
num_votes                            18677
Name: 16, dtype: object

In [8]:
# Query item "b": the second movie of the a -> b pair used below. The
# commented-out lines are alternative choices that can be swapped in.
b = find_match_using_terms(["Pride and Prejudice", "1995"])
# b = find_match_using_terms(["Death", "Nile", "1978"])
# b = find_match_using_terms(["Amadeus"])

# Display the matched row to confirm the intended movie was selected.
movies.loc[b]

title         Pride and Prejudice (1995)
genres                     Drama|Romance
imdbId                            112130
tmdbId                          164721.0
avg_rating                       7.98947
num_votes                           2607
Name: 7382, dtype: object

In [9]:
# Choose the EASE regularisation value lambda_ using the a -> b pair as the
# tuning signal. The helper is defined outside this notebook; in the recorded
# run it printed one error per candidate lambda_ from 1e6 down to 1e2.
# NOTE(review): presumably the lambda_ with the lowest error is returned
# (1000 in the recorded run) - confirm against the helper's implementation.
# Variant without the fast approximation, kept for reference:
# lambda_ = optimize_lambda_using_a_to_b_matching(mat, a, b, fast_approximation=False)
lambda_ = optimize_lambda_using_a_to_b_matching(mat, a, b, fast_approximation=True)

lambda_: 1000000
error: 182.999999
lambda_: 100000.0
error: 42.99999
lambda_: 10000.0
error: 11.9999
lambda_: 1000.0
error: 9.999
lambda_: 100.0
error: 13.99


In [10]:
# Re-evaluate the a -> b error metric for EASE at the selected lambda_, with
# lambda_penalty switched off.
# NOTE(review): presumably this reports the raw error without the penalty
# term used during the lambda_ search - confirm in the helper's source.
a_to_b_error_metric(mat, a, b, lambda_, lambda_penalty=False)

lambda_: 1000
error: 9


9

In [11]:
# Compute the a -> b error metric with the NPMI-based scorer so it can be
# compared with the EASE result above. `temp` is passed as 1.
# NOTE(review): the meaning of `temp` is defined by the helper - presumably a
# temperature parameter; confirm in its source.
a_to_b_error_metric_npmi(mat, a, b, temp=1)

temp: 1
error: 1782.0


1782.0

In [12]:
# Number of highest-scoring movies to keep in each recommendation list below.
top_k = 20

In [13]:
# EASE neighbours of item `a`: score every movie against it and keep the
# top_k best-scoring rows of `movies`.
similarity_scores = calculate_ease_for_item_cg(mat, a, lambda_)

# Negating the scores makes argsort list the highest score first.
best_first = np.argsort(-similarity_scores)
top_k_ids = best_first[:top_k].tolist()
top_k_matches = movies.loc[top_k_ids]

top_k_matches

Unnamed: 0_level_0,title,genres,imdbId,tmdbId,avg_rating,num_votes
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
819,Emma (1996),Comedy|Drama|Romance,116191,3573.0,7.48511,6486
27,Persuasion (1995),Drama|Romance,114117,17015.0,8.076637,2719
7382,Pride and Prejudice (1995),Drama|Romance,112130,164721.0,7.98947,2607
510,"Remains of the Day, The (1993)",Drama|Romance,107943,1245.0,7.783682,8651
605,Jane Eyre (1996),Drama|Romance,116684,47333.0,7.271146,1807
57,"Postman, The (Postino, Il) (1994)",Comedy|Drama|Romance,110877,11010.0,7.927828,10200
10352,Pride & Prejudice (2005),Drama|Romance,414387,4348.0,7.703508,6645
492,Much Ado About Nothing (1993),Comedy|Romance,107616,11971.0,7.734885,11266
258,Little Women (1994),Drama,110367,9587.0,7.199093,7447
35,Dead Man Walking (1995),Crime|Drama,112818,687.0,7.847888,17814


In [14]:
# EASE neighbours of item `b`: score every movie against it and keep the
# top_k best-scoring rows of `movies`.
similarity_scores = calculate_ease_for_item_cg(mat, b, lambda_)

# Negating the scores makes argsort list the highest score first.
best_first = np.argsort(-similarity_scores)
top_k_ids = best_first[:top_k].tolist()
top_k_matches = movies.loc[top_k_ids]

top_k_matches

Unnamed: 0_level_0,title,genres,imdbId,tmdbId,avg_rating,num_votes
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
14199,Persuasion (2007),Drama|Romance,844330,13949.0,7.719715,330
17597,North & South (2004),Drama|Romance,417349,147269.0,8.057732,400
10352,Pride & Prejudice (2005),Drama|Romance,414387,4348.0,7.703508,6645
13158,"Young Victoria, The (2009)",Drama|Romance,962736,18320.0,7.472767,679
27,Persuasion (1995),Drama|Romance,114117,17015.0,8.076637,2719
15923,Jane Eyre (2011),Drama|Romance,1229822,38684.0,7.451439,824
16728,Northanger Abbey (2007),Drama|Romance,844794,18093.0,7.429658,201
11401,Becoming Jane (2007),Drama|Romance,416508,2977.0,7.129006,837
2984,Mansfield Park (1999),Comedy|Drama|Romance,178737,10399.0,7.523058,1191
16,Sense and Sensibility (1995),Drama|Romance,114388,4584.0,7.891793,18677


In [15]:
# Normalized pointwise mutual information (NPMI) neighbours of item `a`:
# score every movie against it and keep the top_k best-scoring rows.
similarity_scores = npmi_batch(mat, a)

# Negating the scores makes argsort list the highest score first.
best_first = np.argsort(-similarity_scores)
top_k_ids = best_first[:top_k].tolist()
top_k_matches = movies.loc[top_k_ids]

top_k_matches

Unnamed: 0_level_0,title,genres,imdbId,tmdbId,avg_rating,num_votes
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
819,Emma (1996),Comedy|Drama|Romance,116191,3573.0,7.48511,6486
27,Persuasion (1995),Drama|Romance,114117,17015.0,8.076637,2719
510,"Remains of the Day, The (1993)",Drama|Romance,107943,1245.0,7.783682,8651
492,Much Ado About Nothing (1993),Comedy|Romance,107616,11971.0,7.734885,11266
262,Like Water for Chocolate (Como agua para choco...,Drama|Fantasy|Romance,103994,18183.0,7.827167,9183
57,"Postman, The (Postino, Il) (1994)",Comedy|Drama|Romance,110877,11010.0,7.927828,10200
529,Shadowlands (1993),Drama|Romance,108101,10445.0,7.772285,3548
258,Little Women (1994),Drama,110367,9587.0,7.199093,7447
352,Four Weddings and a Funeral (1994),Comedy|Romance,109831,712.0,7.284194,19987
504,"Piano, The (1993)",Drama|Romance,107822,713.0,7.368306,12289


In [16]:
# Normalized pointwise mutual information (NPMI) neighbours of item `b`:
# score every movie against it and keep the top_k best-scoring rows.
similarity_scores = npmi_batch(mat, b)

# Negating the scores makes argsort list the highest score first.
best_first = np.argsort(-similarity_scores)
top_k_ids = best_first[:top_k].tolist()
top_k_matches = movies.loc[top_k_ids]

top_k_matches

Unnamed: 0_level_0,title,genres,imdbId,tmdbId,avg_rating,num_votes
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
14199,Persuasion (2007),Drama|Romance,844330,13949.0,7.719715,330
17597,North & South (2004),Drama|Romance,417349,147269.0,8.057732,400
16728,Northanger Abbey (2007),Drama|Romance,844794,18093.0,7.429658,201
13158,"Young Victoria, The (2009)",Drama|Romance,962736,18320.0,7.472767,679
10352,Pride & Prejudice (2005),Drama|Romance,414387,4348.0,7.703508,6645
11401,Becoming Jane (2007),Drama|Romance,416508,2977.0,7.129006,837
15923,Jane Eyre (2011),Drama|Romance,1229822,38684.0,7.451439,824
24799,Sense & Sensibility (2008),Drama|Romance,847150,315010.0,7.693069,76
20467,Cranford (2007),Drama,974077,64047.0,7.877193,46
29509,Little Dorrit (2008),Drama,1178522,47084.0,7.922222,73


In [17]:
# Combine the EASE rankings of both query items. Each movie receives a
# rank-based score per query item and the final score is the PRODUCT of the
# two (not an average), so a movie has to rank well for both `a` and `b`.


def _rank_scores(scores):
    """Convert raw similarity scores into rank-based scores.

    The highest raw score maps to ``1 - 1/n`` and the lowest to ``0.0``,
    where ``n`` is the number of scored items.
    """
    n_items = len(scores)
    ranking = np.full(n_items, -1.0)
    # argsort of the negated scores lists items best-first; the i-th best item
    # is assigned 1 - (i + 1) / n.
    ranking[np.argsort(-scores)] = 1.0 - ((1.0 + np.arange(n_items)) / n_items)
    return ranking


similarity_scores_a = calculate_ease_for_item_cg(mat, a, lambda_)
similarity_ranking_a = _rank_scores(similarity_scores_a)

similarity_scores_b = calculate_ease_for_item_cg(mat, b, lambda_)
similarity_ranking_b = _rank_scores(similarity_scores_b)

similarity_scores = similarity_ranking_a * similarity_ranking_b
# Alternative: multiply the raw scores instead of the rank-based ones.
# similarity_scores = similarity_scores_a * similarity_scores_b

top_k_matches = movies.loc[np.argsort(-similarity_scores)[:top_k].tolist()]

# Reuse the frame computed above instead of sorting a second time.
for m in top_k_matches["title"]:
    print(m)

Persuasion (1995)
Pride & Prejudice (2005)
Emma (1996)
Mansfield Park (1999)
Becoming Jane (2007)
Persuasion (2007)
North & South (2004)
Jane Eyre (2011)
Importance of Being Earnest, The (2002)
Northanger Abbey (2007)
Jane Eyre (1996)
Duchess, The (2008)
Roman Holiday (1953)
Far from the Madding Crowd (2015)
Bridget Jones's Diary (2001)
The Queen (2006)
Fiddler on the Roof (1971)
Pride and Prejudice (1940)
Phantom of the Opera, The (2004)
Room with a View, A (1986)
