In [1]:
from gensim.models.doc2vec import Doc2Vec
import deepdish as dd
import numpy as np
from scipy import spatial
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def cosine_sim(v1, v2):
    """Return the cosine similarity of two vectors.

    Computed as ``sum(v1 * v2) / (||v1|| * ||v2||)`` with Euclidean norms.

    Parameters
    ----------
    v1, v2 : array_like
        Vectors of equal shape. NumPy arrays are used as-is; plain Python
        sequences (lists, tuples) are converted with ``np.asarray``.

    Returns
    -------
    numpy floating scalar
        The cosine of the angle between ``v1`` and ``v2``. Floating-point
        rounding can leave the value marginally outside ``[-1, 1]``. If an
        input has zero norm the quotient is 0/0, so the result is ``nan``
        (NumPy emits a RuntimeWarning).
    """
    # Coerce so that list/tuple inputs multiply element-wise instead of
    # raising TypeError; ndarray inputs pass through unchanged.
    v1 = np.asarray(v1)
    v2 = np.asarray(v2)

    dot = np.sum(v1 * v2)
    norm_product = np.sqrt(np.sum(np.square(v1))) * np.sqrt(np.sum(np.square(v2)))
    return dot / norm_product

def angular_sim(v1, v2):
    """Return the angular similarity ``1 - angle(v1, v2) / pi``.

    Parameters
    ----------
    v1, v2 : vectors accepted by ``cosine_sim``.

    Returns
    -------
    numpy floating scalar
        1 when the vectors point the same way, 0.5 when they are orthogonal,
        0 when they point in opposite directions. ``nan`` is returned only
        if ``cosine_sim`` itself yields ``nan``.
    """
    # Rounding can push the cosine marginally outside [-1, 1] (e.g. a vector
    # compared with itself can give 1.0000000000000002), where arccos
    # returns nan. Clamp to the valid domain first; nan passes through clip.
    cos = np.clip(cosine_sim(v1, v2), -1.0, 1.0)
    return 1 - (np.arccos(cos) / np.pi)

# Document Vector Space

In [3]:
# Saved gensim Doc2Vec model whose document vectors are compared below.
# The file name looks like a training-settings tag (presumably 20 epochs,
# PV-DBOW, vector size 50, lr 0.025, window 8, 5 negative samples) -- TODO confirm.
path = '../doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5'
model = Doc2Vec.load(path)

In [4]:
# (IMDb id, movie id) per film. The IMDb id forms the doc2vec document tag
# '<imdb_id>.txt'; the movie id is the key into the factor models' index map.
fw_imdb_id, fw_movie_id = 106965, 455      # Free Willy
fw2_imdb_id, fw2_movie_id = 113114, 169    # Free Willy 2

In [5]:
# Free Willy vs. Free Willy 2 in the doc2vec document-vector space.
model.docvecs.similarity(
    '{}.txt'.format(fw_imdb_id),
    '{}.txt'.format(fw2_imdb_id),
)

0.86940241592695133

In [6]:
# (IMDb id, movie id) per film. The IMDb id forms the doc2vec document tag
# '<imdb_id>.txt'; the movie id is the key into the factor models' index map.
jp_imdb_id, jp_movie_id = 107290, 480      # Jurassic Park
jp2_imdb_id, jp2_movie_id = 119567, 1544   # Jurassic Park 2

In [7]:
# Jurassic Park vs. Jurassic Park 2 in the doc2vec document-vector space.
model.docvecs.similarity(
    '{}.txt'.format(jp_imdb_id),
    '{}.txt'.format(jp2_imdb_id),
)

0.67955040301080083

In [8]:
# (IMDb id, movie id) per film. The IMDb id forms the doc2vec document tag
# '<imdb_id>.txt'; the movie id is the key into the factor models' index map.
scr_imdb_id, scr_movie_id = 117571, 1407     # Scream
scr2_imdb_id, scr2_movie_id = 120082, 1717   # Scream 2

In [9]:
# Scream vs. Scream 2 in the doc2vec document-vector space.
model.docvecs.similarity(
    '{}.txt'.format(scr_imdb_id),
    '{}.txt'.format(scr2_imdb_id),
)

0.75141428516212705

In [10]:
# (IMDb id, movie id) per film. The IMDb id forms the doc2vec document tag
# '<imdb_id>.txt'; the movie id is the key into the factor models' index map.
sp_imdb_id, sp_movie_id = 114508, 196      # Species
sp2_imdb_id, sp2_movie_id = 120841, 1862   # Species 2

In [11]:
# Species vs. Species 2 in the doc2vec document-vector space.
model.docvecs.similarity(
    '{}.txt'.format(sp_imdb_id),
    '{}.txt'.format(sp2_imdb_id),
)

0.68311800802591438

In [12]:
# (IMDb id, movie id) per film. The IMDb id forms the doc2vec document tag
# '<imdb_id>.txt'; the movie id is the key into the factor models' index map.
swv_imdb_id, swv_movie_id = 80684, 1196     # Star Wars Episode V
swvi_imdb_id, swvi_movie_id = 86190, 1210   # Star Wars Episode VI

In [13]:
# Star Wars Episode V vs. Episode VI in the doc2vec document-vector space.
model.docvecs.similarity(
    '{}.txt'.format(swv_imdb_id),
    '{}.txt'.format(swvi_imdb_id),
)

0.92307505174504856

In [14]:
# (IMDb id, movie id) per film. The IMDb id forms the doc2vec document tag
# '<imdb_id>.txt'; the movie id is the key into the factor models' index map.
ts_imdb_id, ts_movie_id = 114709, 1        # Toy Story
ts2_imdb_id, ts2_movie_id = 120363, 3114   # Toy Story 2

In [15]:
# Toy Story vs. Toy Story 2 in the doc2vec document-vector space.
model.docvecs.similarity(
    '{}.txt'.format(ts_imdb_id),
    '{}.txt'.format(ts2_imdb_id),
)

0.75291341575190129

# Item Factor Space

## MPCFs (No side information)

In [16]:
# Load the saved MPCF model (no side information).
loaded = dd.io.load('../models/mpcf/2016-05-27_20.57.43_no-si_ml-1m_e20_tt-0.7_task-22.h5')
items_map = loaded['movies']  # maps movie_id to 0-based index
item_factors = loaded['params']['Q']  # item factors, indexed by that 0-based index

# Resolve each movie id to its index into item_factors (original / sequel pairs).
(fw_id, fw2_id,
 jp_id, jp2_id,
 scr_id, scr2_id,
 sp_id, sp2_id,
 swv_id, swvi_id,
 ts_id, ts2_id) = (
    items_map[movie_id]
    for movie_id in (
        fw_movie_id, fw2_movie_id,
        jp_movie_id, jp2_movie_id,
        scr_movie_id, scr2_movie_id,
        sp_movie_id, sp2_movie_id,
        swv_movie_id, swvi_movie_id,
        ts_movie_id, ts2_movie_id,
    )
)

In [17]:
# Free Willy vs. Free Willy 2 -- item factors of the MPCFs model (no side information).
cosine_sim(
    item_factors[fw_id],
    item_factors[fw2_id],
)

0.69230954456395477

In [18]:
# Jurassic Park vs. Jurassic Park 2 -- item factors of the MPCFs model (no side information).
cosine_sim(
    item_factors[jp_id],
    item_factors[jp2_id],
)

0.58373697691542414

In [19]:
# Scream vs. Scream 2 -- item factors of the MPCFs model (no side information).
cosine_sim(
    item_factors[scr_id],
    item_factors[scr2_id],
)

0.68680092354665312

In [20]:
# Species vs. Species 2 -- item factors of the MPCFs model (no side information).
cosine_sim(
    item_factors[sp_id],
    item_factors[sp2_id],
)

0.61696458093518236

In [21]:
# Star Wars Episode V vs. Episode VI -- item factors of the MPCFs model (no side information).
cosine_sim(
    item_factors[swv_id],
    item_factors[swvi_id],
)

0.69006246771456703

In [22]:
# Toy Story vs. Toy Story 2 -- item factors of the MPCFs model (no side information).
cosine_sim(
    item_factors[ts_id],
    item_factors[ts2_id],
)

0.6695954615805696

## MPCFs-SI (with side information)

In [23]:
# Load the saved MPCFs-SI model (with side information).
# NOTE: this rebinds loaded / items_map / item_factors and every *_id index
# used by the MPCFs (no side information) section, so any comparison cell run
# after this one reads the MPCFs-SI factors.
loaded = dd.io.load('../models/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0.h5')
items_map = loaded['items']  # maps movie_id to 0-based index
item_factors = loaded['params']['Q']  # item factors, indexed by that 0-based index

# Resolve each movie id to its index into item_factors (original / sequel pairs).
(fw_id, fw2_id,
 jp_id, jp2_id,
 scr_id, scr2_id,
 sp_id, sp2_id,
 swv_id, swvi_id,
 ts_id, ts2_id) = (
    items_map[movie_id]
    for movie_id in (
        fw_movie_id, fw2_movie_id,
        jp_movie_id, jp2_movie_id,
        scr_movie_id, scr2_movie_id,
        sp_movie_id, sp2_movie_id,
        swv_movie_id, swvi_movie_id,
        ts_movie_id, ts2_movie_id,
    )
)

In [24]:
# Free Willy vs. Free Willy 2 -- expects item_factors from the MPCFs-SI (with side information) load.
cosine_sim(
    item_factors[fw_id],
    item_factors[fw2_id],
)

0.70510736702346399

In [25]:
# Jurassic Park vs. Jurassic Park 2 -- expects item_factors from the MPCFs-SI (with side information) load.
cosine_sim(
    item_factors[jp_id],
    item_factors[jp2_id],
)

0.61707655058330768

In [26]:
# Scream vs. Scream 2 -- expects item_factors from the MPCFs-SI (with side information) load.
cosine_sim(
    item_factors[scr_id],
    item_factors[scr2_id],
)

0.72329704465422329

In [27]:
# Species vs. Species 2 -- expects item_factors from the MPCFs-SI (with side information) load.
cosine_sim(
    item_factors[sp_id],
    item_factors[sp2_id],
)

0.70643524738020902

In [28]:
# Star Wars Episode V vs. Episode VI -- expects item_factors from the MPCFs-SI (with side information) load.
cosine_sim(
    item_factors[swv_id],
    item_factors[swvi_id],
)

0.72205251954929983

In [29]:
# Toy Story vs. Toy Story 2 -- expects item_factors from the MPCFs-SI (with side information) load.
cosine_sim(
    item_factors[ts_id],
    item_factors[ts2_id],
)

0.66780390794391142