In [1]:
from gensim.models.doc2vec import Doc2Vec
import deepdish as dd
import numpy as np
from scipy import spatial
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from IPython.display import display, HTML

In [2]:
def cosine_sim(v1, v2):
    #return cosine_similarity(v1,v2)
    #return 1 - spatial.distance.cosine(v1, v2)
    return np.sum(v1 * v2) / (np.sqrt(np.sum(np.square(v1))) * np.sqrt(np.sum(np.square(v2))))

def angular_sim(v1, v2):
    return 1 - (np.arccos(cosine_sim(v1,v2))/np.pi)

## Compare DocVecs

In [3]:
path = '../doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5'
model = Doc2Vec.load(path)

In [4]:
# jurassic park
jp_imdb_id = 107290
jp_movie_id = 82
# 261 ratings in our ML-100k

# jurassic park 2 (Lost World: Jurassic Park, The (1997))
jp2_imdb_id = 119567
jp2_movie_id = 252
# 158 ratings in our ML-100k

In [5]:
model.docvecs.similarity('{}.txt'.format(jp_imdb_id),'{}.txt'.format(jp2_imdb_id))

0.67955040301080083

In [6]:
# similar movies to jurassic park
model.docvecs.most_similar(positive=['{}.txt'.format(jp_imdb_id)])

[('119675.txt', 0.7279843091964722),
 ('119567.txt', 0.679550290107727),
 ('120004.txt', 0.6715301871299744),
 ('90605.txt', 0.6436644792556763),
 ('118928.txt', 0.633784294128418),
 ('97443.txt', 0.6167540550231934),
 ('117998.txt', 0.6146465539932251),
 ('56931.txt', 0.6136928200721741),
 ('88760.txt', 0.6116420030593872),
 ('118689.txt', 0.5930735468864441)]

In [7]:
# disimilar movies to jurassic park
model.docvecs.most_similar(negative=['{}.txt'.format(jp_imdb_id)])

[('16630.txt', 0.11265313625335693),
 ('36868.txt', 0.09027586132287979),
 ('98635.txt', 0.08478951454162598),
 ('89853.txt', 0.08220337331295013),
 ('99334.txt', 0.0779241994023323),
 ('57710.txt', 0.07723505795001984),
 ('113083.txt', 0.07643953710794449),
 ('168740.txt', 0.07486752420663834),
 ('189142.txt', 0.06650149822235107),
 ('156887.txt', 0.04408472776412964)]

In [8]:
# most dissimilar to jurassic park
harry_imdb_id = 98635
harry_movie_id = 216
# title "When Harry Met Sally"

In [9]:
# Scream
scr_imdb_id = 117571
scr_movie_id = 288

# Scream 2
scr2_imdb_id = 120082
scr2_movie_id = 895

In [10]:
model.docvecs.similarity('{}.txt'.format(scr_imdb_id),'{}.txt'.format(scr2_imdb_id))

0.75141428516212705

In [11]:
# similar movies to scream
model.docvecs.most_similar(positive=['{}.txt'.format(scr_imdb_id)])

[('134084.txt', 0.7863492369651794),
 ('120082.txt', 0.7514142394065857),
 ('91954.txt', 0.6668806076049805),
 ('155776.txt', 0.6493260264396667),
 ('111686.txt', 0.6400280594825745),
 ('146336.txt', 0.636536180973053),
 ('192731.txt', 0.6228945851325989),
 ('204626.txt', 0.616219699382782),
 ('110632.txt', 0.6104223728179932),
 ('103919.txt', 0.6087551712989807)]

In [12]:
# disimilar movies to scream
model.docvecs.most_similar(negative=['{}.txt'.format(scr_imdb_id)])

[('65938.txt', 0.0465000718832016),
 ('92048.txt', 0.03354673832654953),
 ('120633.txt', 0.027388297021389008),
 ('85859.txt', 0.026076868176460266),
 ('200071.txt', 0.0013969168066978455),
 ('66473.txt', -0.00011243671178817749),
 ('110167.txt', -0.0009681433439254761),
 ('98105.txt', -0.00425439327955246),
 ('181984.txt', -0.007469929754734039),
 ('62218.txt', -0.007664471864700317)]

In [13]:
# dissimilar to scream
# Local Hero (1983)
hero_imdb_id = 85859
hero_movie_id = 516

In [14]:
# GoldenEye (james bond)
ge_imdb_id = 113189
ge_movie_id = 2

# tomorrow never dies (james bond)
tom_imdb_id = 120347
tom_movie_id = 751

In [15]:
model.docvecs.similarity('{}.txt'.format(ge_imdb_id),'{}.txt'.format(tom_imdb_id))

0.62593487374729428

In [16]:
# similar movies to golden eye
model.docvecs.most_similar(positive=['{}.txt'.format(ge_imdb_id)])

[('97742.txt', 0.7034809589385986),
 ('104299.txt', 0.6447733640670776),
 ('58150.txt', 0.6374518275260925),
 ('120347.txt', 0.6259348392486572),
 ('71807.txt', 0.6256387829780579),
 ('117060.txt', 0.6214303970336914),
 ('76752.txt', 0.6121035814285278),
 ('113948.txt', 0.5996736288070679),
 ('79574.txt', 0.5983946323394775),
 ('110989.txt', 0.5890414714813232)]

In [17]:
# disimilar movies to golden eye
model.docvecs.most_similar(negative=['{}.txt'.format(ge_imdb_id)])

[('125664.txt', 0.09645520150661469),
 ('81150.txt', 0.06718134880065918),
 ('34492.txt', 0.062411241233348846),
 ('123209.txt', 0.05626177042722702),
 ('114354.txt', 0.0500088669359684),
 ('176422.txt', 0.047035086899995804),
 ('126604.txt', 0.039543554186820984),
 ('48473.txt', 0.0331258624792099),
 ('160338.txt', 0.032457709312438965),
 ('97940.txt', 0.03106977790594101)]

In [18]:
# dissimilar to golden eye
# title Pather Panchali (1955)
pather_imdb_id = 48473
pather_movie_id = 1449

In [19]:
# Die Hard
dh_imdb_id = 95016
dh_movie_id = 144

# Die Hard 2
dh2_imdb_id = 99423
dh2_movie_id = 226

# Die Hard 3
dh3_imdb_id = 112864
dh3_movie_id = 550

In [20]:
model.docvecs.similarity('{}.txt'.format(dh_imdb_id),'{}.txt'.format(dh2_imdb_id))

0.43955123247182337

In [21]:
model.docvecs.similarity('{}.txt'.format(dh_imdb_id),'{}.txt'.format(dh3_imdb_id))

0.43330402057502726

In [22]:
model.docvecs.similarity('{}.txt'.format(dh2_imdb_id),'{}.txt'.format(dh3_imdb_id))

0.38723222829802006

In [23]:
# similar movies to die hard
model.docvecs.most_similar(positive=['{}.txt'.format(dh_imdb_id)])

[('89003.txt', 0.6158862113952637),
 ('92857.txt', 0.5996342301368713),
 ('97216.txt', 0.5971028804779053),
 ('113326.txt', 0.5600196123123169),
 ('100403.txt', 0.5567358732223511),
 ('92105.txt', 0.555894136428833),
 ('70355.txt', 0.5538268089294434),
 ('73802.txt', 0.5478305220603943),
 ('105698.txt', 0.5445008277893066),
 ('211938.txt', 0.5342406034469604)]

In [24]:
# disimilar movies to die hard
model.docvecs.most_similar(negative=['{}.txt'.format(dh_imdb_id)])

[('124315.txt', 0.1448143869638443),
 ('91939.txt', 0.14431971311569214),
 ('113347.txt', 0.13826590776443481),
 ('116384.txt', 0.12819638848304749),
 ('108394.txt', 0.1220436617732048),
 ('120550.txt', 0.096884585916996),
 ('82406.txt', 0.09276328235864639),
 ('105201.txt', 0.08727686107158661),
 ('109454.txt', 0.08726204931735992),
 ('88683.txt', 0.08208546042442322)]

In [25]:
# dissimilar to die hard
# title How to Make an American Quilt (1995)
how_imdb_id = 113347
how_movie_id = 949

In [26]:
# Home Alone
ha_imdb_id = 99785
ha_movie_id = 94

# Home Alone 3
ha3_imdb_id = 119303
ha3_movie_id = 894

In [27]:
model.docvecs.similarity('{}.txt'.format(ha_imdb_id),'{}.txt'.format(ha3_imdb_id))

0.55925471340334842

In [28]:
# similar movies to home alone
model.docvecs.most_similar(positive=['{}.txt'.format(ha_imdb_id)])

[('104431.txt', 0.8639849424362183),
 ('109480.txt', 0.7129810452461243),
 ('90305.txt', 0.6148812770843506),
 ('168987.txt', 0.5842260122299194),
 ('93148.txt', 0.5789504051208496),
 ('124879.txt', 0.5781936049461365),
 ('113247.txt', 0.5694748163223267),
 ('120686.txt', 0.5652754306793213),
 ('163983.txt', 0.5646089315414429),
 ('105428.txt', 0.5628369450569153)]

In [29]:
# disimilar movies to home alone
model.docvecs.most_similar(negative=['{}.txt'.format(ha_imdb_id)])

[('102975.txt', 0.11109223961830139),
 ('84855.txt', 0.09658187627792358),
 ('83851.txt', 0.0954277366399765),
 ('43265.txt', 0.0899960845708847),
 ('92007.txt', 0.07761909067630768),
 ('47437.txt', 0.07060440629720688),
 ('113870.txt', 0.060495421290397644),
 ('119994.txt', 0.04926643893122673),
 ('151568.txt', 0.048287682235240936),
 ('118607.txt', 0.0470268651843071)]

In [30]:
# dissimilar movie to home alone
# Star Trek VI: The Undiscovered Country (1991)
stvi_imdb_id = 102975
stvi_movie_id = 227

# Item Factor Space

In [31]:
def find_most_similar(i_id, items_map, item_factors, df_movies, asc=False):
    results = []
    sim = 0
    sim_movie_id = None
    for m_id, idx in items_map.iteritems():
        if idx != i_id:
            cs = cosine_sim(item_factors[i_id], item_factors[idx])
            results.append({'movie_id': m_id, 'sim': cs})
    
    df_results = pd.DataFrame(results).merge(df_movies, on='movie_id', how='inner')
    
    return df_results.sort_values('sim', ascending=asc).reset_index(drop=True)

df_movies = pd.read_csv('../data/ml-100k/processed/movies.csv')

def get_cosine_sim(path, movie_id1, movie_id2):
    loaded = dd.io.load(path)
    items_map = loaded['movies'] if 'movies' in loaded else loaded['items']# maps movie_id to 0-based index
    item_factors = loaded['params']['Q']
    
    iid1 = items_map[movie_id1]
    iid2 = items_map[movie_id2]
    return cosine_sim(item_factors[iid1], item_factors[iid2])

def print_similarities(path, nb_similar=10):
    loaded = dd.io.load(path)
    items_map = loaded['items'] # maps movie_id to 0-based index
    item_factors = loaded['params']['Q']
    
    jp_id = items_map[jp_movie_id]
    jp2_id = items_map[jp2_movie_id]
    harry_id = items_map[harry_movie_id]
    print "Jurassic Park - Jurassic Park 2 (similar in Doc2Vec):", cosine_sim(item_factors[jp_id], item_factors[jp2_id])
    print "Jurassic Park - When Harry Met Sally (dissimilar in Doc2Vec):", cosine_sim(item_factors[jp_id], item_factors[harry_id])
    print "most similar to jurassic park"
    df_jp_sim = find_most_similar(jp_id, items_map, item_factors, df_movies)
    display(df_jp_sim[:nb_similar])
    print
    
    scr_id = items_map[scr_movie_id]
    scr2_id = items_map[scr2_movie_id]
    hero_id = items_map[hero_movie_id]
    print "Scream - Scream 2 (similar in Doc2Vec):", cosine_sim(item_factors[scr_id], item_factors[scr2_id])
    print "Scream - Local Hero (dissimilar in Doc2Vec):", cosine_sim(item_factors[scr_id], item_factors[hero_id])
    df_scr_sim = find_most_similar(scr_id, items_map, item_factors, df_movies)
    display(df_scr_sim[:nb_similar])
    print
    
    ge_id = items_map[ge_movie_id]
    tom_id = items_map[tom_movie_id]
    pather_id = items_map[pather_movie_id]
    print "GoldenEye - Tomorrow Never Dies (similar in Doc2Vec):", cosine_sim(item_factors[ge_id], item_factors[tom_id])
    print "GoldenEye - Pather Panchali (dissimilar in Doc2Vec):", cosine_sim(item_factors[ge_id], item_factors[pather_id])
    df_ge_sim = find_most_similar(ge_id, items_map, item_factors, df_movies)
    display(df_ge_sim[:nb_similar])
    print
    
    dh_id = items_map[dh_movie_id]
    dh2_id = items_map[dh2_movie_id]
    dh3_id = items_map[dh3_movie_id]
    how_id = items_map[how_movie_id]
    print "Die Hard - Die Hard 2 (similar in Doc2Vec):", cosine_sim(item_factors[dh_id], item_factors[dh2_id])
    print "Die Hard - Die Hard 3 (similar in Doc2Vec):", cosine_sim(item_factors[dh_id], item_factors[dh3_id])
    print "Die Hard 2 - Die Hard 3 (similar in Doc2Vec):", cosine_sim(item_factors[dh2_id], item_factors[dh3_id])
    print "Die Hard - How to Make an American Quilt (dissimilar in Doc2Vec):", cosine_sim(item_factors[dh_id], item_factors[how_id])
    df_dh_sim = find_most_similar(dh_id, items_map, item_factors, df_movies)
    display(df_dh_sim[:nb_similar])
    print
    
    ha_id = items_map[ha_movie_id]
    ha3_id = items_map[ha3_movie_id]
    stvi_id = items_map[stvi_movie_id]
    print "Home Alone - Home Alone 3 (similar in Doc2Vec):", cosine_sim(item_factors[ha_id], item_factors[ha3_id])
    print "Home Alone -  Star Trek VI(dissimilar in Doc2Vec):", cosine_sim(item_factors[ha_id], item_factors[stvi_id])
    df_ha_sim = find_most_similar(ha_id, items_map, item_factors, df_movies)
    display(df_ha_sim[:nb_similar])
    print

## MPCFs (no side information)

In [32]:
path = '../models/mpcf/2016-06-20_18.30.50_no-si_ml-100k_e20_tt-0.7_base.h5'
print_similarities(path)

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.307188856903
Jurassic Park - When Harry Met Sally (dissimilar in Doc2Vec): 0.529416477775
most similar to jurassic park


Unnamed: 0,movie_id,sim,imdb_id,title
0,568,0.831198,111257,Speed (1994)
1,161,0.80724,92099,Top Gun (1986)
2,385,0.793797,111503,True Lies (1994)
3,403,0.773805,96895,Batman (1989)
4,210,0.755096,97576,Indiana Jones and the Last Crusade (1989)
5,172,0.737102,80684,"Empire Strikes Back, The (1980)"
6,96,0.716313,103064,Terminator 2: Judgment Day (1991)
7,423,0.706601,83866,E.T. the Extra-Terrestrial (1982)
8,265,0.70613,99810,"Hunt for Red October, The (1990)"
9,143,0.705074,59742,"Sound of Music, The (1965)"



Scream - Scream 2 (similar in Doc2Vec): 0.429243437179
Scream - Local Hero (dissimilar in Doc2Vec): -0.358853423224


Unnamed: 0,movie_id,sim,imdb_id,title
0,294,0.627511,119528,Liar Liar (1997)
1,333,0.543173,119174,"Game, The (1997)"
2,258,0.512409,118884,Contact (1997)
3,748,0.505662,120053,"Saint, The (1997)"
4,260,0.482276,119081,Event Horizon (1997)
5,245,0.47181,118972,"Devil's Own, The (1997)"
6,327,0.466111,118887,Cop Land (1997)
7,895,0.429243,120082,Scream 2 (1997)
8,324,0.425835,116922,Lost Highway (1997)
9,358,0.414363,120177,Spawn (1997)



GoldenEye - Tomorrow Never Dies (similar in Doc2Vec): 0.0748455622714
GoldenEye - Pather Panchali (dissimilar in Doc2Vec): -0.10025061365


Unnamed: 0,movie_id,sim,imdb_id,title
0,233,0.836485,105690,Under Siege (1992)
1,231,0.797503,103776,Batman Returns (1992)
2,62,0.792063,111282,Stargate (1994)
3,576,0.790383,106582,Cliffhanger (1993)
4,550,0.789667,112864,Die Hard: With a Vengeance (1995)
5,68,0.786011,109506,"Crow, The (1994)"
6,385,0.778654,111503,True Lies (1994)
7,226,0.774614,99423,Die Hard 2 (1990)
8,403,0.759476,96895,Batman (1989)
9,29,0.750235,112462,Batman Forever (1995)



Die Hard - Die Hard 2 (similar in Doc2Vec): 0.530948099455
Die Hard - Die Hard 3 (similar in Doc2Vec): 0.517881252318
Die Hard 2 - Die Hard 3 (similar in Doc2Vec): 0.87104935339
Die Hard - How to Make an American Quilt (dissimilar in Doc2Vec): -0.107362895802


Unnamed: 0,movie_id,sim,imdb_id,title
0,96,0.807006,103064,Terminator 2: Judgment Day (1991)
1,176,0.784709,90605,Aliens (1986)
2,174,0.772935,82971,Raiders of the Lost Ark (1981)
3,89,0.76332,83658,Blade Runner (1982)
4,183,0.740811,78748,Alien (1979)
5,265,0.739333,99810,"Hunt for Red October, The (1990)"
6,210,0.735773,97576,Indiana Jones and the Last Crusade (1989)
7,172,0.721103,80684,"Empire Strikes Back, The (1980)"
8,79,0.721078,106977,"Fugitive, The (1993)"
9,228,0.705977,84726,Star Trek: The Wrath of Khan (1982)



Home Alone - Home Alone 3 (similar in Doc2Vec): 0.0137370370321
Home Alone -  Star Trek VI(dissimilar in Doc2Vec): 0.486987432609


Unnamed: 0,movie_id,sim,imdb_id,title
0,393,0.769532,107614,Mrs. Doubtfire (1993)
1,395,0.742394,107977,Robin Hood: Men in Tights (1993)
2,67,0.736521,109040,Ace Ventura: Pet Detective (1994)
3,401,0.726361,112572,"Brady Bunch Movie, The (1995)"
4,386,0.716101,106220,Addams Family Values (1993)
5,451,0.703531,77631,Grease (1978)
6,755,0.687068,113497,Jumanji (1995)
7,80,0.680621,107144,Hot Shots! Part Deux (1993)
8,239,0.669144,105435,Sneakers (1992)
9,384,0.667049,110622,Naked Gun 33 1/3: The Final Insult (1994)





## MPCFs-SI

In [33]:
path = '../models/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1.h5'
print_similarities(path)

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.273379095669
Jurassic Park - When Harry Met Sally (dissimilar in Doc2Vec): 0.480445312627
most similar to jurassic park


Unnamed: 0,movie_id,sim,imdb_id,title
0,568,0.775353,111257,Speed (1994)
1,385,0.772546,111503,True Lies (1994)
2,161,0.763758,92099,Top Gun (1986)
3,210,0.757045,97576,Indiana Jones and the Last Crusade (1989)
4,403,0.709903,96895,Batman (1989)
5,96,0.699094,103064,Terminator 2: Judgment Day (1991)
6,423,0.681233,83866,E.T. the Extra-Terrestrial (1982)
7,172,0.673854,80684,"Empire Strikes Back, The (1980)"
8,79,0.662833,106977,"Fugitive, The (1993)"
9,176,0.660326,90605,Aliens (1986)



Scream - Scream 2 (similar in Doc2Vec): 0.579332605409
Scream - Local Hero (dissimilar in Doc2Vec): -0.408750483537


Unnamed: 0,movie_id,sim,imdb_id,title
0,895,0.579333,120082,Scream 2 (1997)
1,294,0.552121,119528,Liar Liar (1997)
2,333,0.501955,119174,"Game, The (1997)"
3,307,0.472539,118971,"Devil's Advocate, The (1997)"
4,268,0.449602,118842,Chasing Amy (1997)
5,324,0.437129,116922,Lost Highway (1997)
6,327,0.426807,118887,Cop Land (1997)
7,748,0.407937,120053,"Saint, The (1997)"
8,301,0.399115,119360,In & Out (1997)
9,258,0.390954,118884,Contact (1997)



GoldenEye - Tomorrow Never Dies (similar in Doc2Vec): 0.299366538454
GoldenEye - Pather Panchali (dissimilar in Doc2Vec): -0.161315075679


Unnamed: 0,movie_id,sim,imdb_id,title
0,231,0.731728,103776,Batman Returns (1992)
1,233,0.728996,105690,Under Siege (1992)
2,385,0.718681,111503,True Lies (1994)
3,29,0.707129,112462,Batman Forever (1995)
4,62,0.705769,111282,Stargate (1994)
5,403,0.697331,96895,Batman (1989)
6,550,0.694792,112864,Die Hard: With a Vengeance (1995)
7,568,0.688382,111257,Speed (1994)
8,576,0.678463,106582,Cliffhanger (1993)
9,578,0.670249,106697,Demolition Man (1993)



Die Hard - Die Hard 2 (similar in Doc2Vec): 0.556490768362
Die Hard - Die Hard 3 (similar in Doc2Vec): 0.574504764882
Die Hard 2 - Die Hard 3 (similar in Doc2Vec): 0.841317085475
Die Hard - How to Make an American Quilt (dissimilar in Doc2Vec): -0.138823082892


Unnamed: 0,movie_id,sim,imdb_id,title
0,96,0.773018,103064,Terminator 2: Judgment Day (1991)
1,176,0.730316,90605,Aliens (1986)
2,174,0.707963,82971,Raiders of the Lost Ark (1981)
3,89,0.692798,83658,Blade Runner (1982)
4,385,0.688572,111503,True Lies (1994)
5,183,0.683713,78748,Alien (1979)
6,79,0.681405,106977,"Fugitive, The (1993)"
7,568,0.671966,111257,Speed (1994)
8,188,0.645556,93058,Full Metal Jacket (1987)
9,265,0.638964,99810,"Hunt for Red October, The (1990)"



Home Alone - Home Alone 3 (similar in Doc2Vec): 0.153868710543
Home Alone -  Star Trek VI(dissimilar in Doc2Vec): 0.367866340836


Unnamed: 0,movie_id,sim,imdb_id,title
0,393,0.725918,107614,Mrs. Doubtfire (1993)
1,395,0.689413,107977,Robin Hood: Men in Tights (1993)
2,386,0.682324,106220,Addams Family Values (1993)
3,401,0.682288,112572,"Brady Bunch Movie, The (1995)"
4,739,0.677635,100405,Pretty Woman (1990)
5,780,0.672605,109686,Dumb & Dumber (1994)
6,755,0.670207,113497,Jumanji (1995)
7,451,0.662926,77631,Grease (1978)
8,80,0.65566,107144,Hot Shots! Part Deux (1993)
9,384,0.653977,110622,Naked Gun 33 1/3: The Final Insult (1994)





In [34]:
path = '../models/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1.h5'
print_similarities(path)

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.273379095669
Jurassic Park - When Harry Met Sally (dissimilar in Doc2Vec): 0.480445312627
most similar to jurassic park


Unnamed: 0,movie_id,sim,imdb_id,title
0,568,0.775353,111257,Speed (1994)
1,385,0.772546,111503,True Lies (1994)
2,161,0.763758,92099,Top Gun (1986)
3,210,0.757045,97576,Indiana Jones and the Last Crusade (1989)
4,403,0.709903,96895,Batman (1989)
5,96,0.699094,103064,Terminator 2: Judgment Day (1991)
6,423,0.681233,83866,E.T. the Extra-Terrestrial (1982)
7,172,0.673854,80684,"Empire Strikes Back, The (1980)"
8,79,0.662833,106977,"Fugitive, The (1993)"
9,176,0.660326,90605,Aliens (1986)



Scream - Scream 2 (similar in Doc2Vec): 0.579332605409
Scream - Local Hero (dissimilar in Doc2Vec): -0.408750483537


Unnamed: 0,movie_id,sim,imdb_id,title
0,895,0.579333,120082,Scream 2 (1997)
1,294,0.552121,119528,Liar Liar (1997)
2,333,0.501955,119174,"Game, The (1997)"
3,307,0.472539,118971,"Devil's Advocate, The (1997)"
4,268,0.449602,118842,Chasing Amy (1997)
5,324,0.437129,116922,Lost Highway (1997)
6,327,0.426807,118887,Cop Land (1997)
7,748,0.407937,120053,"Saint, The (1997)"
8,301,0.399115,119360,In & Out (1997)
9,258,0.390954,118884,Contact (1997)



GoldenEye - Tomorrow Never Dies (similar in Doc2Vec): 0.299366538454
GoldenEye - Pather Panchali (dissimilar in Doc2Vec): -0.161315075679


Unnamed: 0,movie_id,sim,imdb_id,title
0,231,0.731728,103776,Batman Returns (1992)
1,233,0.728996,105690,Under Siege (1992)
2,385,0.718681,111503,True Lies (1994)
3,29,0.707129,112462,Batman Forever (1995)
4,62,0.705769,111282,Stargate (1994)
5,403,0.697331,96895,Batman (1989)
6,550,0.694792,112864,Die Hard: With a Vengeance (1995)
7,568,0.688382,111257,Speed (1994)
8,576,0.678463,106582,Cliffhanger (1993)
9,578,0.670249,106697,Demolition Man (1993)



Die Hard - Die Hard 2 (similar in Doc2Vec): 0.556490768362
Die Hard - Die Hard 3 (similar in Doc2Vec): 0.574504764882
Die Hard 2 - Die Hard 3 (similar in Doc2Vec): 0.841317085475
Die Hard - How to Make an American Quilt (dissimilar in Doc2Vec): -0.138823082892


Unnamed: 0,movie_id,sim,imdb_id,title
0,96,0.773018,103064,Terminator 2: Judgment Day (1991)
1,176,0.730316,90605,Aliens (1986)
2,174,0.707963,82971,Raiders of the Lost Ark (1981)
3,89,0.692798,83658,Blade Runner (1982)
4,385,0.688572,111503,True Lies (1994)
5,183,0.683713,78748,Alien (1979)
6,79,0.681405,106977,"Fugitive, The (1993)"
7,568,0.671966,111257,Speed (1994)
8,188,0.645556,93058,Full Metal Jacket (1987)
9,265,0.638964,99810,"Hunt for Red October, The (1990)"



Home Alone - Home Alone 3 (similar in Doc2Vec): 0.153868710543
Home Alone -  Star Trek VI(dissimilar in Doc2Vec): 0.367866340836


Unnamed: 0,movie_id,sim,imdb_id,title
0,393,0.725918,107614,Mrs. Doubtfire (1993)
1,395,0.689413,107977,Robin Hood: Men in Tights (1993)
2,386,0.682324,106220,Addams Family Values (1993)
3,401,0.682288,112572,"Brady Bunch Movie, The (1995)"
4,739,0.677635,100405,Pretty Woman (1990)
5,780,0.672605,109686,Dumb & Dumber (1994)
6,755,0.670207,113497,Jumanji (1995)
7,451,0.662926,77631,Grease (1978)
8,80,0.65566,107144,Hot Shots! Part Deux (1993)
9,384,0.653977,110622,Naked Gun 33 1/3: The Final Insult (1994)





## MFNN

In [35]:
path = '../models/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13.h5'
print_similarities(path)

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.204469000271
Jurassic Park - When Harry Met Sally (dissimilar in Doc2Vec): 0.537278884589
most similar to jurassic park


Unnamed: 0,movie_id,sim,imdb_id,title
0,161,0.804401,92099,Top Gun (1986)
1,568,0.801794,111257,Speed (1994)
2,210,0.764174,97576,Indiana Jones and the Last Crusade (1989)
3,385,0.743221,111503,True Lies (1994)
4,403,0.702832,96895,Batman (1989)
5,97,0.659933,99348,Dances with Wolves (1990)
6,176,0.655251,90605,Aliens (1986)
7,566,0.650147,109444,Clear and Present Danger (1994)
8,96,0.648384,103064,Terminator 2: Judgment Day (1991)
9,172,0.645155,80684,"Empire Strikes Back, The (1980)"



Scream - Scream 2 (similar in Doc2Vec): 0.524464461286
Scream - Local Hero (dissimilar in Doc2Vec): -0.121788876372


Unnamed: 0,movie_id,sim,imdb_id,title
0,895,0.524464,120082,Scream 2 (1997)
1,294,0.514633,119528,Liar Liar (1997)
2,928,0.475754,115963,"Craft, The (1996)"
3,327,0.473138,118887,Cop Land (1997)
4,123,0.442715,116365,"Frighteners, The (1996)"
5,333,0.439139,119174,"Game, The (1997)"
6,307,0.421222,118971,"Devil's Advocate, The (1997)"
7,245,0.420843,118972,"Devil's Own, The (1997)"
8,358,0.417618,120177,Spawn (1997)
9,332,0.407663,119468,Kiss the Girls (1997)



GoldenEye - Tomorrow Never Dies (similar in Doc2Vec): 0.139674573443
GoldenEye - Pather Panchali (dissimilar in Doc2Vec): 0.107753114901


Unnamed: 0,movie_id,sim,imdb_id,title
0,233,0.795321,105690,Under Siege (1992)
1,578,0.736798,106697,Demolition Man (1993)
2,576,0.728226,106582,Cliffhanger (1993)
3,231,0.725539,103776,Batman Returns (1992)
4,403,0.709661,96895,Batman (1989)
5,550,0.70131,112864,Die Hard: With a Vengeance (1995)
6,385,0.69415,111503,True Lies (1994)
7,226,0.676522,99423,Die Hard 2 (1990)
8,62,0.67445,111282,Stargate (1994)
9,29,0.657263,112462,Batman Forever (1995)



Die Hard - Die Hard 2 (similar in Doc2Vec): 0.452885625382
Die Hard - Die Hard 3 (similar in Doc2Vec): 0.442737529205
Die Hard 2 - Die Hard 3 (similar in Doc2Vec): 0.84400195971
Die Hard - How to Make an American Quilt (dissimilar in Doc2Vec): -0.0303850029868


Unnamed: 0,movie_id,sim,imdb_id,title
0,96,0.743999,103064,Terminator 2: Judgment Day (1991)
1,174,0.705314,82971,Raiders of the Lost Ark (1981)
2,176,0.671135,90605,Aliens (1986)
3,79,0.627153,106977,"Fugitive, The (1993)"
4,183,0.626954,78748,Alien (1979)
5,89,0.618435,83658,Blade Runner (1982)
6,228,0.618337,84726,Star Trek: The Wrath of Khan (1982)
7,172,0.613932,80684,"Empire Strikes Back, The (1980)"
8,204,0.611076,88763,Back to the Future (1985)
9,173,0.593673,93779,"Princess Bride, The (1987)"



Home Alone - Home Alone 3 (similar in Doc2Vec): 0.016913508952
Home Alone -  Star Trek VI(dissimilar in Doc2Vec): 0.436893229174


Unnamed: 0,movie_id,sim,imdb_id,title
0,393,0.729927,107614,Mrs. Doubtfire (1993)
1,67,0.715282,109040,Ace Ventura: Pet Detective (1994)
2,239,0.689945,105435,Sneakers (1992)
3,72,0.687251,110912,"Mask, The (1994)"
4,395,0.686091,107977,Robin Hood: Men in Tights (1993)
5,386,0.670129,106220,Addams Family Values (1993)
6,401,0.666889,112572,"Brady Bunch Movie, The (1995)"
7,80,0.647544,107144,Hot Shots! Part Deux (1993)
8,755,0.640429,113497,Jumanji (1995)
9,451,0.63302,77631,Grease (1978)





### Removes all but 10 ratings of Jurassic Parc and Jurassic Parc 2 in train

In [36]:
path = '../models/mpcf-si/2016-06-21_14.22.39_si_ml-100k_e20_tt-0.7_only-some-jurassic-park.h5'
print_similarities(path)

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.12227672814
Jurassic Park - When Harry Met Sally (dissimilar in Doc2Vec): 0.256542233922
most similar to jurassic park


Unnamed: 0,movie_id,sim,imdb_id,title
0,94,0.620338,99785,Home Alone (1990)
1,393,0.58604,107614,Mrs. Doubtfire (1993)
2,239,0.576149,105435,Sneakers (1992)
3,102,0.55924,65421,"Aristocats, The (1970)"
4,161,0.555631,92099,Top Gun (1986)
5,625,0.55275,57546,"Sword in the Stone, The (1963)"
6,90,0.552522,108174,So I Married an Axe Murderer (1993)
7,67,0.549473,109040,Ace Ventura: Pet Detective (1994)
8,1035,0.544797,106611,Cool Runnings (1993)
9,231,0.541791,103776,Batman Returns (1992)



Scream - Scream 2 (similar in Doc2Vec): 0.402523295713
Scream - Local Hero (dissimilar in Doc2Vec): -0.296020775688


Unnamed: 0,movie_id,sim,imdb_id,title
0,294,0.617016,119528,Liar Liar (1997)
1,258,0.532833,118884,Contact (1997)
2,333,0.50965,119174,"Game, The (1997)"
3,327,0.505153,118887,Cop Land (1997)
4,245,0.481254,118972,"Devil's Own, The (1997)"
5,300,0.477263,118571,Air Force One (1997)
6,748,0.460274,120053,"Saint, The (1997)"
7,328,0.454731,118883,Conspiracy Theory (1997)
8,260,0.426987,119081,Event Horizon (1997)
9,301,0.4267,119360,In & Out (1997)



GoldenEye - Tomorrow Never Dies (similar in Doc2Vec): 0.160582234081
GoldenEye - Pather Panchali (dissimilar in Doc2Vec): -0.208168103504


Unnamed: 0,movie_id,sim,imdb_id,title
0,231,0.794583,103776,Batman Returns (1992)
1,403,0.791651,96895,Batman (1989)
2,62,0.777413,111282,Stargate (1994)
3,550,0.776625,112864,Die Hard: With a Vengeance (1995)
4,29,0.77487,112462,Batman Forever (1995)
5,161,0.755206,92099,Top Gun (1986)
6,385,0.75196,111503,True Lies (1994)
7,576,0.748991,106582,Cliffhanger (1993)
8,68,0.747917,109506,"Crow, The (1994)"
9,233,0.745506,105690,Under Siege (1992)



Die Hard - Die Hard 2 (similar in Doc2Vec): 0.57078776072
Die Hard - Die Hard 3 (similar in Doc2Vec): 0.543557089413
Die Hard 2 - Die Hard 3 (similar in Doc2Vec): 0.889180622315
Die Hard - How to Make an American Quilt (dissimilar in Doc2Vec): -0.0994191034534


Unnamed: 0,movie_id,sim,imdb_id,title
0,96,0.835495,103064,Terminator 2: Judgment Day (1991)
1,176,0.793252,90605,Aliens (1986)
2,79,0.749264,106977,"Fugitive, The (1993)"
3,183,0.732109,78748,Alien (1979)
4,174,0.720982,82971,Raiders of the Lost Ark (1981)
5,568,0.704427,111257,Speed (1994)
6,89,0.689018,83658,Blade Runner (1982)
7,210,0.688383,97576,Indiana Jones and the Last Crusade (1989)
8,172,0.68614,80684,"Empire Strikes Back, The (1980)"
9,188,0.684212,93058,Full Metal Jacket (1987)



Home Alone - Home Alone 3 (similar in Doc2Vec): 0.0136217749488
Home Alone -  Star Trek VI(dissimilar in Doc2Vec): 0.49904588112


Unnamed: 0,movie_id,sim,imdb_id,title
0,393,0.790873,107614,Mrs. Doubtfire (1993)
1,67,0.777707,109040,Ace Ventura: Pet Detective (1994)
2,451,0.768422,77631,Grease (1978)
3,395,0.767491,107977,Robin Hood: Men in Tights (1993)
4,72,0.742056,110912,"Mask, The (1994)"
5,386,0.740624,106220,Addams Family Values (1993)
6,401,0.729107,112572,"Brady Bunch Movie, The (1995)"
7,167,0.69516,81375,Private Benjamin (1980)
8,384,0.691154,110622,Naked Gun 33 1/3: The Final Insult (1994)
9,391,0.686196,107362,Last Action Hero (1993)





In [37]:
item_factors = loaded['params']['Q']
items_map = loaded['items'] # maps movie_id to 0-based index
jp_id = items_map[jp_movie_id]
jp2_id = items_map[jp2_movie_id]
harry_id = items_map[harry_movie_id]
scr_id = items_map[scr_movie_id]
scr2_id = items_map[scr2_movie_id]
ge_id = items_map[ge_movie_id]
tom_id = items_map[tom_movie_id]
pather_id = items_map[pather_movie_id]

NameError: name 'loaded' is not defined

In [None]:
cosine_sim(item_factors[jp_id], item_factors[jp2_id])

In [None]:
cosine_sim(item_factors[jp_id], item_factors[harry_id])

In [None]:
cosine_sim(item_factors[scr_id], item_factors[scr2_id])

In [None]:
cosine_sim(item_factors[ge_id], item_factors[tom_id])