In [1]:
from gensim.models.doc2vec import Doc2Vec
import deepdish as dd
import numpy as np
from scipy import spatial
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from IPython.display import display, HTML

In [2]:
def cosine_sim(v1, v2):
    """Return the cosine similarity of two vectors.

    Parameters
    ----------
    v1, v2 : array-like
        Vectors of equal shape. Plain Python sequences (lists, tuples) are
        accepted and converted with ``np.asarray``.

    Returns
    -------
    float
        ``sum(v1 * v2) / (||v1|| * ||v2||)``. If either vector has zero norm
        the division is 0/0 and NumPy yields ``nan`` (with a RuntimeWarning).
    """
    # Coerce so that list/tuple inputs multiply element-wise instead of raising.
    v1 = np.asarray(v1)
    v2 = np.asarray(v2)
    dot = np.sum(v1 * v2)
    norm1 = np.sqrt(np.sum(np.square(v1)))
    norm2 = np.sqrt(np.sum(np.square(v2)))
    return dot / (norm1 * norm2)

def angular_sim(v1, v2):
    """Return the angular similarity ``1 - arccos(cos_sim) / pi`` of two vectors.

    The result is 1 for vectors pointing the same way, 0.5 for orthogonal
    vectors and 0 for opposite vectors.
    """
    # Floating-point rounding can push the cosine marginally outside [-1, 1]
    # (e.g. for identical vectors), where arccos returns nan; clip first.
    cos = np.clip(cosine_sim(v1, v2), -1.0, 1.0)
    return 1 - (np.arccos(cos) / np.pi)

# Document Vector Space

In [3]:
# Load the pre-trained Doc2Vec model; its document tags have the form '<imdb_id>.txt'.
# The file name presumably encodes the training setup (20 epochs, PV-DBOW, size 50,
# lr 0.025, window 8, 5 negative samples) -- TODO confirm.
# NOTE: `path` is re-bound further down for the factor-model files.
path = '../doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5'
model = Doc2Vec.load(path)

In [4]:
# Free Willy: the IMDb id forms the Doc2Vec tag '<imdb_id>.txt'; the movie_id is the
# key used by the factor models' item map and by the movies CSV.
fw_imdb_id = 106965
fw_movie_id = 455

# Free Willy 2
fw2_imdb_id = 113114
fw2_movie_id = 169

In [5]:
# Doc2Vec cosine similarity between Free Willy and Free Willy 2.
model.docvecs.similarity('{}.txt'.format(fw_imdb_id),'{}.txt'.format(fw2_imdb_id))

0.86940241592695133

In [6]:
# Nearest documents to Free Willy in the Doc2Vec space.
# Free Willy 2 (113114) is the most similar.
model.docvecs.most_similar(positive=['{}.txt'.format(fw_imdb_id)])

[('113114.txt', 0.8694024085998535),
 ('119152.txt', 0.8322009444236755),
 ('91557.txt', 0.6343326568603516),
 ('77766.txt', 0.629421591758728),
 ('113028.txt', 0.6236975789070129),
 ('116329.txt', 0.6219675540924072),
 ('134619.txt', 0.6181328892707825),
 ('98627.txt', 0.6160564422607422),
 ('117427.txt', 0.6138113141059875),
 ('97388.txt', 0.612464189529419)]

In [7]:
# Movies most dissimilar to Free Willy: nearest documents to the *negated* vector.
# The scores are similarities to the negated vector, so they have the opposite sign
# of what model.docvecs.similarity() reports for the same pair.
model.docvecs.most_similar(negative=['{}.txt'.format(fw_imdb_id)])

[('119280.txt', 0.014068834483623505),
 ('116192.txt', 0.009390600025653839),
 ('110588.txt', 0.00923667848110199),
 ('113987.txt', 0.000695057213306427),
 ('59319.txt', -0.0006838738918304443),
 ('96332.txt', -0.0033706873655319214),
 ('117561.txt', -0.019551947712898254),
 ('144214.txt', -0.020484495908021927),
 ('112346.txt', -0.023167015984654427),
 ('125439.txt', -0.028610385954380035)]

In [8]:
# Most dissimilar to Free Willy in the Doc2Vec space:
# Mrs. Brown (drama/romance). IMDb id -> Doc2Vec tag, movie_id -> factor-model item key.
brown_imdb_id = 119280
brown_movie_id = 1643

In [9]:
# Direct Doc2Vec similarity for Free Willy / Mrs. Brown: same magnitude as the
# most_similar(negative=...) score, opposite sign.
model.docvecs.similarity('{}.txt'.format(fw_imdb_id),'{}.txt'.format(brown_imdb_id))

-0.014068827695707734

In [10]:
# Jurassic Park (IMDb id -> Doc2Vec tag '<imdb_id>.txt', movie_id -> factor-model item key)
jp_imdb_id = 107290
jp_movie_id = 480

# Jurassic Park 2 (The Lost World)
jp2_imdb_id = 119567
jp2_movie_id = 1544

In [11]:
# Doc2Vec cosine similarity between Jurassic Park and Jurassic Park 2.
model.docvecs.similarity('{}.txt'.format(jp_imdb_id),'{}.txt'.format(jp2_imdb_id))

0.67955040301080083

In [12]:
# Nearest documents to Jurassic Park in the Doc2Vec space.
# Jurassic Park 2 (119567) is the second most similar movie.
model.docvecs.most_similar(positive=['{}.txt'.format(jp_imdb_id)])

[('119675.txt', 0.7279843091964722),
 ('119567.txt', 0.679550290107727),
 ('120004.txt', 0.6715301871299744),
 ('90605.txt', 0.6436644792556763),
 ('118928.txt', 0.633784294128418),
 ('97443.txt', 0.6167540550231934),
 ('117998.txt', 0.6146465539932251),
 ('56931.txt', 0.6136928200721741),
 ('88760.txt', 0.6116420030593872),
 ('118689.txt', 0.5930735468864441)]

In [13]:
# Movies most dissimilar to Jurassic Park: nearest documents to the negated vector
# (scores are sign-flipped relative to model.docvecs.similarity()).
model.docvecs.most_similar(negative=['{}.txt'.format(jp_imdb_id)])

[('16630.txt', 0.11265313625335693),
 ('36868.txt', 0.09027586132287979),
 ('98635.txt', 0.08478951454162598),
 ('89853.txt', 0.08220337331295013),
 ('99334.txt', 0.0779241994023323),
 ('57710.txt', 0.07723505795001984),
 ('113083.txt', 0.07643953710794449),
 ('168740.txt', 0.07486752420663834),
 ('189142.txt', 0.06650149822235107),
 ('156887.txt', 0.04408472776412964)]

In [14]:
# Most dissimilar to Jurassic Park in the Doc2Vec space:
# Battling Butler (comedy). IMDb id -> Doc2Vec tag, movie_id -> factor-model item key.
butler_imdb_id = 16630
butler_movie_id = 3012

In [15]:
# Direct Doc2Vec similarity for Jurassic Park / Battling Butler (negative, as expected).
model.docvecs.similarity('{}.txt'.format(jp_imdb_id),'{}.txt'.format(butler_imdb_id))

-0.11265313836867154

In [16]:
# Scream (IMDb id -> Doc2Vec tag '<imdb_id>.txt', movie_id -> factor-model item key)
scr_imdb_id = 117571
scr_movie_id = 1407

# Scream 2
scr2_imdb_id = 120082
scr2_movie_id = 1717

In [17]:
# Doc2Vec cosine similarity between Scream and Scream 2.
model.docvecs.similarity('{}.txt'.format(scr_imdb_id),'{}.txt'.format(scr2_imdb_id))

0.75141428516212705

In [18]:
# Nearest documents to Scream in the Doc2Vec space.
# Scream 2 (120082) is the second most similar.
model.docvecs.most_similar(positive=['{}.txt'.format(scr_imdb_id)])

[('134084.txt', 0.7863492369651794),
 ('120082.txt', 0.7514142394065857),
 ('91954.txt', 0.6668806076049805),
 ('155776.txt', 0.6493260264396667),
 ('111686.txt', 0.6400280594825745),
 ('146336.txt', 0.636536180973053),
 ('192731.txt', 0.6228945851325989),
 ('204626.txt', 0.616219699382782),
 ('110632.txt', 0.6104223728179932),
 ('103919.txt', 0.6087551712989807)]

In [19]:
# Movies most dissimilar to Scream: nearest documents to the negated vector
# (scores are sign-flipped relative to model.docvecs.similarity()).
model.docvecs.most_similar(negative=['{}.txt'.format(scr_imdb_id)])

[('65938.txt', 0.0465000718832016),
 ('92048.txt', 0.03354673832654953),
 ('120633.txt', 0.027388297021389008),
 ('85859.txt', 0.026076868176460266),
 ('200071.txt', 0.0013969168066978455),
 ('66473.txt', -0.00011243671178817749),
 ('110167.txt', -0.0009681433439254761),
 ('98105.txt', -0.00425439327955246),
 ('181984.txt', -0.007469929754734039),
 ('62218.txt', -0.007664471864700317)]

In [20]:
# Most dissimilar to Scream in the Doc2Vec space:
# Kelly's Heroes. IMDb id -> Doc2Vec tag, movie_id -> factor-model item key.
kelly_imdb_id = 65938
kelly_movie_id = 3836

In [21]:
# Direct Doc2Vec similarity for Scream / Kelly's Heroes (negative, as expected).
model.docvecs.similarity('{}.txt'.format(scr_imdb_id),'{}.txt'.format(kelly_imdb_id))

-0.04650007406425781

In [22]:
# Species (IMDb id -> Doc2Vec tag '<imdb_id>.txt', movie_id -> factor-model item key)
sp_imdb_id = 114508
sp_movie_id = 196

# Species II
sp2_imdb_id = 120841
sp2_movie_id = 1862

In [23]:
# Doc2Vec cosine similarity between Species and Species II.
model.docvecs.similarity('{}.txt'.format(sp_imdb_id),'{}.txt'.format(sp2_imdb_id))

0.68311800802591438

In [24]:
# Nearest documents to Species in the Doc2Vec space.
# Species II (120841) is the most similar.
model.docvecs.most_similar(positive=['{}.txt'.format(sp_imdb_id)])

[('120841.txt', 0.6831179857254028),
 ('90583.txt', 0.6602948904037476),
 ('204626.txt', 0.6501474976539612),
 ('139239.txt', 0.6449568271636963),
 ('105226.txt', 0.6214965581893921),
 ('164052.txt', 0.6181344389915466),
 ('95179.txt', 0.6133553981781006),
 ('84783.txt', 0.6096972823143005),
 ('115710.txt', 0.6073347926139832),
 ('82533.txt', 0.6071861982345581)]

In [25]:
# Movies most dissimilar to Species: nearest documents to the negated vector
# (scores are sign-flipped relative to model.docvecs.similarity()).
model.docvecs.most_similar(negative=['{}.txt'.format(sp_imdb_id)])

[('66206.txt', 0.07634155452251434),
 ('16630.txt', 0.0496206060051918),
 ('57546.txt', 0.04905722290277481),
 ('50825.txt', 0.04533801227807999),
 ('75232.txt', 0.040982794016599655),
 ('76137.txt', 0.03134193271398544),
 ('106226.txt', 0.029320180416107178),
 ('107501.txt', 0.020478807389736176),
 ('75704.txt', 0.018507108092308044),
 ('43274.txt', 0.011801136657595634)]

In [26]:
# Most dissimilar to Species in the Doc2Vec space:
# Patton. IMDb id -> Doc2Vec tag, movie_id -> factor-model item key.
patton_imdb_id = 66206
patton_movie_id = 1272

In [27]:
# Direct Doc2Vec similarity for Species / Patton (negative, as expected).
model.docvecs.similarity('{}.txt'.format(sp_imdb_id),'{}.txt'.format(patton_imdb_id))

-0.076341565258958299

In [28]:
# Star Wars: Episode V (IMDb id -> Doc2Vec tag '<imdb_id>.txt', movie_id -> factor-model item key)
swv_imdb_id = 80684
swv_movie_id = 1196

# Star Wars: Episode VI
swvi_imdb_id = 86190
swvi_movie_id = 1210

In [29]:
# Doc2Vec cosine similarity between Star Wars Episode V and Episode VI.
model.docvecs.similarity('{}.txt'.format(swv_imdb_id),'{}.txt'.format(swvi_imdb_id))

0.92307505174504856

In [30]:
# Nearest documents to Star Wars Episode V in the Doc2Vec space.
# Star Wars Episode VI (86190) is the most similar.
model.docvecs.most_similar(positive=['{}.txt'.format(swv_imdb_id)])

[('86190.txt', 0.9230749607086182),
 ('120915.txt', 0.8119794130325317),
 ('76759.txt', 0.8067136406898499),
 ('84827.txt', 0.7332901954650879),
 ('120738.txt', 0.7259986400604248),
 ('117731.txt', 0.6888055801391602),
 ('119707.txt', 0.6791511178016663),
 ('70909.txt', 0.6599252820014954),
 ('97368.txt', 0.6569679379463196),
 ('84315.txt', 0.6349420547485352)]

In [31]:
# Movies most dissimilar to Star Wars Episode V: nearest documents to the negated
# vector (scores are sign-flipped relative to model.docvecs.similarity()).
model.docvecs.most_similar(negative=['{}.txt'.format(swv_imdb_id)])

[('145653.txt', 0.01300012692809105),
 ('94155.txt', 0.010185103863477707),
 ('217630.txt', 0.0010292734950780869),
 ('104797.txt', 0.0009782575070858002),
 ('90863.txt', -0.009507261216640472),
 ('37884.txt', -0.010835811495780945),
 ('91024.txt', -0.011850625276565552),
 ('97322.txt', -0.016994968056678772),
 ('166396.txt', -0.02006503939628601),
 ('118556.txt', -0.02777113951742649)]

In [32]:
# Most dissimilar to Star Wars Episode V in the Doc2Vec space:
# Angela's Ashes. IMDb id -> Doc2Vec tag, movie_id -> factor-model item key.
angela_imdb_id = 145653
angela_movie_id = 3179

In [33]:
# Direct Doc2Vec similarity for Star Wars Episode V / Angela's Ashes (negative, as expected).
model.docvecs.similarity('{}.txt'.format(swv_imdb_id),'{}.txt'.format(angela_imdb_id))

-0.013000124588363082

In [34]:
# Toy Story (IMDb id -> Doc2Vec tag '<imdb_id>.txt', movie_id -> factor-model item key)
ts_imdb_id = 114709
ts_movie_id = 1

# Toy Story 2
ts2_imdb_id = 120363
ts2_movie_id = 3114

In [35]:
# Doc2Vec cosine similarity between Toy Story and Toy Story 2.
model.docvecs.similarity('{}.txt'.format(ts_imdb_id),'{}.txt'.format(ts2_imdb_id))

0.75291341575190129

In [36]:
# Nearest documents to Toy Story in the Doc2Vec space.
# Toy Story 2 (120363) is the most similar.
model.docvecs.most_similar(positive=['{}.txt'.format(ts_imdb_id)])

[('120363.txt', 0.7529133558273315),
 ('115433.txt', 0.6592516303062439),
 ('89961.txt', 0.6522800922393799),
 ('33563.txt', 0.6440500617027283),
 ('120913.txt', 0.6346996426582336),
 ('55254.txt', 0.6268846988677979),
 ('122718.txt', 0.6225907206535339),
 ('101329.txt', 0.618421196937561),
 ('96787.txt', 0.6112702488899231),
 ('84649.txt', 0.6037213802337646)]

In [37]:
# Movies most dissimilar to Toy Story: nearest documents to the negated vector
# (scores are sign-flipped relative to model.docvecs.similarity()).
model.docvecs.most_similar(negative=['{}.txt'.format(ts_imdb_id)])

[('101640.txt', 0.02567880228161812),
 ('112857.txt', 0.02182948589324951),
 ('118798.txt', 0.019348686560988426),
 ('74958.txt', 0.018655510619282722),
 ('112714.txt', 0.014902409166097641),
 ('109450.txt', 0.0139361172914505),
 ('48254.txt', 0.01313953846693039),
 ('123385.txt', 0.007183991372585297),
 ('96073.txt', 0.003403082489967346),
 ('119375.txt', -0.0002711638808250427)]

In [38]:
# Most dissimilar to Toy Story in the Doc2Vec space:
# Raise the Red Lantern. IMDb id -> Doc2Vec tag, movie_id -> factor-model item key.
lantern_imdb_id = 101640
lantern_movie_id = 1280

In [39]:
# Direct Doc2Vec similarity for Toy Story / Raise the Red Lantern (negative, as expected).
model.docvecs.similarity('{}.txt'.format(ts_imdb_id),'{}.txt'.format(lantern_imdb_id))

-0.025678801003992269

# Item Factor Space

In [40]:
def find_most_similar(i_id, items_map, item_factors, df_movies, asc=False):
    """Rank every other item by cosine similarity to one item in factor space.

    Parameters
    ----------
    i_id : int
        Row index into ``item_factors`` of the query item (an index as stored
        in ``items_map``'s values, not a movie_id).
    items_map : dict
        Maps movie_id to row index in ``item_factors``.
    item_factors : indexable of vectors
        Item factor vectors, one per row index.
    df_movies : pandas.DataFrame
        Movie metadata with a ``movie_id`` column; merged onto the results.
    asc : bool, optional
        Sort ascending (least similar first) when True. Default False.

    Returns
    -------
    pandas.DataFrame
        Columns ``movie_id``, ``sim`` plus the metadata columns, sorted by
        ``sim`` with a fresh 0-based index. Items without metadata are dropped
        by the inner merge; the query item itself is excluded.
    """
    query = item_factors[i_id]
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    results = [
        {'movie_id': m_id, 'sim': cosine_sim(query, item_factors[idx])}
        for m_id, idx in items_map.items()
        if idx != i_id
    ]

    # Explicit columns so the frame has the merge key even when results is empty.
    df_results = pd.DataFrame(results, columns=['movie_id', 'sim'])
    df_results = df_results.merge(df_movies, on='movie_id', how='inner')

    return df_results.sort_values('sim', ascending=asc).reset_index(drop=True)

# Movie metadata used to label similarity results: keep only id, title and genres.
df_movies = pd.read_csv('../data/ml-1m/processed/movies-enhanced.csv')[['movie_id', 'title', 'genres']]

def get_cosine_sim(path, movie_id1, movie_id2):
    """Load one saved model and return the item-factor cosine similarity of two movies.

    ``path`` is a file readable by ``dd.io.load``; ``movie_id1`` and
    ``movie_id2`` are keys of the model's item map.
    """
    stored = dd.io.load(path)

    # The movie_id -> 0-based index map is saved under 'movies' or 'items'.
    if 'movies' in stored:
        index_of = stored['movies']
    else:
        index_of = stored['items']
    factors = stored['params']['Q']

    row_a = index_of[movie_id1]
    row_b = index_of[movie_id2]
    return cosine_sim(factors[row_a], factors[row_b])

def get_avg_cosine_sim(paths):
    """Print item-factor cosine similarities averaged over several saved models.

    For each reference movie, prints the mean similarity to its sequel and to
    the movie that was most dissimilar in the Doc2Vec space.

    Parameters
    ----------
    paths : list of str
        Model files readable by ``dd.io.load``. Each file is loaded once and
        all of them are kept in memory for the duration of the call.
    """
    def _load_item_space(path):
        # Returns (movie_id -> 0-based index map, item factor matrix) for one model.
        loaded = dd.io.load(path)
        items_map = loaded['movies'] if 'movies' in loaded else loaded['items']
        return items_map, loaded['params']['Q']

    # Load every model exactly once instead of once per printed pair.
    spaces = [_load_item_space(p) for p in paths]

    def _avg_sim(movie_id1, movie_id2):
        # Mean over models of the cosine similarity between the two movies.
        return np.mean([
            cosine_sim(factors[items_map[movie_id1]], factors[items_map[movie_id2]])
            for items_map, factors in spaces
        ])

    # One group per reference movie: (label, movie_id, movie_id) entries.
    groups = [
        [("Free Willy - Free Willy 2 (similar in Doc2Vec):", fw_movie_id, fw2_movie_id),
         ("Free Willy - Mrs. Brown (dissimilar in Doc2Vec):", fw_movie_id, brown_movie_id)],
        [("Jurassic Park - Jurassic Park 2 (similar in Doc2Vec):", jp_movie_id, jp2_movie_id),
         ("Jurassic Park - Battling Butler (dissimilar in Doc2Vec):", jp_movie_id, butler_movie_id)],
        [("Scream - Scream 2 (similar in Doc2Vec):", scr_movie_id, scr2_movie_id),
         ("Scream - Kelly's Heroes (dissimilar in Doc2Vec):", scr_movie_id, kelly_movie_id)],
        [("Species - Species II (similar in Doc2Vec):", sp_movie_id, sp2_movie_id),
         ("Species - Patton (dissimilar in Doc2Vec):", sp_movie_id, patton_movie_id)],
        [("Star Wars V - Star Wars VI (similar in Doc2Vec):", swv_movie_id, swvi_movie_id),
         ("Star Wars V - Angela's Ashes (dissimilar in Doc2Vec):", swv_movie_id, angela_movie_id)],
        [("Toy Story - Toy Story 2 (similar in Doc2Vec):", ts_movie_id, ts2_movie_id),
         ("Toy Story - Raise the Red Lantern (dissimilar in Doc2Vec):", ts_movie_id, lantern_movie_id)],
    ]

    for pos, group in enumerate(groups):
        if pos:
            print("")  # blank line between reference movies
        for label, movie_id1, movie_id2 in group:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("{} {}".format(label, _avg_sim(movie_id1, movie_id2)))

def print_similarities(path, nb_similar=10):
    """Print and display item-factor similarities for the reference movies of one model.

    For each reference movie this prints the cosine similarity to its sequel
    and to the movie that was most dissimilar in the Doc2Vec space, then
    displays the ``nb_similar`` most similar movies in the item-factor space.

    Parameters
    ----------
    path : str
        Model file readable by ``dd.io.load``.
    nb_similar : int, optional
        Number of most-similar movies to display per reference movie (default 10).
    """
    loaded = dd.io.load(path)
    # Maps movie_id to 0-based index; saved under 'movies' or 'items'.
    items_map = loaded['movies'] if 'movies' in loaded else loaded['items']
    item_factors = loaded['params']['Q']

    # (reference title, movie_id, sequel title, movie_id, dissimilar title, movie_id)
    cases = [
        ("Free Willy", fw_movie_id, "Free Willy 2", fw2_movie_id, "Mrs. Brown", brown_movie_id),
        ("Jurassic Park", jp_movie_id, "Jurassic Park 2", jp2_movie_id, "Battling Butler", butler_movie_id),
        ("Scream", scr_movie_id, "Scream 2", scr2_movie_id, "Kelly's Heroes", kelly_movie_id),
        ("Species", sp_movie_id, "Species II", sp2_movie_id, "Patton", patton_movie_id),
        ("Star Wars V", swv_movie_id, "Star Wars VI", swvi_movie_id, "Angela's Ashes", angela_movie_id),
        ("Toy Story", ts_movie_id, "Toy Story 2", ts2_movie_id, "Raise the Red Lantern", lantern_movie_id),
    ]

    for pos, (title, ref_mid, seq_title, seq_mid, dis_title, dis_mid) in enumerate(cases):
        if pos:
            print("")  # blank line between reference movies

        ref_id = items_map[ref_mid]
        seq_id = items_map[seq_mid]
        dis_id = items_map[dis_mid]

        # Single-argument print() behaves the same on Python 2 and 3.
        print("{} - {} (similar in Doc2Vec): {}".format(
            title, seq_title, cosine_sim(item_factors[ref_id], item_factors[seq_id])))
        print("{} - {} (dissimilar in Doc2Vec): {}".format(
            title, dis_title, cosine_sim(item_factors[ref_id], item_factors[dis_id])))
        print("most similar to {}".format(title))

        df_sim = find_most_similar(ref_id, items_map, item_factors, df_movies)
        display(df_sim[:nb_similar])

## MPCFs (No side information)

In [41]:
# Item-factor similarities for one saved MPCF model (no side information).
# NOTE: this re-binds `path`, which earlier held the Doc2Vec model path.
path = '../models/mpcf/2016-05-27_20.57.43_no-si_ml-1m_e20_tt-0.7_task-22.h5'
print_similarities(path)

Free Willy - Free Willy 2 (similar in Doc2Vec): 0.692309544564
Free Willy - Mrs. Brown (dissimilar in Doc2Vec): -0.0310242412934
most similar to Free Willy


Unnamed: 0,movie_id,sim,title,genres
0,169,0.69231,Free Willy 2: The Adventure Home,"[""Adventure"",""Children"",""Drama""]"
1,1359,0.612439,Jingle All the Way,"[""Children"",""Comedy""]"
2,2429,0.58727,Mighty Joe Young,"[""Action"",""Adventure"",""Drama"",""Fantasy"",""Thril..."
3,1595,0.564771,Free Willy 3: The Rescue,"[""Adventure"",""Children"",""Drama""]"
4,1015,0.522669,Homeward Bound: The Incredible Journey,"[""Adventure"",""Children"",""Drama""]"
5,158,0.511294,Casper,"[""Adventure"",""Children""]"
6,711,0.510071,Flipper,"[""Adventure"",""Children""]"
7,3672,0.494547,Benji,"[""Adventure"",""Children""]"
8,2,0.458505,Jumanji,"[""Adventure"",""Children"",""Fantasy""]"
9,2162,0.457828,"NeverEnding Story II: The Next Chapter, The","[""Adventure"",""Children"",""Fantasy""]"



Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.583736976915
Jurassic Park - Battling Butler (dissimilar in Doc2Vec): -0.121387548439
most similar to Jurassic Park


Unnamed: 0,movie_id,sim,title,genres
0,1580,0.806409,Men in Black,"[""Action"",""Comedy"",""Sci-Fi""]"
1,589,0.657228,Terminator 2: Judgment Day,"[""Action"",""Sci-Fi""]"
2,2916,0.624631,Total Recall,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
3,1544,0.583737,"Lost World: Jurassic Park, The","[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
4,2571,0.535067,"Matrix, The","[""Action"",""Sci-Fi"",""Thriller""]"
5,1527,0.520407,"Fifth Element, The","[""Action"",""Adventure"",""Comedy"",""Sci-Fi""]"
6,1573,0.50068,Face/Off,"[""Action"",""Crime"",""Drama"",""Thriller""]"
7,316,0.496416,Stargate,"[""Action"",""Adventure"",""Sci-Fi""]"
8,110,0.473923,Braveheart,"[""Action"",""Drama"",""War""]"
9,1356,0.459043,Star Trek: First Contact,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"



Scream - Scream 2 (similar in Doc2Vec): 0.686800923547
Scream - Kelly's Heroes (dissimilar in Doc2Vec): -0.197853290897
most similar to Scream


Unnamed: 0,movie_id,sim,title,genres
0,1717,0.686801,Scream 2,"[""Comedy"",""Horror"",""Mystery"",""Thriller""]"
1,724,0.568954,"Craft, The","[""Drama"",""Fantasy"",""Horror"",""Thriller""]"
2,3499,0.567875,Misery,"[""Drama"",""Horror"",""Thriller""]"
3,1339,0.565468,Dracula,"[""Fantasy"",""Horror"",""Romance"",""Thriller""]"
4,1982,0.540004,Halloween,"[""Horror""]"
5,1644,0.537236,I Know What You Did Last Summer,"[""Horror"",""Mystery"",""Thriller""]"
6,1645,0.534557,"Devil\'s Advocate, The","[""Drama"",""Mystery"",""Thriller""]"
7,1342,0.532611,Candyman,"[""Horror"",""Thriller""]"
8,3476,0.529911,Jacob\'s Ladder,"[""Horror"",""Mystery""]"
9,2338,0.519681,I Still Know What You Did Last Summer,"[""Horror"",""Mystery"",""Thriller""]"



Species - Species II (similar in Doc2Vec): 0.616964580935
Species - Patton (dissimilar in Doc2Vec): -0.0549698772233
most similar to Species


Unnamed: 0,movie_id,sim,title,genres
0,1603,0.633995,Mimic,"[""Horror"",""Sci-Fi"",""Thriller""]"
1,1862,0.616965,Species II,"[""Horror"",""Sci-Fi""]"
2,1690,0.595125,Alien: Resurrection,"[""Action"",""Horror"",""Sci-Fi""]"
3,185,0.586818,"Net, The","[""Action"",""Crime"",""Thriller""]"
4,880,0.563946,"Island of Dr. Moreau, The","[""Sci-Fi"",""Thriller""]"
5,172,0.561998,Johnny Mnemonic,"[""Action"",""Sci-Fi"",""Thriller""]"
6,1779,0.558978,Sphere,"[""Sci-Fi"",""Thriller""]"
7,173,0.553721,Judge Dredd,"[""Action"",""Crime"",""Sci-Fi""]"
8,332,0.544208,Village of the Damned,"[""Horror"",""Sci-Fi""]"
9,442,0.544054,Demolition Man,"[""Action"",""Adventure"",""Sci-Fi""]"



Star Wars V - Star Wars VI (similar in Doc2Vec): 0.690062467715
Star Wars V - Angela's Ashes (dissimilar in Doc2Vec): -0.0734449486636
most similar to Star Wars V


Unnamed: 0,movie_id,sim,title,genres
0,260,0.756724,Star Wars: Episode IV - A New Hope,"[""Action"",""Adventure"",""Sci-Fi""]"
1,1240,0.706137,"Terminator, The","[""Action"",""Sci-Fi"",""Thriller""]"
2,1210,0.690062,Star Wars: Episode VI - Return of the Jedi,"[""Action"",""Adventure"",""Sci-Fi""]"
3,1198,0.649632,Raiders of the Lost Ark,"[""Action"",""Adventure""]"
4,1291,0.643633,Indiana Jones and the Last Crusade,"[""Action"",""Adventure""]"
5,1200,0.627442,Aliens,"[""Action"",""Adventure"",""Horror"",""Sci-Fi""]"
6,1214,0.484115,Alien,"[""Horror"",""Sci-Fi""]"
7,2571,0.483702,"Matrix, The","[""Action"",""Sci-Fi"",""Thriller""]"
8,1097,0.478654,E.T. the Extra-Terrestrial,"[""Children"",""Drama"",""Sci-Fi""]"
9,1036,0.473915,Die Hard,"[""Action"",""Crime"",""Thriller""]"



Toy Story - Toy Story 2 (similar in Doc2Vec): 0.669595461581
Toy Story - Raise the Red Lantern (dissimilar in Doc2Vec): -0.0127149157715
most similar to Toy Story


Unnamed: 0,movie_id,sim,title,genres
0,3114,0.669595,Toy Story 2,"[""Adventure"",""Animation"",""Children"",""Comedy"",""..."
1,34,0.599774,Babe,"[""Children"",""Drama""]"
2,1265,0.599761,Groundhog Day,"[""Comedy"",""Fantasy"",""Romance""]"
3,1923,0.536572,There\'s Something About Mary,"[""Comedy"",""Romance""]"
4,2355,0.53088,"Bug\'s Life, A","[""Adventure"",""Animation"",""Children"",""Comedy""]"
5,588,0.523862,Aladdin,"[""Adventure"",""Animation"",""Children"",""Comedy"",""..."
6,2321,0.517021,Pleasantville,"[""Comedy"",""Drama"",""Fantasy""]"
7,1641,0.512097,"Full Monty, The","[""Comedy"",""Drama""]"
8,3253,0.485353,Wayne\'s World,"[""Comedy""]"
9,1517,0.450316,Austin Powers: International Man of Mystery,"[""Action"",""Adventure"",""Comedy""]"


In [42]:
# Average the pairwise similarities over five saved MPCF (no side information) models.
paths = [
'../models/mpcf/2016-05-27_20.57.43_no-si_ml-1m_e20_tt-0.7_task-22.h5',
'../models/mpcf/2016-06-22_19.55.46_no-si_ml-1m_e20_tt-0.7_train-2_task-0.h5',
'../models/mpcf/2016-06-22_19.56.10_no-si_ml-1m_e20_tt-0.7_train-5_task-0.h5',
'../models/mpcf/2016-06-22_19.56.50_no-si_ml-1m_e20_tt-0.7_train-3_task-0.h5',
'../models/mpcf/2016-06-22_19.59.20_no-si_ml-1m_e20_tt-0.7_train-4_task-0.h5',
]
get_avg_cosine_sim(paths)

Free Willy - Free Willy 2 (similar in Doc2Vec): 0.691529941656
Free Willy - Mrs. Brown (dissimilar in Doc2Vec): -0.0366433348194

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.541842436216
Jurassic Park - Battling Butler (dissimilar in Doc2Vec): -0.121830731486

Scream - Scream 2 (similar in Doc2Vec): 0.70155599718
Scream - Kelly's Heroes (dissimilar in Doc2Vec): -0.138500868215

Species - Species II (similar in Doc2Vec): 0.664088985129
Species - Patton (dissimilar in Doc2Vec): -0.0282791287545

Star Wars V - Star Wars VI (similar in Doc2Vec): 0.684562985778
Star Wars V - Angela's Ashes (dissimilar in Doc2Vec): -0.143269204894

Toy Story - Toy Story 2 (similar in Doc2Vec): 0.654633650904
Toy Story - Raise the Red Lantern (dissimilar in Doc2Vec): -0.0255728156526


## MPCFs-SI (with side information)

In [43]:
# Item-factor similarities for one saved MPCF-SI model (with side information).
path = '../models/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0.h5'
print_similarities(path)

Free Willy - Free Willy 2 (similar in Doc2Vec): 0.705107367023
Free Willy - Mrs. Brown (dissimilar in Doc2Vec): -0.0675717988178
most similar to Free Willy


Unnamed: 0,movie_id,sim,title,genres
0,169,0.705107,Free Willy 2: The Adventure Home,"[""Adventure"",""Children"",""Drama""]"
1,1015,0.625628,Homeward Bound: The Incredible Journey,"[""Adventure"",""Children"",""Drama""]"
2,158,0.605429,Casper,"[""Adventure"",""Children""]"
3,1595,0.589972,Free Willy 3: The Rescue,"[""Adventure"",""Children"",""Drama""]"
4,711,0.571554,Flipper,"[""Adventure"",""Children""]"
5,1359,0.546241,Jingle All the Way,"[""Children"",""Comedy""]"
6,2429,0.541455,Mighty Joe Young,"[""Action"",""Adventure"",""Drama"",""Fantasy"",""Thril..."
7,374,0.482522,Richie Rich,"[""Children"",""Comedy""]"
8,362,0.464526,"Jungle Book, The","[""Adventure"",""Children"",""Romance""]"
9,3672,0.452835,Benji,"[""Adventure"",""Children""]"



Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.617076550583
Jurassic Park - Battling Butler (dissimilar in Doc2Vec): -0.190130878462
most similar to Jurassic Park


Unnamed: 0,movie_id,sim,title,genres
0,1580,0.841416,Men in Black,"[""Action"",""Comedy"",""Sci-Fi""]"
1,589,0.736652,Terminator 2: Judgment Day,"[""Action"",""Sci-Fi""]"
2,2916,0.641961,Total Recall,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
3,1544,0.617077,"Lost World: Jurassic Park, The","[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
4,2571,0.597141,"Matrix, The","[""Action"",""Sci-Fi"",""Thriller""]"
5,648,0.540267,Mission: Impossible,"[""Action"",""Adventure"",""Mystery"",""Thriller""]"
6,1356,0.53045,Star Trek: First Contact,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
7,380,0.51562,True Lies,"[""Action"",""Adventure"",""Comedy"",""Romance"",""Thri..."
8,1573,0.496589,Face/Off,"[""Action"",""Crime"",""Drama"",""Thriller""]"
9,110,0.490036,Braveheart,"[""Action"",""Drama"",""War""]"



Scream - Scream 2 (similar in Doc2Vec): 0.723297044654
Scream - Kelly's Heroes (dissimilar in Doc2Vec): -0.205559453259
most similar to Scream


Unnamed: 0,movie_id,sim,title,genres
0,1717,0.723297,Scream 2,"[""Comedy"",""Horror"",""Mystery"",""Thriller""]"
1,3499,0.647078,Misery,"[""Drama"",""Horror"",""Thriller""]"
2,1339,0.601797,Dracula,"[""Fantasy"",""Horror"",""Romance"",""Thriller""]"
3,1644,0.596543,I Know What You Did Last Summer,"[""Horror"",""Mystery"",""Thriller""]"
4,1982,0.554073,Halloween,"[""Horror""]"
5,1645,0.552528,"Devil\'s Advocate, The","[""Drama"",""Mystery"",""Thriller""]"
6,253,0.551472,Interview with the Vampire: The Vampire Chroni...,"[""Drama"",""Horror""]"
7,3476,0.549237,Jacob\'s Ladder,"[""Horror"",""Mystery""]"
8,2338,0.548721,I Still Know What You Did Last Summer,"[""Horror"",""Mystery"",""Thriller""]"
9,2120,0.541995,Needful Things,"[""Drama"",""Horror""]"



Species - Species II (similar in Doc2Vec): 0.70643524738
Species - Patton (dissimilar in Doc2Vec): -0.0306956746373
most similar to Species


Unnamed: 0,movie_id,sim,title,genres
0,1603,0.722097,Mimic,"[""Horror"",""Sci-Fi"",""Thriller""]"
1,1862,0.706435,Species II,"[""Horror"",""Sci-Fi""]"
2,1690,0.664487,Alien: Resurrection,"[""Action"",""Horror"",""Sci-Fi""]"
3,1590,0.664322,Event Horizon,"[""Horror"",""Sci-Fi"",""Thriller""]"
4,172,0.637178,Johnny Mnemonic,"[""Action"",""Sci-Fi"",""Thriller""]"
5,1779,0.621771,Sphere,"[""Sci-Fi"",""Thriller""]"
6,173,0.619452,Judge Dredd,"[""Action"",""Crime"",""Sci-Fi""]"
7,442,0.619162,Demolition Man,"[""Action"",""Adventure"",""Sci-Fi""]"
8,332,0.617267,Village of the Damned,"[""Horror"",""Sci-Fi""]"
9,880,0.608649,"Island of Dr. Moreau, The","[""Sci-Fi"",""Thriller""]"



Star Wars V - Star Wars VI (similar in Doc2Vec): 0.722052519549
Star Wars V - Angela's Ashes (dissimilar in Doc2Vec): -0.16142731848
most similar to Star Wars V


Unnamed: 0,movie_id,sim,title,genres
0,260,0.814265,Star Wars: Episode IV - A New Hope,"[""Action"",""Adventure"",""Sci-Fi""]"
1,1210,0.722053,Star Wars: Episode VI - Return of the Jedi,"[""Action"",""Adventure"",""Sci-Fi""]"
2,1240,0.70944,"Terminator, The","[""Action"",""Sci-Fi"",""Thriller""]"
3,1198,0.685031,Raiders of the Lost Ark,"[""Action"",""Adventure""]"
4,1200,0.616765,Aliens,"[""Action"",""Adventure"",""Horror"",""Sci-Fi""]"
5,1291,0.615452,Indiana Jones and the Last Crusade,"[""Action"",""Adventure""]"
6,1097,0.531153,E.T. the Extra-Terrestrial,"[""Children"",""Drama"",""Sci-Fi""]"
7,1197,0.517411,"Princess Bride, The","[""Action"",""Adventure"",""Comedy"",""Fantasy"",""Roma..."
8,541,0.510899,Blade Runner,"[""Action"",""Sci-Fi"",""Thriller""]"
9,1214,0.501877,Alien,"[""Horror"",""Sci-Fi""]"



Toy Story - Toy Story 2 (similar in Doc2Vec): 0.667803907944
Toy Story - Raise the Red Lantern (dissimilar in Doc2Vec): -0.0505249450573
most similar to Toy Story


Unnamed: 0,movie_id,sim,title,genres
0,3114,0.667804,Toy Story 2,"[""Adventure"",""Animation"",""Children"",""Comedy"",""..."
1,1265,0.616815,Groundhog Day,"[""Comedy"",""Fantasy"",""Romance""]"
2,1923,0.563258,There\'s Something About Mary,"[""Comedy"",""Romance""]"
3,3253,0.557426,Wayne\'s World,"[""Comedy""]"
4,2321,0.554332,Pleasantville,"[""Comedy"",""Drama"",""Fantasy""]"
5,34,0.553615,Babe,"[""Children"",""Drama""]"
6,588,0.534896,Aladdin,"[""Adventure"",""Animation"",""Children"",""Comedy"",""..."
7,1641,0.522546,"Full Monty, The","[""Comedy"",""Drama""]"
8,2355,0.520408,"Bug\'s Life, A","[""Adventure"",""Animation"",""Children"",""Comedy""]"
9,364,0.490792,"Lion King, The","[""Adventure"",""Animation"",""Children"",""Drama"",""M..."


In [44]:
# Average the pairwise similarities over five saved MPCF-SI (with side information) models.
paths = [
'../models/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0.h5',
'../models/mpcf-si/2016-06-30_07.38.02_si_ml-1m_e20_tt-0.7_train-2.h5',
'../models/mpcf-si/2016-06-30_07.32.45_si_ml-1m_e20_tt-0.7_train-3.h5',
'../models/mpcf-si/2016-06-30_07.36.22_si_ml-1m_e20_tt-0.7_train-4.h5',
'../models/mpcf-si/2016-06-30_07.35.21_si_ml-1m_e20_tt-0.7_train-5.h5',
]
get_avg_cosine_sim(paths)

Free Willy - Free Willy 2 (similar in Doc2Vec): 0.732944583476
Free Willy - Mrs. Brown (dissimilar in Doc2Vec): -0.0926604879151

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.621436244914
Jurassic Park - Battling Butler (dissimilar in Doc2Vec): -0.180504018778

Scream - Scream 2 (similar in Doc2Vec): 0.736175015923
Scream - Kelly's Heroes (dissimilar in Doc2Vec): -0.197135526548

Species - Species II (similar in Doc2Vec): 0.701474777661
Species - Patton (dissimilar in Doc2Vec): -0.0485456255288

Star Wars V - Star Wars VI (similar in Doc2Vec): 0.765015977461
Star Wars V - Angela's Ashes (dissimilar in Doc2Vec): -0.17324603141

Toy Story - Toy Story 2 (similar in Doc2Vec): 0.68379751577
Toy Story - Raise the Red Lantern (dissimilar in Doc2Vec): -0.0490065116925


## MFNN

In [45]:
# Item-factor similarities for one saved MFNN model.
path = '../models/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0.h5'
print_similarities(path)

Free Willy - Free Willy 2 (similar in Doc2Vec): 0.828474725234
Free Willy - Mrs. Brown (dissimilar in Doc2Vec): 0.0437503275236
most similar to Free Willy


Unnamed: 0,movie_id,sim,title,genres
0,169,0.828475,Free Willy 2: The Adventure Home,"[""Adventure"",""Children"",""Drama""]"
1,1015,0.786158,Homeward Bound: The Incredible Journey,"[""Adventure"",""Children"",""Drama""]"
2,711,0.750752,Flipper,"[""Adventure"",""Children""]"
3,158,0.744557,Casper,"[""Adventure"",""Children""]"
4,1595,0.74103,Free Willy 3: The Rescue,"[""Adventure"",""Children"",""Drama""]"
5,1359,0.728484,Jingle All the Way,"[""Children"",""Comedy""]"
6,3672,0.70598,Benji,"[""Adventure"",""Children""]"
7,2429,0.702793,Mighty Joe Young,"[""Action"",""Adventure"",""Drama"",""Fantasy"",""Thril..."
8,8,0.699666,Tom and Huck,"[""Adventure"",""Children""]"
9,885,0.682233,Bogus,"[""Children"",""Drama"",""Fantasy""]"



Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.733450617764
Jurassic Park - Battling Butler (dissimilar in Doc2Vec): 0.0611703494449
most similar to Jurassic Park


Unnamed: 0,movie_id,sim,title,genres
0,1580,0.907444,Men in Black,"[""Action"",""Comedy"",""Sci-Fi""]"
1,589,0.800509,Terminator 2: Judgment Day,"[""Action"",""Sci-Fi""]"
2,2916,0.753964,Total Recall,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
3,1544,0.733451,"Lost World: Jurassic Park, The","[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
4,1573,0.696839,Face/Off,"[""Action"",""Crime"",""Drama"",""Thriller""]"
5,1356,0.677492,Star Trek: First Contact,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"
6,1527,0.675665,"Fifth Element, The","[""Action"",""Adventure"",""Comedy"",""Sci-Fi""]"
7,2571,0.669747,"Matrix, The","[""Action"",""Sci-Fi"",""Thriller""]"
8,648,0.663822,Mission: Impossible,"[""Action"",""Adventure"",""Mystery"",""Thriller""]"
9,780,0.65053,Independence Day,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"



Scream - Scream 2 (similar in Doc2Vec): 0.807934065066
Scream - Kelly's Heroes (dissimilar in Doc2Vec): -0.0952893964007
most similar to Scream


Unnamed: 0,movie_id,sim,title,genres
0,1717,0.807934,Scream 2,"[""Comedy"",""Horror"",""Mystery"",""Thriller""]"
1,3499,0.784118,Misery,"[""Drama"",""Horror"",""Thriller""]"
2,1644,0.741383,I Know What You Did Last Summer,"[""Horror"",""Mystery"",""Thriller""]"
3,3476,0.737348,Jacob\'s Ladder,"[""Horror"",""Mystery""]"
4,1339,0.733826,Dracula,"[""Fantasy"",""Horror"",""Romance"",""Thriller""]"
5,1982,0.727594,Halloween,"[""Horror""]"
6,724,0.721092,"Craft, The","[""Drama"",""Fantasy"",""Horror"",""Thriller""]"
7,2120,0.719413,Needful Things,"[""Drama"",""Horror""]"
8,1645,0.710941,"Devil\'s Advocate, The","[""Drama"",""Mystery"",""Thriller""]"
9,253,0.695502,Interview with the Vampire: The Vampire Chroni...,"[""Drama"",""Horror""]"



Species - Species II (similar in Doc2Vec): 0.786596563854
Species - Patton (dissimilar in Doc2Vec): 0.127275866759
most similar to Species


Unnamed: 0,movie_id,sim,title,genres
0,1603,0.852905,Mimic,"[""Horror"",""Sci-Fi"",""Thriller""]"
1,880,0.811985,"Island of Dr. Moreau, The","[""Sci-Fi"",""Thriller""]"
2,1779,0.794934,Sphere,"[""Sci-Fi"",""Thriller""]"
3,1862,0.786597,Species II,"[""Horror"",""Sci-Fi""]"
4,332,0.785775,Village of the Damned,"[""Horror"",""Sci-Fi""]"
5,2448,0.780651,Virus,"[""Horror"",""Sci-Fi""]"
6,512,0.775712,"Puppet Masters, The","[""Horror"",""Sci-Fi""]"
7,1590,0.774781,Event Horizon,"[""Horror"",""Sci-Fi"",""Thriller""]"
8,1690,0.773353,Alien: Resurrection,"[""Action"",""Horror"",""Sci-Fi""]"
9,76,0.764494,Screamers,"[""Action"",""Sci-Fi"",""Thriller""]"



Star Wars V - Star Wars VI (similar in Doc2Vec): 0.847277554403
Star Wars V - Angela's Ashes (dissimilar in Doc2Vec): -0.123863645706
most similar to Star Wars V


Unnamed: 0,movie_id,sim,title,genres
0,260,0.901147,Star Wars: Episode IV - A New Hope,"[""Action"",""Adventure"",""Sci-Fi""]"
1,1210,0.847278,Star Wars: Episode VI - Return of the Jedi,"[""Action"",""Adventure"",""Sci-Fi""]"
2,1240,0.843912,"Terminator, The","[""Action"",""Sci-Fi"",""Thriller""]"
3,1198,0.838579,Raiders of the Lost Ark,"[""Action"",""Adventure""]"
4,1200,0.792017,Aliens,"[""Action"",""Adventure"",""Horror"",""Sci-Fi""]"
5,1291,0.789889,Indiana Jones and the Last Crusade,"[""Action"",""Adventure""]"
6,1097,0.722538,E.T. the Extra-Terrestrial,"[""Children"",""Drama"",""Sci-Fi""]"
7,1036,0.691353,Die Hard,"[""Action"",""Crime"",""Thriller""]"
8,1270,0.677533,Back to the Future,"[""Adventure"",""Comedy"",""Sci-Fi""]"
9,1374,0.674408,Star Trek II: The Wrath of Khan,"[""Action"",""Adventure"",""Sci-Fi"",""Thriller""]"



Toy Story - Toy Story 2 (similar in Doc2Vec): 0.803792550396
Toy Story - Raise the Red Lantern (dissimilar in Doc2Vec): 0.0929723063156
most similar to Toy Story


Unnamed: 0,movie_id,sim,title,genres
0,3114,0.803793,Toy Story 2,"[""Adventure"",""Animation"",""Children"",""Comedy"",""..."
1,34,0.786402,Babe,"[""Children"",""Drama""]"
2,1265,0.749167,Groundhog Day,"[""Comedy"",""Fantasy"",""Romance""]"
3,588,0.709012,Aladdin,"[""Adventure"",""Animation"",""Children"",""Comedy"",""..."
4,2321,0.70297,Pleasantville,"[""Comedy"",""Drama"",""Fantasy""]"
5,1923,0.670119,There\'s Something About Mary,"[""Comedy"",""Romance""]"
6,1641,0.667783,"Full Monty, The","[""Comedy"",""Drama""]"
7,3253,0.643787,Wayne\'s World,"[""Comedy""]"
8,2384,0.642325,Babe: Pig in the City,"[""Adventure"",""Children"",""Drama""]"
9,2355,0.634753,"Bug\'s Life, A","[""Adventure"",""Animation"",""Children"",""Comedy""]"


In [47]:
# Saved MFNN model files to compare. Judging by the file names these are
# ml-1m runs (e10, tt-0.7) -- presumably 10 epochs and a 0.7 train/test
# split; confirm against the training script.
# NOTE(review): the first entry is tagged "task-0" while the others are
# tagged "train-2" .. "train-5" -- confirm this is the intended file.
paths = [
    '../models/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0.h5',
    '../models/mfnn/2016-07-07_19.50.15_mfnn_ml-1m_e10_tt-0.7_train-2.h5',
    '../models/mfnn/2016-07-07_19.49.55_mfnn_ml-1m_e10_tt-0.7_train-3.h5',
    '../models/mfnn/2016-07-07_19.51.28_mfnn_ml-1m_e10_tt-0.7_train-4.h5',
    '../models/mfnn/2016-07-07_19.51.20_mfnn_ml-1m_e10_tt-0.7_train-5.h5',
]

# Report the similar/dissimilar movie-pair cosine similarities for the
# models listed above (presumably averaged across them, per the helper's
# name -- see its definition earlier in the notebook).
get_avg_cosine_sim(paths)

 Free Willy - Free Willy 2 (similar in Doc2Vec): 0.830533277952
Free Willy - Mrs. Brown (dissimilar in Doc2Vec): 0.0976521464148

Jurassic Park - Jurassic Park 2 (similar in Doc2Vec): 0.743970298283
Jurassic Park - Battling Butler (dissimilar in Doc2Vec): -0.00828452237641

Scream - Scream 2 (similar in Doc2Vec): 0.83249561596
Scream - Kelly's Heroes (dissimilar in Doc2Vec): -0.0867792155383

Species - Species II (similar in Doc2Vec): 0.820342021073
Species - Patton (dissimilar in Doc2Vec): 0.0941959938555

Star Wars V - Star Wars VI (similar in Doc2Vec): 0.871660045618
Star Wars V - Angela's Ashes (dissimilar in Doc2Vec): -0.118271873623

Toy Story - Toy Story 2 (similar in Doc2Vec): 0.792862843221
Toy Story - Raise the Red Lantern (dissimilar in Doc2Vec): 0.0592982840202
