# Predicting Preference Space Neighbours Using Content Space Neighbours

In [53]:
import pandas as pd
import numpy as np
import pickle

def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Artefacts read from data/output/. Going by how later cells use them:
# mv2wiki links preference-space rows to content-space rows, cvecs/ctitles are the
# content-space vectors and their titles, pvecs/pmovieids are the preference-space
# vectors and their MovieLens ids (presumably row-aligned -- TODO confirm).
mv2wiki = _load_pickle('data/output/mv2wiki.pickle')
cvecs = _load_pickle('data/output/content_vectors.pickle')
ctitles = _load_pickle('data/output/content_titles.pickle')
pvecs = _load_pickle('data/output/pref_vectors.pickle')
pmovieids = _load_pickle('data/output/pref_movieids.pickle')

In [54]:
# Build a movieId -> title lookup from the MovieLens metadata, then list the title
# of every preference-space movie in the same order as pmovieids.
movies = pd.read_csv("data/ml-20m/movies.csv")
ml_titlemap = dict(zip(movies.movieId, movies.title))

# Decode only genuine byte strings: calling .decode() on text that is already
# decoded fails (no such method on Python 3 str; implicit ASCII encode on
# Python 2 unicode), so already-decoded titles are passed through unchanged.
ptitles = [
    title.decode('utf-8') if isinstance(title, bytes) else title
    for title in (ml_titlemap[mid] for mid in pmovieids)
]

In [55]:
from sklearn.neighbors import NearestNeighbors
# One nearest-neighbour index per space, both built with identical settings:
# 20 neighbours per query, ball-tree search.
_knn_kwargs = dict(n_neighbors=20, algorithm='ball_tree')

cnbrs = NearestNeighbors(**_knn_kwargs)
cnbrs = cnbrs.fit(cvecs)  # content-space index

pnbrs = NearestNeighbors(**_knn_kwargs)
pnbrs = pnbrs.fit(pvecs)  # preference-space index

## Content-space (C) neighbourhood vs. preference-space (P) neighbourhood

Let's compare the content-space and preference-space neighbourhoods of the movie "Transformers: Dark of the Moon".

In [56]:
# Find the movie's row in preference space, then follow mv2wiki to the
# corresponding row in content space.
pidx = ptitles.index('Transformers: Dark of the Moon (2011)')
cidx = mv2wiki[pidx]

# kneighbors() expects a 2-D (n_queries, n_features) query. Indexing a single row
# yields a 1-D vector, which newer scikit-learn releases reject, so it is promoted
# to a one-row batch first (assumes the vectors are dense -- TODO confirm).
d, cnbr_indices = cnbrs.kneighbors(np.atleast_2d(cvecs[cidx]))
print("Content space neighbours: \n-------------------------")
print('\n'.join([ctitles[i] for i in cnbr_indices[0]]))

d, pnbr_indices = pnbrs.kneighbors(np.atleast_2d(pvecs[pidx]))
print("\nPreference space neighbours: \n----------------------------")
print('\n'.join([ptitles[i] for i in pnbr_indices[0]]))

Content space neighbours: 
-------------------------
Transformers: Dark of the Moon (2011)
Battleship (2012)
The Untold (2002)
Yakuza Weapon (2011)
Kaizoku Sentai Gokaiger the Movie: The Flying Ghost Ship (2011)
John Carter (2012)
Prometheus (2012)
Ironclad (2011)
Kamen Rider Fourze the Movie: Space, Here We Come! (2012)
Oblivion (2013)
Tensou Sentai Goseiger vs. Shinkenger: Epic on Ginmaku (2011)
Kamen Rider × Super Sentai × Space Sheriff: Super Hero Taisen Z (2013)
Kaizoku Sentai Gokaiger vs. Space Sheriff Gavan: The Movie (2012)
Battle of Los Angeles (2011)
Apollo 18 (2011)
Hunter Prey (2010)
Race to Witch Mountain (2009)
The Divine Weapon (2008)
The Pirates! In an Adventure with Scientists! (2012)
Red Dawn (2012)

Preference space neighbours: 
----------------------------
Transformers: Dark of the Moon (2011)
Transformers: Revenge of the Fallen (2009)
How to Make an American Quilt (1995)
Big One, The (1997)
Oliver! (1968)
Sexual Life of the Belgians, The (Vie sexuelle des Belges 19



# Movies with only a content-space representation

In [57]:
# Reverse lookup of mv2wiki: each value of mv2wiki becomes a key that maps back
# to the key it was stored under.
wiki2mv = dict((wiki_idx, mv_idx) for mv_idx, wiki_idx in mv2wiki.items())

In [58]:
# Pick a movie by its content-space title and try to map it back to preference space.
cidx = ctitles.index('Night Train to Lisbon (2013)')

# The failed lookup is the point of this cell, but an uncaught KeyError would stop
# "Restart & Run All" here. Catch it and show the same message so the cells that
# follow still execute.
try:
    print(wiki2mv[cidx])
except KeyError as err:
    print("KeyError: %s" % err)

KeyError: 456

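This is a movie for which we don't have a preference-space representation, so we predict its preference-space neighbours from those content-space neighbours that do have one.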

In [59]:
# Content-space neighbours of the movie selected by cidx. kneighbors() expects a
# 2-D (n_queries, n_features) query, so the single row is promoted to a one-row
# batch (newer scikit-learn releases reject a bare 1-D vector).
d, cnbr_indices = cnbrs.kneighbors(np.atleast_2d(cvecs[cidx]))

# Keep the preference vector of every content-space neighbour that wiki2mv can
# map back into preference space; neighbours without a mapping are skipped.
pnbr_vecs = []
for i in cnbr_indices[0]:
    pidx = wiki2mv.get(i)
    if pidx is not None:
        print("a neighbour is in preference space")
        pnbr_vecs.append(pvecs[pidx])

a neighbour is in preference space
a neighbour is in preference space




In [69]:
# Query preference space with the vectors collected in pnbr_vecs *before*
# flattening. Without this, a top-to-bottom run would flatten the stale
# pnbr_indices left over from the earlier Transformers query.
d, pnbr_indices = pnbrs.kneighbors(pnbr_vecs)

# Concatenate the per-query neighbour rows into one flat list of row indices.
pflat = [p for ps in pnbr_indices for p in ps]

In [71]:
print("Content space neighbours: \n-------------------------")
print('\n'.join([ctitles[i] for i in cnbr_indices[0]]))

# Preference-space neighbours of every vector in pnbr_vecs (one result row per vector).
d, pnbr_indices = pnbrs.kneighbors(pnbr_vecs)

# Flatten here, after the query, so the titles printed below always come from this
# cell's pnbr_indices rather than from whatever an earlier cell left in the kernel.
pflat = [p for ps in pnbr_indices for p in ps]

print("\nPredicted preference space neighbours: \n----------------------------")
print('\n'.join([ptitles[i] for i in pflat]))

Content space neighbours: 
-------------------------
Night Train to Lisbon (2013)
Scenic Route (2013)
Sri Ramakrishna Darshanam (2012)
Siam–Burma Death Railway (2014)
Street Society (2014)
Pilla Nuvvu Leni Jeevitam (2014)
Irukku Aana Illai (2014)
Legend (2014)
School Dance (2014)
Kappal (2014)
Benvenuti al Nord (2012)
Whiplash (2014)
Antisocial (2013)
Like Sunday, Like Rain (2014)
Lessons in Forgetting (2012)
Super Nani (2014)
Adhisaya Ulagam (2012)
22 Jump Street (2014)
The Last: Naruto the Movie (2014)
The Best Offer (2013)

Predicted preference space neighbours: 
----------------------------
Whiplash (2014)
Warrior (2011)
Persona (1966)
The Count of Monte Cristo (2002)
Seven Pounds (2008)
Hobbit: The Desolation of Smaug, The (2013)
Now You See Me (2013)
X-Men: Days of Future Past (2014)
Fighter, The (2010)
Legend of Bagger Vance, The (2000)
Boy in the Striped Pajamas, The (Boy in the Striped Pyjamas, The) (2008)
Apocalypto (2006)
Guardians of the Galaxy (2014)
Dallas Buyers Club (20