In [4]:
import numpy as np
from numpy import genfromtxt
import numpy.ma as ma
import pandas as pd
import csv
import pickle
from fuzzywuzzy import fuzz



In [5]:
with open('movie_to_idx.pickle', 'rb') as f:
    # Load the pickled mapping from disk; fuzzy_matching iterates it as
    # (title, index) pairs.
    # NOTE(review): pickle.load can execute arbitrary code -- only load
    # files that come from a trusted source.
    movie_to_idx = pickle.load(f)

In [6]:
with open('m_dist.pickle', 'rb') as f:
    # Load the pickled m_dist object from disk; make_pred indexes it by
    # movie index and argsorts the selected row.
    # NOTE(review): presumably a precomputed pairwise distance matrix --
    # TODO confirm against the notebook that produced the pickle.
    m_dist = pickle.load(f)

In [7]:
with open('movie_dict.pickle', 'rb') as f:
    # Load the pickled movie metadata from disk; make_pred looks entries up
    # by the value in column 0 of item_vecs and reads their 'title' field.
    movie_dict = pickle.load(f)

In [8]:
with open('movieLinkLarge.pickle', 'rb') as f:
    # Load the dictionary from the file using pickle
    # NOTE(review): movieLink is not referenced by any other visible cell --
    # confirm it is still needed.
    movieLink = pickle.load(f)

In [9]:
# Read the comma-separated item vectors into a NumPy array. Column 0 is used
# later (in make_pred) as the movie id and as the key into movie_dict.
item_vecs = genfromtxt('item_vecs.csv', delimiter=',')

In [10]:
def fuzzy_matching(fav_movie, verbose=True, mapper=None, threshold=60):
    """
    Return the index of the title that best fuzzy-matches ``fav_movie``.

    Every title in ``mapper`` is compared case-insensitively with
    ``fav_movie`` using ``fuzz.ratio``; titles scoring at least
    ``threshold`` are kept as candidates and the best-scoring one wins.

    Parameters
    ----------
    fav_movie: str, name of user input movie

    verbose: bool, print log if True

    mapper: dict, map movie title name to index of the movie in data.
        When None (the default) the module-level ``movie_to_idx`` is used.

    threshold: int, minimum ``fuzz.ratio`` score for a title to be kept

    Return
    ------
    index of the closest match, or None if no title reaches ``threshold``
    """
    if verbose:
        print('inside of fuzzy matching')
    if mapper is None:
        mapper = movie_to_idx

    # Lower-case the query once instead of once per candidate title.
    query = fav_movie.lower()
    candidates = []
    for title, idx in mapper.items():
        ratio = fuzz.ratio(title.lower(), query)
        if ratio >= threshold:
            candidates.append((title, idx, ratio))

    if not candidates:
        print('Oops! No match is found')
        return None

    # Stable ascending sort followed by a reversal: best score first, and
    # among equal scores the title that appears later in ``mapper`` first.
    # This keeps the tie-breaking of the original implementation.
    candidates = sorted(candidates, key=lambda c: c[2])[::-1]
    if verbose:
        print('Found possible matches in our database: {0}\n'.format([c[0] for c in candidates]))
    return candidates[0][1]


In [11]:
def make_pred(fav_movie, n_recommandations = 10):
    """
    Recommend movies for the title that best matches ``fav_movie``.

    The title is resolved to a row index with ``fuzzy_matching``; the
    entries of ``m_dist[idx]`` are then ranked in ascending order and the
    first ``n_recommandations`` positions are mapped to movie ids (column 0
    of ``item_vecs``) and titles (``movie_dict[id]['title']``).

    Parameters
    ----------
    fav_movie: str, name of user input movie

    n_recommandations: int, maximum number of movies to return

    Return
    ------
    (ids, titles): two parallel lists; both are empty when no title
    matches ``fav_movie``
    """
    idx = fuzzy_matching(fav_movie)
    if idx is None:
        # No title matched: indexing m_dist with None would add an axis and
        # fail later with an opaque error, so return an empty result instead.
        return [], []

    movie_ids = []
    titles = []
    # Ascending argsort: positions with the smallest m_dist values come first.
    for row in np.argsort(m_dist[idx])[:n_recommandations]:
        raw_id = item_vecs[row, 0]
        movie_ids.append(int(raw_id))
        titles.append(movie_dict[raw_id]['title'])

    return movie_ids, titles
    

In [13]:
# Prints the integers 0 through 9, one per line.
# NOTE(review): looks like a leftover scratch cell unrelated to the
# recommender -- consider deleting it.
for i in range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9


In [40]:
# Inspect column 0 of row 524 of item_vecs (the value make_pred treats as the movie id).
item_vecs[524,0]

3578.0

In [38]:
# Look up the movie_dict record for that same id, using the lookup make_pred performs.
movie_dict[item_vecs[524,0]]

{'title': 'Gladiator (2000)', 'genres': 'Action|Adventure|Drama'}

In [12]:
# Example query: recommendations for the title that best matches 'troy 2004'.
make_pred('troy 2004')

inside of fuzzy matching
Found possible matches in our database: ['Troy (2004)', 'Hellboy (2004)']



([7090, 3578, 7143, 55247, 41569, 86332, 54001, 150, 5064, 590],
 ['Hero (Ying xiong) (2002)',
  'Gladiator (2000)',
  'Last Samurai, The (2003)',
  'Into the Wild (2007)',
  'King Kong (2005)',
  'Thor (2011)',
  'Harry Potter and the Order of the Phoenix (2007)',
  'Apollo 13 (1995)',
  'The Count of Monte Cristo (2002)',
  'Dances with Wolves (1990)'])

In [21]:
# Print the titles at the ten positions of m_dist[idx] with the smallest
# values (same ranking as make_pred; the [::1] slice is a no-op).
# NOTE(review): `idx` is not assigned at top level in any visible cell, so
# this will raise NameError on a fresh kernel unless it is defined elsewhere
# -- confirm, or set it explicitly, e.g. via fuzzy_matching(...).
for i in np.argsort(m_dist[idx])[::1][:10]:
    print(movie_dict[item_vecs[i,0]]['title'])

Hero (Ying xiong) (2002)
Gladiator (2000)
Last Samurai, The (2003)
Into the Wild (2007)
King Kong (2005)
Thor (2011)
Harry Potter and the Order of the Phoenix (2007)
Apollo 13 (1995)
The Count of Monte Cristo (2002)
Dances with Wolves (1990)


In [14]:
# Positions of the ten smallest values in row 10 of m_dist ([::1] is a no-op slice).
# NOTE(review): the captured output also shows fuzzy_matching log lines,
# which this expression cannot print -- the saved output appears stale.
np.argsort(m_dist[10])[::1][:10]

inside of fuzzy matching
Found possible matches in our database: ['Troy (2004)', 'Hellboy (2004)']



array([665, 524, 666, 770, 732, 822, 765,  38, 608, 143], dtype=int64)