# Movie recommender using a collaborative filtering approach

In [1]:
from surprise import Dataset, evaluate
from surprise import KNNBasic
from surprise import SVD

### Load data from MovieLens 100k and store it in a user-movie matrix

In [2]:
# Load Surprise's built-in MovieLens 100k dataset. Its files live in Surprise's
# data folder, which is also where read_item_names() further down looks for
# the u.item file.
data = Dataset.load_builtin("ml-100k")
# Build the training set from the whole dataset: no ratings are held out.
trainingSet = data.build_full_trainset()

### Use cosine similarity to make recommendations

In [3]:
# Cosine similarity with 'user_based' switched off, i.e. similarities are
# computed between items (movies) rather than between users.
sim_options = dict(name='cosine', user_based=False)
knn = KNNBasic(sim_options=sim_options)

In [4]:
# train() is the deprecated spelling of fit() (same signature) and is missing
# from current Surprise releases; prefer fit() and fall back to train() only
# on old installs that do not provide fit() yet.
if hasattr(knn, 'fit'):
    knn.fit(trainingSet)
else:
    knn.train(trainingSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.


### Movie recommendations for each user using the `build_anti_testset` method

In [5]:
# The anti-testset is made of the (user, movie) pairs that are NOT in
# trainingSet, i.e. the movies each user has not rated yet (see the Surprise
# documentation of Trainset.build_anti_testset).
testSet = trainingSet.build_anti_testset()
# Estimate a rating for every one of those pairs with the trained model.
predictions = knn.test(testSet)

In [6]:
from collections import defaultdict

def get_top5_recommendations(predictions, topN = 5):
    """Return the ``topN`` best-scored items for every user.

    ``predictions`` is an iterable of 5-tuples ``(uid, iid, true_r, est,
    details)``; only ``uid``, ``iid`` and ``est`` are used.

    The result is a ``defaultdict(list)`` that maps each uid to a list of
    ``(iid, est)`` pairs, ordered by descending ``est`` and cut to at most
    ``topN`` entries. Ties keep the order in which they were seen.
    """
    candidates = defaultdict(list)
    for prediction in predictions:
        user_id, item_id, _true_r, estimate, _details = prediction
        candidates[user_id].append((item_id, estimate))

    # Only existing keys are reassigned below, so the dict never changes size
    # while it is being walked.
    for user_id in candidates:
        ranked = sorted(candidates[user_id],
                        key=lambda pair: pair[1],
                        reverse=True)
        candidates[user_id] = ranked[:topN]

    return candidates

### Top 5 highest-rated movie (ID) recommendations for each user

In [7]:
# Print, for every user, the raw movie ids of the five best predictions.
top_recs = get_top5_recommendations(predictions, topN = 5)
for uid, user_ratings in top_recs.items():
    recommended_ids = [iid for iid, _est in user_ratings]
    print(uid, recommended_ids)

196 ['1309', '1310', '1676', '1675', '1593']
186 ['1674', '328', '1', '749', '1450']
22 ['1653', '1618', '12', '357', '1156']
244 ['1236', '1235', '127', '483', '248']
166 ['1674', '1306', '1307', '1201', '1308']
298 ['480', '404', '191', '216', '531']
115 ['135', '179', '61', '276', '1593']
253 ['1653', '866', '172', '423', '133']
305 ['124', '498', '509', '132', '8']
6 ['792', '292', '603', '319', '58']
62 ['79', '1156', '1618', '1377', '1593']
286 ['1236', '1235', '1679', '1678', '1680']
200 ['12', '403', '651', '181', '186']
210 ['1156', '1654', '1593', '273', '1236']
224 ['1618', '1309', '1310', '181', '568']
303 ['1674', '1653', '711', '1201', '1507']
122 ['1342', '1348', '1320', '1364', '1362']
194 ['302', '156', '1654', '480', '603']
291 ['1236', '1235', '1654', '1593', '1618']
234 ['1058', '1122', '1612', '1660', '1201']
119 ['957', '1614', '483', '498', '686']
167 ['1431', '1618', '1201', '1533', '1430']
299 ['1593', '246', '711', '1619', '709']
308 ['1032', '585', '389', '13

380 ['1653', '1671', '1624', '1625', '1617']
381 ['711', '792', '1674', '474', '1593']
385 ['316', '1107', '827', '124', '229']
382 ['1619', '1582', '1561', '1565', '1563']
387 ['781', '141', '1654', '171', '1609']
364 ['1122', '1603', '1618', '1627', '1156']
369 ['1533', '1601', '1467', '1614', '1671']
388 ['1653', '1673', '1309', '1310', '1614']
386 ['1201', '957', '1582', '1561', '1565']
389 ['273', '180', '443', '192', '70']
383 ['1520', '1604', '1309', '1310', '347']
390 ['1309', '1310', '643', '599', '677']
393 ['234', '735', '1679', '1678', '1680']
392 ['79', '1122', '1306', '1307', '480']
376 ['1656', '1526', '643', '1536', '1582']
394 ['1609', '1674', '711', '182', '1377']
391 ['285', '193', '492', '124', '178']
398 ['527', '1654', '318', '164', '193']
397 ['200', '1674', '443', '234', '64']
399 ['1653', '1671', '1420', '119', '1534']
396 ['1309', '1310', '1122', '1619', '1306']
401 ['283', '1344', '123', '616', '936']
402 ['1236', '1235', '1593', '1674', '1431']
384 ['1533', 

### Mapping movie ID to its name

In [8]:
import os, io

def read_item_names():
    """Read MovieLens 100k's ``u.item`` file and map raw movie ids to names.

    The file is looked up inside Surprise's data folder: the directory named
    by the ``SURPRISE_DATA_FOLDER`` environment variable when it is set,
    otherwise ``~/.surprise_data``.

    NOTE: a later cell of this notebook defines another ``read_item_names``
    that returns two dicts; whichever definition ran last is the one in effect.

    Returns:
        dict: raw movie id (str) -> movie name (str), taken from the first
        two ``|``-separated fields of each line.

    Raises:
        IOError / OSError: if ``u.item`` cannot be opened (for instance the
        dataset has not been downloaded yet).
    """

    # Honour SURPRISE_DATA_FOLDER instead of hard-coding ~/.surprise_data, and
    # build the path with os.path.join rather than string concatenation.
    data_dir = os.environ.get(
        'SURPRISE_DATA_FOLDER',
        os.path.join(os.path.expanduser('~'), '.surprise_data'))
    file_name = os.path.join(data_dir, 'ml-100k', 'ml-100k', 'u.item')

    rid_to_name = {}
    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            if not line.strip():
                # A blank line has no '|' fields; skip it instead of raising
                # IndexError.
                continue
            fields = line.split('|')
            rid_to_name[fields[0]] = fields[1]

    return rid_to_name

### Top 5 movie name recommendations with highest rating for each user

In [9]:
# Print each user's top recommendations (default topN) as movie names
# instead of raw ids.
top5_recommendations = get_top5_recommendations(predictions)
rid_to_name = read_item_names()
for uid, user_ratings in top5_recommendations.items():
    titles = [rid_to_name[iid] for iid, _est in user_ratings]
    print(uid, titles)

196 ['Very Natural Thing, A (1974)', 'Walk in the Sun, A (1945)', 'War at Home, The (1996)', 'Sunchaser, The (1996)', 'Death in Brunswick (1991)']
186 ['Mamma Roma (1962)', 'Conspiracy Theory (1997)', 'Toy Story (1995)', 'MatchMaker, The (1997)', 'Golden Earrings (1947)']
22 ['Entertaining Angels: The Dorothy Day Story (1996)', 'King of New York (1990)', 'Usual Suspects, The (1995)', "One Flew Over the Cuckoo's Nest (1975)", 'Cyclo (1995)']
244 ['Other Voices, Other Rooms (1997)', 'Big Bang Theory, The (1994)', 'Godfather, The (1972)', 'Casablanca (1942)', 'Grosse Pointe Blank (1997)']
166 ['Mamma Roma (1962)', 'Delta of Venus (1994)', 'Carmen Miranda: Bananas Is My Business (1994)', 'Marlene Dietrich: Shadow and Light (1996) ', 'Babyfever (1994)']
298 ['North by Northwest (1959)', 'Pinocchio (1940)', 'Amadeus (1984)', 'When Harry Met Sally... (1989)', 'Shine (1996)']
115 ['2001: A Space Odyssey (1968)', 'Clockwork Orange, A (1971)', 'Three Colors: White (1994)', 'Leaving Las Vegas (19

441 ['Substance of Fire, The (1996)', 'King of New York (1990)', 'War at Home, The (1996)', 'He Walked by Night (1948)', 'Sunchaser, The (1996)']
479 ['L.A. Confidential (1997)', 'Lone Star (1996)', 'Dead Man Walking (1995)', 'Usual Suspects, The (1995)', 'Twelve Monkeys (1995)']
484 ['L.A. Confidential (1997)', 'Usual Suspects, The (1995)', 'Field of Dreams (1989)', 'Shawshank Redemption, The (1994)', 'Wizard of Oz, The (1939)']
486 ['T-Men (1947)', 'Tigrero: A Film That Was Never Made (1994)', 'Daens (1992)', 'Promise, The (Versprechen, Das) (1994)', 'Yankee Zulu (1994)']
487 ['King of New York (1990)', 'Mask, The (1994)', 'Babe (1995)', 'Wizard of Oz, The (1939)', 'Fish Called Wanda, A (1988)']
482 ['Delta of Venus (1994)', 'Carmen Miranda: Bananas Is My Business (1994)', 'Babyfever (1994)', 'T-Men (1947)', 'Tigrero: A Film That Was Never Made (1994)']
481 ['Substance of Fire, The (1996)', 'Eat Drink Man Woman (1994)', 'Underground (1995)', 'Man Who Would Be King, The (1975)', 'Star

### Nearest neighbours for a movie

In [10]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
from surprise import KNNBaseline

def read_item_names():
    """Read MovieLens 100k's ``u.item`` file and return two mappings:
    raw movie ids to movie names, and movie names to raw movie ids.

    The file is looked up inside Surprise's data folder: the directory named
    by the ``SURPRISE_DATA_FOLDER`` environment variable when it is set,
    otherwise ``~/.surprise_data``.

    Returns:
        tuple: ``(rid_to_name, name_to_rid)``, both dicts of str -> str built
        from the first two ``|``-separated fields of each line. When several
        ids share one name, ``name_to_rid`` keeps the id read last.

    Raises:
        IOError / OSError: if ``u.item`` cannot be opened (for instance the
        dataset has not been downloaded yet).
    """

    # Honour SURPRISE_DATA_FOLDER instead of hard-coding ~/.surprise_data, and
    # build the path with os.path.join rather than string concatenation.
    data_dir = os.environ.get(
        'SURPRISE_DATA_FOLDER',
        os.path.join(os.path.expanduser('~'), '.surprise_data'))
    file_name = os.path.join(data_dir, 'ml-100k', 'ml-100k', 'u.item')

    rid_to_name = {}
    name_to_rid = {}
    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            if not line.strip():
                # A blank line has no '|' fields; skip it instead of raising
                # IndexError.
                continue
            fields = line.split('|')
            rid_to_name[fields[0]] = fields[1]
            name_to_rid[fields[1]] = fields[0]

    return rid_to_name, name_to_rid

### Using `pearson_baseline` to find similarities

In [11]:
# Pearson-baseline similarity, again computed between items (movies) because
# 'user_based' is switched off. This rebinds the notebook-wide `knn` name.
sim_options = dict(name='pearson_baseline', user_based=False)
knn = KNNBaseline(sim_options=sim_options)


In [12]:
# train() is the deprecated spelling of fit() (same signature) and is missing
# from current Surprise releases; prefer fit() and fall back to train() only
# on old installs that do not provide fit() yet.
if hasattr(knn, 'fit'):
    knn.fit(trainingSet)
else:
    knn.train(trainingSet)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


### Mapping movie ID to name and vice versa

In [13]:
# Uses the two-dict read_item_names() defined in the previous section:
# raw id -> name and name -> raw id.
rid_to_name, name_to_rid = read_item_names()

In [14]:
# Retrieve the inner id of the given movie name: name -> raw id (from u.item),
# then raw id -> the inner id used by the trainset the model was trained on.
raw_id = name_to_rid['Star Wars (1977)']
inner_id = knn.trainset.to_inner_iid(raw_id)

In [15]:
# Retrieve the inner ids of the 10 nearest neighbors of Star Wars.
movie_neighbors = knn.get_neighbors(inner_id, k=10)

In [16]:
# Convert inner ids of the neighbors into names.
# Lists are built instead of generators on purpose: a generator is exhausted
# after one pass, so re-running the display cell would print nothing, and any
# failing lookup would only surface there. This cell rebinds `movie_neighbors`
# from inner ids to names, so re-run the get_neighbors cell before re-running
# this one.
neighbor_raw_ids = [knn.trainset.to_raw_iid(neighbor_iid)
                    for neighbor_iid in movie_neighbors]
movie_neighbors = [rid_to_name[rid] for rid in neighbor_raw_ids]

### List of 10 similar movies

In [17]:
# Empty line first so the header stands apart from whatever was printed before.
print()
print('The 10 nearest neighbors of Star Wars (1977) are:')
# One movie name per line, in the order held by movie_neighbors.
for movie in movie_neighbors:
    print(movie)


The 10 nearest neighbors of Star Wars (1977) are:
Empire Strikes Back, The (1980)
Return of the Jedi (1983)
Raiders of the Lost Ark (1981)
Indiana Jones and the Last Crusade (1989)
Sting, The (1973)
L.A. Confidential (1997)
Princess Bride, The (1987)
E.T. the Extra-Terrestrial (1982)
Terminator, The (1984)
Get Shorty (1995)
