# Content-based recommendation

In [1]:
%pylab inline

import numpy as np
from sklearn.neighbors import NearestNeighbors

from evaluator import Evaluator
from dataset_handler import DatasetHandler

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Relative paths to the dataset directories, one variable per dataset size
# (the directory names suggest MovieLens releases -- confirm).
# NOTE(review): only the first path ends with a trailing slash; check that
# DatasetHandler accepts both forms.
dataset100k = "datasets/ml-latest-small/"
dataset1M = "datasets/ml-1m"
dataset10M = "datasets/ml-10m"

In [3]:
# The cells below run on the 1M dataset; pass dataset100k or dataset10M to
# DatasetHandler instead to use a different one.
dataset_handler = DatasetHandler(dataset1M)

In [4]:
class ContentBasedRecommender(object):
    """Recommend the movies whose vectors are closest to a user's taste profile.

    The profile is a weighted average of the vectors of the movies the user
    rated: each vector is signed by whether the rating lies above or below a
    neutral rating and weighted by the squared distance from it. Candidates
    are then ranked by cosine distance to that profile.
    """

    def __init__(self, dataset_handler, mid_rating=2.75):
        """Store the collaborators; no data is loaded until ``train`` is called.

        Args:
            dataset_handler: object providing ``load_movies()`` and
                ``id2index(movie_id)``.
            mid_rating: rating treated as neutral. Ratings above it pull the
                profile towards a movie, ratings below push it away, and a
                rating exactly equal to it contributes nothing.
        """
        self.dataset_handler = dataset_handler
        self.mid_rating = mid_rating

    def train(self, train_set):
        """Load the movie vectors from the dataset handler.

        ``train_set`` is accepted for interface compatibility but is not used.
        """
        self.movies_vectors = self.dataset_handler.load_movies()

    def top(self, user_ratings, topN):
        """Return the ``topN`` movies nearest to the user's profile.

        Args:
            user_ratings: mapping of movie id -> rating for a single user.
            topN: number of recommendations to return.

        Returns:
            Array of ``topN`` row indices into ``self.movies_vectors``, nearest
            first. Movies the user has already rated are not filtered out.
        """
        user_profile = self._create_user_profile(user_ratings)
        return self._cosineKNN(user_profile, topN)

    def _create_user_profile(self, user_ratings):
        """Build the user's profile vector from their ratings.

        Raises:
            ZeroDivisionError: raised by ``np.average`` when the weights sum to
                zero, i.e. ``user_ratings`` is empty or every rating equals
                ``mid_rating``.
        """
        # Materialise the items once: this keeps movies and ratings aligned and
        # also works where dict views are not sequences (Python 3), which
        # np.array() cannot turn into a numeric array.
        rated = list(user_ratings.items())
        deviations = np.array([rating for (_, rating) in rated], dtype=float) - self.mid_rating
        signed_vectors = np.array([
            self.movies_vectors[self.dataset_handler.id2index(movie)] * np.sign(deviation)
            for ((movie, _), deviation) in zip(rated, deviations)
        ])
        # Strong opinions (far from the neutral rating) dominate the profile.
        return np.average(signed_vectors, weights=deviations ** 2, axis=0)

    def _cosineKNN(self, user_profile, k):
        """Return the indices of the ``k`` movie vectors nearest to ``user_profile``."""
        # n_neighbors is passed by keyword: recent scikit-learn releases reject
        # it as a positional argument, while older ones accept both forms.
        nbrs = NearestNeighbors(n_neighbors=k, metric='cosine', algorithm='brute')
        nbrs.fit(self.movies_vectors)
        # kneighbors expects a 2-D batch of queries; wrap the single profile
        # and unwrap the single result row.
        return nbrs.kneighbors(np.array([user_profile]), return_distance=False)[0]

In [5]:
# Smoke test: recommend five movies for a single user.
recommender = ContentBasedRecommender(dataset_handler)
recommender.train({})  # the recommender ignores the training set; this only loads the movie vectors
user_ratings = dataset_handler.load_users_ratings()
top = recommender.top(user_ratings[1], 5)  # ratings stored under key 1 (presumably user id 1 -- confirm)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(dataset_handler.ids2titles(top))

['Pump Up the Volume (1990)', "Let's Talk About Sex (1998)", "Muriel's Wedding (1994)", 'Keeping the Faith (2000)', 'Pumpkinhead (1988)']


In [6]:
# Score the recommender's top-5 lists with computeMAP (presumably mean
# average precision -- confirm against the evaluator module).
# Evaluator is already imported in the notebook's first cell, so it is not
# imported again here.
evaluator = Evaluator(ContentBasedRecommender)
evaluator.computeMAP(dataset_handler, topN=5)

0.008962023173299182