# Content-based KNN recommendation

## Load packages

In [1]:
import random
import numpy as np

from surprise import NormalPredictor

from utils.MovieLens import MovieLens
from utils.Evaluator import Evaluator
from utils.ContentKNNAlgorithm import ContentKNNAlgorithm

## Utility functions

In [2]:
def LoadMovieLensData():
    """Load the MovieLens ratings and the movie popularity rankings.

    Prints a progress message before each of the two loading steps.

    Returns:
        A 3-tuple ``(ml, data, rankings)``: the ``MovieLens`` helper
        instance, the value returned by its ``loadMovieLensLatestSmall()``
        and the value returned by its ``getPopularityRanks()``.
    """
    lens = MovieLens()

    print("Loading movie ratings...")
    ratings = lens.loadMovieLensLatestSmall()

    # The popularity ranks are requested only after the ratings were loaded.
    print("\nComputing movie popularity ranks so we can measure novelty later...")
    popularity_ranks = lens.getPopularityRanks()

    return lens, ratings, popularity_ranks

In [None]:
# Pin both global random number generators -- the standard library's and
# NumPy's legacy one -- to the same fixed seed.
random.seed(0)
np.random.seed(0)

## Load data

In [3]:
# One shared data set for all recommender algorithms: the MovieLens helper,
# the ratings data and the popularity rankings.
ml, evaluationData, rankings = LoadMovieLensData()

# The evaluator is built from the ratings data and the popularity rankings;
# algorithms are added to it in the cells below.
evaluator = Evaluator(evaluationData, rankings)

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


## Content-based KNN

In [4]:
# Create the content-based KNN recommender and add it to the evaluator under
# the name "ContentKNN" (the name shown in the evaluator's printed output).
contentKNN = ContentKNNAlgorithm()
evaluator.AddAlgorithm(contentKNN, "ContentKNN")

In [5]:
# Baseline for comparison: Surprise's NormalPredictor, added to the evaluator
# under the name "Random". Per the Surprise documentation it predicts random
# ratings drawn from a normal distribution estimated from the training set.
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

In [6]:
# Run the evaluation for the algorithms added above.
# NOTE(review): the positional False is presumably the flag that turns off the
# top-N metrics -- confirm against utils/Evaluator.
evaluator.Evaluate(False)

Evaluating  ContentKNN ...
Evaluating accuracy...
Computing content-based similarity matrix...
0  of  8211
100  of  8211
200  of  8211
300  of  8211
400  of  8211
500  of  8211
600  of  8211
700  of  8211
800  of  8211
900  of  8211
1000  of  8211
1100  of  8211
1200  of  8211
1300  of  8211
1400  of  8211
1500  of  8211
1600  of  8211
1700  of  8211
1800  of  8211
1900  of  8211
2000  of  8211
2100  of  8211
2200  of  8211
2300  of  8211
2400  of  8211
2500  of  8211
2600  of  8211
2700  of  8211
2800  of  8211
2900  of  8211
3000  of  8211
3100  of  8211
3200  of  8211
3300  of  8211
3400  of  8211
3500  of  8211
3600  of  8211
3700  of  8211
3800  of  8211
3900  of  8211
4000  of  8211
4100  of  8211
4200  of  8211
4300  of  8211
4400  of  8211
4500  of  8211
4600  of  8211
4700  of  8211
4800  of  8211
4900  of  8211
5000  of  8211
5100  of  8211
5200  of  8211
5300  of  8211
5400  of  8211
5500  of  8211
5600  of  8211
5700  of  8211
5800  of  8211
5900  of  8211
6000  of  8211
61

## Sample top-N recommendations for evaluation

In [7]:
# Produce sample top-N recommendations with each recommender added above.
# NOTE(review): `ml` is presumably passed so movie IDs can be turned into
# titles -- confirm against utils/Evaluator.
evaluator.SampleTopNRecs(ml)


Using recommender  ContentKNN

Building recommendation model...
Computing content-based similarity matrix...
0  of  9066
100  of  9066
200  of  9066
300  of  9066
400  of  9066
500  of  9066
600  of  9066
700  of  9066
800  of  9066
900  of  9066
1000  of  9066
1100  of  9066
1200  of  9066
1300  of  9066
1400  of  9066
1500  of  9066
1600  of  9066
1700  of  9066
1800  of  9066
1900  of  9066
2000  of  9066
2100  of  9066
2200  of  9066
2300  of  9066
2400  of  9066
2500  of  9066
2600  of  9066
2700  of  9066
2800  of  9066
2900  of  9066
3000  of  9066
3100  of  9066
3200  of  9066
3300  of  9066
3400  of  9066
3500  of  9066
3600  of  9066
3700  of  9066
3800  of  9066
3900  of  9066
4000  of  9066
4100  of  9066
4200  of  9066
4300  of  9066
4400  of  9066
4500  of  9066
4600  of  9066
4700  of  9066
4800  of  9066
4900  of  9066
5000  of  9066
5100  of  9066
5200  of  9066
5300  of  9066
5400  of  9066
5500  of  9066
5600  of  9066
5700  of  9066
5800  of  9066
5900  of  9066
60