# Content-based recommenders

Content-based recommenders in their recommendations rely purely on the features of items. Conceptually it can be expressed as a model of the form (personalized):
<center>
$$
    score \sim (user, item\_feature_1, item\_feature_2, ..., item\_feature_n)
$$
</center>
or (not personalized)
<center>
$$
    score \sim (item\_feature_1, item\_feature_2, ..., item\_feature_n)
$$
</center>

    + Content-based recommenders do not suffer from the cold-start problem for new items.
    - They do not use information about complex patterns of user-item interactions - what other similar users have already discovered and liked.

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Markdown, display, HTML
from collections import defaultdict
from sklearn.model_selection import KFold

# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Load the data

In [2]:
ml_ratings_df = pd.read_csv(os.path.join("data", "movielens_small", "ratings.csv")).rename(columns={'userId': 'user_id', 'movieId': 'item_id'})
ml_movies_df = pd.read_csv(os.path.join("data", "movielens_small", "movies.csv")).rename(columns={'movieId': 'item_id'})
ml_df = pd.merge(ml_ratings_df, ml_movies_df, on='item_id')
ml_df.head(10)

display(HTML(ml_movies_df.head(10).to_html()))

# Filter the data to reduce the number of movies
rng = np.random.RandomState(seed=6789)
left_ids = rng.choice(ml_movies_df['item_id'], size=1000, replace=False)

ml_ratings_df = ml_ratings_df.loc[ml_ratings_df['item_id'].isin(left_ids)]
ml_movies_df = ml_movies_df.loc[ml_movies_df['item_id'].isin(left_ids)]
ml_df = ml_df.loc[ml_df['item_id'].isin(left_ids)]

print("Number of left interactions: {}".format(len(ml_ratings_df)))

Unnamed: 0,item_id,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


Number of left interactions: 9692


# Recommender class

Remark: Docstrings written in reStructuredText (reST) used by Sphinx to automatically generate code documentation. It is also used by default by PyCharm (type triple quotes after defining a class or a method and hit enter).

In [3]:
class Recommender(object):
    """
    Base recommender class.
    """
    
    def __init__(self):
        """
        Initialize base recommender params and variables.
        """
        pass
    
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.
        
        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items 
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        """
        pass
    
    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns 
        top n_recommendations for each user.
        
        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations 
            for each user.
        :rtype: pd.DataFrame
        """
        
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        
        for ix, user in users_df.iterrows():
            user_recommendations = pd.DataFrame({'user_id': user['user_id'],
                                                 'item_id': [-1] * n_recommendations,
                                                 'score': [3.0] * n_recommendations})

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

# Evaluation measures

## Explicit feedback - ratings

### RMSE - Root Mean Squared Error

<center>
$$
    RMSE = \sqrt{\frac{\sum_{i}^N (\hat{r}_i - r_i)^2}{N}}
$$
</center>

where $\hat{r}_i$ are the predicted ratings and $r_i$ are the real ratings and $N$ is the number of items in the test set.

    + Very well-behaved analytically and therefore extensively used to train models, especially neural networks.
    - The scale of errors dependent on data which reduced comparability between different datasets.

In [4]:
def rmse(r_pred, r_real):
    return np.sqrt(np.sum(np.power(r_pred - r_real, 2)) / len(r_pred))

# Test

print("RMSE = {:.2f}".format(rmse(np.array([2.1, 1.2, 3.8, 4.2, 3.6]), np.array([3, 2, 4, 5, 1]))))

RMSE = 1.33


### MRE - Mean Relative Error

<center>
$$
    MRE = \frac{1}{N} \sum_{i}^N \frac{|\hat{r}_i - r_i|}{|r_i|}
$$
</center>

where $\hat{r}_i$ are the predicted ratings and $r_i$ are the real ratings and $N$ is the number of items in the test set.

    + Easily interpretable (average percentage error) and with a meaning understandable for business.
    - Blows up when there are values close to zero among the predicted values.

In [5]:
def mre(r_pred, r_real):
    return 1 / len(r_pred) * np.sum(np.abs(r_pred - r_real) / np.abs(r_real))

# Test

print("MRE = {:.4f}".format(mre(np.array([2.1, 1.2, 3.8, 4.2, 3.6]), np.array([3, 2, 4, 5, 1]))))

MRE = 0.7020


### TRE - Total Relative Error

<center>
$$
    TRE = \frac{\sum_{i}^N |\hat{r}_i - r_i|}{\sum_{i}^N |r_i|}
$$
</center>

where $\hat{r}_i$ are the predicted ratings and $r_i$ are the real ratings and $N$ is the number of items in the test set.

    + Easily interpretable (total percentage error) and with a meaning understandable for business.
    + Reliable even for very small predicted values.
    - Does not distinguish between a case when one prediction is very bad and other are very good and a case when all predictions are mediocre.

In [6]:
def tre(r_pred, r_real):
    return np.sum(np.abs(r_pred - r_real)) / np.sum(np.abs(r_real))

# Test

print("TRE = {:.4f}".format(tre(np.array([2.1, 1.2, 3.8, 4.2, 3.6]), np.array([3, 2, 4, 5, 1]))))

TRE = 0.3533


## Implicit feedback - binary indicators of interactions

### HR@n - Hit Ratio 
How many hits did we score in the first n recommendations.
<br/>
<br/>
<center>
$$
    \text{HR@}n = \frac{\sum_{u} \sum_{i \in I_u} r_{u, i} \cdot 1_{\hat{D}_n(u)}(i)}{M}
$$
</center>

where:
  * $r_{u, i}$ is $1$ if there was an interaction between user $u$ and item $i$ in the test set and $0$ otherwise, 
  * $\hat{D}_n$ is the set of the first $n$ recommendations for user $u$, 
  * $1_{\hat{D}_n}(i)$ is $1$ if and only if $i \in \hat{D}_n$, otherwise it's equal to $0$,
  * $M$ is the number of users.


    + Easily interpretable.
    - Does not take the rank of each recommendation into account.

In [7]:
def hr(recommendations, real_interactions, n=1):
    """
    Assumes recommendations are ordered by user_id and then by score.
    """
    # Transform real_interactions to a dict for a large speed-up
    rui = defaultdict(lambda: 0)
    
    for idx, row in real_interactions.iterrows():
        rui[(row['user_id'], row['item_id'])] = 1
        
    hr = 0.0
    
    previous_user_id = -1
    rank = 0
    for idx, row in recommendations.iterrows():
        if previous_user_id == row['user_id']:
            rank += 1
        else:
            rank = 1
            
        if rank <= n:
            hr += rui[(row['user_id'], row['item_id'])]
        
        previous_user_id = row['user_id']
    
    hr /= len(recommendations['user_id'].unique())
    
    return hr

    
recommendations = pd.DataFrame(
    [
        [1, 13, 0.9],
        [1, 45, 0.8],
        [1, 22, 0.71],
        [1, 77, 0.55],
        [1, 9, 0.52],
        [2, 11, 0.85],
        [2, 13, 0.69],
        [2, 25, 0.64],
        [2, 6, 0.60],
        [2, 77, 0.53]
        
    ], columns=['user_id', 'item_id', 'score'])

display(HTML(recommendations.to_html()))

real_interactions = pd.DataFrame(
    [
        [1, 45],
        [1, 22],
        [1, 77],
        [2, 13],
        [2, 77]
        
    ], columns=['user_id', 'item_id'])

display(HTML(real_interactions.to_html()))
    
print("HR@3 = {:.4f}".format(hr(recommendations, real_interactions, n=3)))

Unnamed: 0,user_id,item_id,score
0,1,13,0.9
1,1,45,0.8
2,1,22,0.71
3,1,77,0.55
4,1,9,0.52
5,2,11,0.85
6,2,13,0.69
7,2,25,0.64
8,2,6,0.6
9,2,77,0.53


Unnamed: 0,user_id,item_id
0,1,45
1,1,22
2,1,77
3,2,13
4,2,77


HR@3 = 1.5000


### NDCG@n - Normalized Discounted Cumulative Gain

How many hits did we score in the first n recommendations discounted by the position of each recommendation.
<br/>
<br/>
<center>
$$
    \text{NDCG@}n = \frac{\sum_{u} \sum_{i \in I_u} \frac{r_{u, i}}{log\left(1 + v_{\hat{D}_n(u)}(i)\right)}}{M}
$$
</center>

where:
  * $r_{u, i}$ is $1$ if there was an interaction between user $u$ and item $i$ in the test set and $0$ otherwise, 
  * $\hat{D}_n(u)$ is the set of the first $n$ recommendations for user $u$, 
  * $v_{\hat{D}_n(u)}(i)$ is the position of item $i$ in recommendations $\hat{D}_n$,
  * $M$ is the number of users.


    - Takes the rank of each recommendation into account.

In [8]:
def ndcg(recommendations, real_interactions, n=1):
    """
    Assumes recommendations are ordered by user_id and then by score.
    """
    # Transform real_interactions to a dict for a large speed-up
    rui = defaultdict(lambda: 0)
    
    for idx, row in real_interactions.iterrows():
        rui[(row['user_id'], row['item_id'])] = 1
        
    ndcg = 0.0
    
    previous_user_id = -1
    rank = 0
    for idx, row in recommendations.iterrows():
        if previous_user_id == row['user_id']:
            rank += 1
        else:
            rank = 1
            
        if rank <= n:
            ndcg += rui[(row['user_id'], row['item_id'])] / np.log2(1 + rank)
        
        previous_user_id = row['user_id']
    
    ndcg /= len(recommendations['user_id'].unique())
    
    return ndcg

    
recommendations = pd.DataFrame(
    [
        [1, 13, 0.9],
        [1, 45, 0.8],
        [1, 22, 0.71],
        [1, 77, 0.55],
        [1, 9, 0.52],
        [2, 11, 0.85],
        [2, 13, 0.69],
        [2, 25, 0.64],
        [2, 6, 0.60],
        [2, 77, 0.53]
        
    ], columns=['user_id', 'item_id', 'score'])

display(HTML(recommendations.to_html()))

real_interactions = pd.DataFrame(
    [
        [1, 45],
        [1, 22],
        [1, 77],
        [2, 13],
        [2, 77]
        
    ], columns=['user_id', 'item_id'])

display(HTML(real_interactions.to_html()))
    
print("NDCG@3 = {:.4f}".format(ndcg(recommendations, real_interactions, n=3)))

Unnamed: 0,user_id,item_id,score
0,1,13,0.9
1,1,45,0.8
2,1,22,0.71
3,1,77,0.55
4,1,9,0.52
5,2,11,0.85
6,2,13,0.69
7,2,25,0.64
8,2,6,0.6
9,2,77,0.53


Unnamed: 0,user_id,item_id
0,1,45
1,1,22
2,1,77
3,2,13
4,2,77


NDCG@3 = 0.8809


# Testing routines (offline)

## Train and test set split

### Explicit feedback

In [9]:
def evaluate_train_test_split_explicit(recommender, interactions_df, items_df, seed=6789):
    rng = np.random.RandomState(seed=seed)
    
    # Split the dataset into train and test
    
    shuffle = np.arange(len(interactions_df))
    rng.shuffle(shuffle)
    shuffle = list(shuffle)

    train_test_split = 0.8
    split_index = int(len(interactions_df) * train_test_split)

    interactions_df_train = interactions_df.iloc[shuffle[:split_index]]
    interactions_df_test = interactions_df.iloc[shuffle[split_index:]]
    
    # Train the recommender
    
    recommender.fit(interactions_df_train, None, items_df)
    
    # Gather predictions
    
    r_pred = []
    
    for idx, row in interactions_df_test.iterrows():
        users_df = pd.DataFrame([row['user_id']], columns=['user_id'])
        eval_items_df = pd.DataFrame([row['item_id']], columns=['item_id'])
        print(eval_items_df)
        eval_items_df = pd.merge(eval_items_df, items_df, on='item_id')
        print(eval_items_df)
        recommendations = recommender.recommend(users_df, eval_items_df, n_recommendations=1)
        
        r_pred.append(recommendations.iloc[0]['score'])
    
    # Gather real ratings
    
    r_real = np.array(interactions_df_test['rating'].tolist())
    # Return evaluation metrics
    
    return rmse(r_pred, r_real), mre(r_pred, r_real), tre(r_pred, r_real)

recommender = Recommender()

results = [['BaseRecommender'] + list(evaluate_train_test_split_explicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'RMSE', 'MRE', 'TRE'])

display(HTML(results.to_html()))

   item_id
0   3751.0
   item_id               title                     genres
0   3751.0  Chicken Run (2000)  Animation|Children|Comedy
   item_id
0   7438.0
   item_id                     title                 genres
0   7438.0  Kill Bill: Vol. 2 (2004)  Action|Drama|Thriller
   item_id
0  91842.0
   item_id              title                       genres
0  91842.0  Contraband (2012)  Action|Crime|Drama|Thriller
   item_id
0   1500.0
   item_id                       title                genres
0   1500.0  Grosse Pointe Blank (1997)  Comedy|Crime|Romance
   item_id
0   1955.0
   item_id                     title genres
0   1955.0  Kramer vs. Kramer (1979)  Drama
   item_id
0   7438.0
   item_id                     title                 genres
0   7438.0  Kill Bill: Vol. 2 (2004)  Action|Drama|Thriller
   item_id
0   3639.0
   item_id                                title                     genres
0   3639.0  Man with the Golden Gun, The (1974)  Action|Adventure|Thriller
   item_id
0

0    930.0
   item_id             title                      genres
0    930.0  Notorious (1946)  Film-Noir|Romance|Thriller
   item_id
0    842.0
   item_id                                              title         genres
0    842.0  Tales from the Crypt Presents: Bordello of Blo...  Comedy|Horror
   item_id
0   2790.0
   item_id                                              title           genres
0   2790.0  Final Conflict, The (a.k.a. Omen III: The Fina...  Horror|Thriller
   item_id
0   7438.0
   item_id                     title                 genres
0   7438.0  Kill Bill: Vol. 2 (2004)  Action|Drama|Thriller
   item_id
0  68358.0
   item_id             title                        genres
0  68358.0  Star Trek (2009)  Action|Adventure|Sci-Fi|IMAX
   item_id
0   2311.0
   item_id                                  title  genres
0   2311.0  2010: The Year We Make Contact (1984)  Sci-Fi
   item_id
0    595.0
   item_id                        title  \
0    595.0  Beauty and the Beast (

0  71899.0
   item_id                title                  genres
0  71899.0  Mary and Max (2009)  Animation|Comedy|Drama
   item_id
0     50.0
   item_id                       title                  genres
0     50.0  Usual Suspects, The (1995)  Crime|Mystery|Thriller
   item_id
0   1225.0
   item_id           title genres
0   1225.0  Amadeus (1984)  Drama
   item_id
0    423.0
   item_id              title           genres
0    423.0  Blown Away (1994)  Action|Thriller
    item_id
0  180031.0
    item_id                      title                   genres
0  180031.0  The Shape of Water (2017)  Adventure|Drama|Fantasy
   item_id
0   4255.0
   item_id                       title  genres
0   4255.0  Freddy Got Fingered (2001)  Comedy
   item_id
0   6688.0
   item_id                             title        genres
0   6688.0  Autumn Spring (Babí léto) (2001)  Comedy|Drama
   item_id
0  67295.0
   item_id                                              title  \
0  67295.0  Kung Fu Panda: S

0   3536.0
   item_id                     title                genres
0   3536.0  Keeping the Faith (2000)  Comedy|Drama|Romance
   item_id
0   2952.0
   item_id                       title                genres
0   2952.0  Sydney (Hard Eight) (1996)  Crime|Drama|Thriller
   item_id
0    379.0
   item_id           title                  genres
0    379.0  Timecop (1994)  Action|Sci-Fi|Thriller
   item_id
0   2716.0
   item_id                                       title                genres
0   2716.0  Ghostbusters (a.k.a. Ghost Busters) (1984)  Action|Comedy|Sci-Fi
   item_id
0    233.0
   item_id           title genres
0    233.0  Exotica (1994)  Drama
   item_id
0   1976.0
   item_id                              title  genres
0   1976.0  Friday the 13th Part 3: 3D (1982)  Horror
   item_id
0   7438.0
   item_id                     title                 genres
0   7438.0  Kill Bill: Vol. 2 (2004)  Action|Drama|Thriller
   item_id
0   2065.0
   item_id                             titl

   item_id                                 title  \
0    780.0  Independence Day (a.k.a. ID4) (1996)   

                             genres  
0  Action|Adventure|Sci-Fi|Thriller  
   item_id
0    858.0
   item_id                  title       genres
0    858.0  Godfather, The (1972)  Crime|Drama
   item_id
0    959.0
   item_id                    title genres
0    959.0  Of Human Bondage (1934)  Drama
   item_id
0   6502.0
   item_id                 title                genres
0   6502.0  28 Days Later (2002)  Action|Horror|Sci-Fi
   item_id
0   4896.0
   item_id                                              title  \
0   4896.0  Harry Potter and the Sorcerer's Stone (a.k.a. ...   

                       genres  
0  Adventure|Children|Fantasy  
   item_id
0   6502.0
   item_id                 title                genres
0   6502.0  28 Days Later (2002)  Action|Horror|Sci-Fi
   item_id
0    585.0
   item_id                          title  genres
0    585.0  Brady Bunch Movie, The (1995) 

0   2502.0
   item_id                title        genres
0   2502.0  Office Space (1999)  Comedy|Crime
   item_id
0  48774.0
   item_id                   title                                  genres
0  48774.0  Children of Men (2006)  Action|Adventure|Drama|Sci-Fi|Thriller
   item_id
0   2010.0
   item_id              title        genres
0   2010.0  Metropolis (1927)  Drama|Sci-Fi
    item_id
0  175431.0
    item_id                         title            genres
0  175431.0  Bobik Visiting Barbos (1977)  Animation|Comedy
   item_id
0   2289.0
   item_id               title              genres
0   2289.0  Player, The (1992)  Comedy|Crime|Drama
   item_id
0   2517.0
   item_id             title  genres
0   2517.0  Christine (1983)  Horror
    item_id
0  137218.0
    item_id                 title genres
0  137218.0  April Morning (1988)  Drama
   item_id
0   3264.0
   item_id                            title                genres
0   3264.0  Buffy the Vampire Slayer (1992)  Action|Comed

0   1755.0
   item_id                 title          genres
0   1755.0  Shooting Fish (1997)  Comedy|Romance
   item_id
0   3327.0
   item_id                  title       genres
0   3327.0  Beyond the Mat (1999)  Documentary
   item_id
0   8644.0
   item_id            title                            genres
0   8644.0  I, Robot (2004)  Action|Adventure|Sci-Fi|Thriller
   item_id
0   3053.0
   item_id                                            title     genres
0   3053.0  Messenger: The Story of Joan of Arc, The (1999)  Drama|War
   item_id
0   3067.0
   item_id                                              title        genres
0   3067.0  Women on the Verge of a Nervous Breakdown (Muj...  Comedy|Drama
   item_id
0   1079.0
   item_id                        title        genres
0   1079.0  Fish Called Wanda, A (1988)  Comedy|Crime
   item_id
0   3912.0
   item_id             title        genres
0   3912.0  Beautiful (2000)  Comedy|Drama
   item_id
0   4890.0
   item_id               title 

   item_id              title           genres
0    586.0  Home Alone (1990)  Children|Comedy
   item_id
0  71899.0
   item_id                title                  genres
0  71899.0  Mary and Max (2009)  Animation|Comedy|Drama
   item_id
0   2123.0
   item_id                         title  \
0   2123.0  All Dogs Go to Heaven (1989)   

                                    genres  
0  Animation|Children|Comedy|Drama|Fantasy  
    item_id
0  152970.0
    item_id                             title            genres
0  152970.0  Hunt for the Wilderpeople (2016)  Adventure|Comedy
   item_id
0    586.0
   item_id              title           genres
0    586.0  Home Alone (1990)  Children|Comedy
   item_id
0    379.0
   item_id           title                  genres
0    379.0  Timecop (1994)  Action|Sci-Fi|Thriller
   item_id
0  71304.0
   item_id                   title        genres
0  71304.0  Thirst (Bakjwi) (2009)  Drama|Horror
   item_id
0    327.0
   item_id             title         

   item_id           title                   genres
0    837.0  Matilda (1996)  Children|Comedy|Fantasy
   item_id
0   1381.0
   item_id            title                  genres
0   1381.0  Grease 2 (1982)  Comedy|Musical|Romance
   item_id
0   5502.0
   item_id         title                  genres
0   5502.0  Signs (2002)  Horror|Sci-Fi|Thriller
   item_id
0   1411.0
   item_id          title               genres
0   1411.0  Hamlet (1996)  Crime|Drama|Romance
    item_id
0  159161.0
    item_id                        title              genres
0  159161.0  Ali Wong: Baby Cobra (2016)  (no genres listed)
   item_id
0    858.0
   item_id                  title       genres
0    858.0  Godfather, The (1972)  Crime|Drama
   item_id
0  56801.0
   item_id                                       title                genres
0  56801.0  AVPR: Aliens vs. Predator - Requiem (2007)  Action|Horror|Sci-Fi
   item_id
0  83349.0
   item_id                     title  \
0  83349.0  Green Hornet, The (201

0    208.0
   item_id              title                   genres
0    208.0  Waterworld (1995)  Action|Adventure|Sci-Fi
   item_id
0   3006.0
   item_id                title          genres
0   3006.0  Insider, The (1999)  Drama|Thriller
   item_id
0   3355.0
   item_id                   title                           genres
0   3355.0  Ninth Gate, The (1999)  Fantasy|Horror|Mystery|Thriller
    item_id
0  156553.0
    item_id        title                  genres
0  156553.0  Zoom (2015)  Animation|Comedy|Drama
   item_id
0   1079.0
   item_id                        title        genres
0   1079.0  Fish Called Wanda, A (1988)  Comedy|Crime
   item_id
0    208.0
   item_id              title                   genres
0    208.0  Waterworld (1995)  Action|Adventure|Sci-Fi
   item_id
0   3536.0
   item_id                     title                genres
0   3536.0  Keeping the Faith (2000)  Comedy|Drama|Romance
   item_id
0   5074.0
   item_id                title        genres
0   5074.0 

   item_id           title            genres
0  27773.0  Old Boy (2003)  Mystery|Thriller
   item_id
0  48774.0
   item_id                   title                                  genres
0  48774.0  Children of Men (2006)  Action|Adventure|Drama|Sci-Fi|Thriller
   item_id
0   1799.0
   item_id                 title                               genres
0   1799.0  Suicide Kings (1997)  Comedy|Crime|Drama|Mystery|Thriller
   item_id
0   1500.0
   item_id                       title                genres
0   1500.0  Grosse Pointe Blank (1997)  Comedy|Crime|Romance
   item_id
0   1012.0
   item_id              title          genres
0   1012.0  Old Yeller (1957)  Children|Drama
   item_id
0   2716.0
   item_id                                       title                genres
0   2716.0  Ghostbusters (a.k.a. Ghost Busters) (1984)  Action|Comedy|Sci-Fi
    item_id
0  140110.0
    item_id              title  genres
0  140110.0  The Intern (2015)  Comedy
   item_id
0   3846.0
   item_id        

0   3000.0
   item_id                                     title  \
0   3000.0  Princess Mononoke (Mononoke-hime) (1997)   

                                     genres  
0  Action|Adventure|Animation|Drama|Fantasy  
   item_id
0    627.0
   item_id                    title          genres
0    627.0  Last Supper, The (1995)  Drama|Thriller
   item_id
0   3524.0
   item_id          title          genres
0   3524.0  Arthur (1981)  Comedy|Romance
   item_id
0   4896.0
   item_id                                              title  \
0   4896.0  Harry Potter and the Sorcerer's Stone (a.k.a. ...   

                       genres  
0  Adventure|Children|Fantasy  
   item_id
0  64957.0
   item_id                                        title  \
0  64957.0  Curious Case of Benjamin Button, The (2008)   

                          genres  
0  Drama|Fantasy|Mystery|Romance  
   item_id
0     95.0
   item_id                title                     genres
0     95.0  Broken Arrow (1996)  Action|Adv

0    858.0
   item_id                  title       genres
0    858.0  Godfather, The (1972)  Crime|Drama
   item_id
0   1411.0
   item_id          title               genres
0   1411.0  Hamlet (1996)  Crime|Drama|Romance
   item_id
0  99114.0
   item_id                    title                genres
0  99114.0  Django Unchained (2012)  Action|Drama|Western
   item_id
0   2692.0
   item_id                             title        genres
0   2692.0  Run Lola Run (Lola rennt) (1998)  Action|Crime
   item_id
0    780.0
   item_id                                 title  \
0    780.0  Independence Day (a.k.a. ID4) (1996)   

                             genres  
0  Action|Adventure|Sci-Fi|Thriller  
   item_id
0  33162.0
   item_id                     title                    genres
0  33162.0  Kingdom of Heaven (2005)  Action|Drama|Romance|War
   item_id
0    232.0
   item_id                                        title                genres
0    232.0  Eat Drink Man Woman (Yin shi nan nu) (

0   8025.0  The Thief (1997)  Drama
   item_id
0   4239.0
   item_id        title       genres
0   4239.0  Blow (2001)  Crime|Drama
   item_id
0   1417.0
   item_id                           title genres
0   1417.0  Portrait of a Lady, The (1996)  Drama
   item_id
0   4987.0
   item_id                                              title  \
0   4987.0  Spacehunter: Adventures in the Forbidden Zone ...   

                    genres  
0  Action|Adventure|Sci-Fi  
   item_id
0   5954.0
   item_id             title       genres
0   5954.0  25th Hour (2002)  Crime|Drama
   item_id
0  64957.0
   item_id                                        title  \
0  64957.0  Curious Case of Benjamin Button, The (2008)   

                          genres  
0  Drama|Fantasy|Mystery|Romance  
   item_id
0   7451.0
   item_id              title  genres
0   7451.0  Mean Girls (2004)  Comedy
   item_id
0  56775.0
   item_id                                      title            genres
0  56775.0  National Treas

0   4896.0
   item_id                                              title  \
0   4896.0  Harry Potter and the Sorcerer's Stone (a.k.a. ...   

                       genres  
0  Adventure|Children|Fantasy  
   item_id
0  90746.0
   item_id                             title                         genres
0  90746.0  Adventures of Tintin, The (2011)  Action|Animation|Mystery|IMAX
   item_id
0    858.0
   item_id                  title       genres
0    858.0  Godfather, The (1972)  Crime|Drama
   item_id
0   3189.0
   item_id               title          genres
0   3189.0  My Dog Skip (1999)  Children|Drama
   item_id
0   1130.0
   item_id                title          genres
0   1130.0  Howling, The (1980)  Horror|Mystery
    item_id
0  114707.0
    item_id         title          genres
0  114707.0  Horns (2014)  Horror|Mystery
   item_id
0   4896.0
   item_id                                              title  \
0   4896.0  Harry Potter and the Sorcerer's Stone (a.k.a. ...   

         

0     12.0
   item_id                               title         genres
0     12.0  Dracula: Dead and Loving It (1995)  Comedy|Horror
    item_id
0  122892.0
    item_id                           title                   genres
0  122892.0  Avengers: Age of Ultron (2015)  Action|Adventure|Sci-Fi
   item_id
0    208.0
   item_id              title                   genres
0    208.0  Waterworld (1995)  Action|Adventure|Sci-Fi
   item_id
0   2986.0
   item_id             title                        genres
0   2986.0  RoboCop 2 (1990)  Action|Crime|Sci-Fi|Thriller
   item_id
0   2502.0
   item_id                title        genres
0   2502.0  Office Space (1999)  Comedy|Crime
    item_id
0  119145.0
    item_id                                title  \
0  119145.0  Kingsman: The Secret Service (2015)   

                          genres  
0  Action|Adventure|Comedy|Crime  
   item_id
0  27773.0
   item_id           title            genres
0  27773.0  Old Boy (2003)  Mystery|Thriller
   ite

0     50.0
   item_id                       title                  genres
0     50.0  Usual Suspects, The (1995)  Crime|Mystery|Thriller
   item_id
0   2525.0
   item_id             title                genres
0   2525.0  Alligator (1980)  Action|Horror|Sci-Fi
   item_id
0    750.0
   item_id                                              title      genres
0    750.0  Dr. Strangelove or: How I Learned to Stop Worr...  Comedy|War
   item_id
0   2716.0
   item_id                                       title                genres
0   2716.0  Ghostbusters (a.k.a. Ghost Busters) (1984)  Action|Comedy|Sci-Fi
   item_id
0   1021.0
   item_id                          title           genres
0   1021.0  Angels in the Outfield (1994)  Children|Comedy
   item_id
0   2716.0
   item_id                                       title                genres
0   2716.0  Ghostbusters (a.k.a. Ghost Busters) (1984)  Action|Comedy|Sci-Fi
   item_id
0    435.0
   item_id             title         genres
0    435.0 

   item_id          title         genres
0   8605.0  Taxi 3 (2003)  Action|Comedy
   item_id
0  99114.0
   item_id                    title                genres
0  99114.0  Django Unchained (2012)  Action|Drama|Western
   item_id
0    207.0
   item_id                         title         genres
0    207.0  Walk in the Clouds, A (1995)  Drama|Romance
   item_id
0   8958.0
   item_id       title genres
0   8958.0  Ray (2004)  Drama
   item_id
0  66335.0
   item_id                              title     genres
0  66335.0  Afro Samurai: Resurrection (2009)  Animation
   item_id
0   1225.0
   item_id           title genres
0   1225.0  Amadeus (1984)  Drama
   item_id
0    170.0
   item_id           title                           genres
0    170.0  Hackers (1995)  Action|Adventure|Crime|Thriller
   item_id
0  88744.0
   item_id                                  title  \
0  88744.0  Rise of the Planet of the Apes (2011)   

                         genres  
0  Action|Drama|Sci-Fi|Thriller  

   item_id                   title  genres
0   3507.0  Odd Couple, The (1968)  Comedy
    item_id
0  112552.0
    item_id            title genres
0  112552.0  Whiplash (2014)  Drama
   item_id
0     50.0
   item_id                       title                  genres
0     50.0  Usual Suspects, The (1995)  Crime|Mystery|Thriller
   item_id
0   2000.0
   item_id                 title                     genres
0   2000.0  Lethal Weapon (1987)  Action|Comedy|Crime|Drama
   item_id
0   1203.0
   item_id                title genres
0   1203.0  12 Angry Men (1957)  Drama
   item_id
0     50.0
   item_id                       title                  genres
0     50.0  Usual Suspects, The (1995)  Crime|Mystery|Thriller
   item_id
0    585.0
   item_id                          title  genres
0    585.0  Brady Bunch Movie, The (1995)  Comedy
    item_id
0  164655.0
    item_id                title       genres
0  164655.0  Gimme Danger (2016)  Documentary
   item_id
0    248.0
   item_id          

0  90746.0
   item_id                             title                         genres
0  90746.0  Adventures of Tintin, The (2011)  Action|Animation|Mystery|IMAX
   item_id
0    780.0
   item_id                                 title  \
0    780.0  Independence Day (a.k.a. ID4) (1996)   

                             genres  
0  Action|Adventure|Sci-Fi|Thriller  
   item_id
0   5464.0
   item_id                     title       genres
0   5464.0  Road to Perdition (2002)  Crime|Drama
   item_id
0  66783.0
   item_id                   title  genres
0  66783.0  Friday the 13th (2009)  Horror
   item_id
0    842.0
   item_id                                              title         genres
0    842.0  Tales from the Crypt Presents: Bordello of Blo...  Comedy|Horror
   item_id
0   5464.0
   item_id                     title       genres
0   5464.0  Road to Perdition (2002)  Crime|Drama
   item_id
0    337.0
   item_id                               title genres
0    337.0  What's Eating Gilb

0   2006.0
   item_id                      title                 genres
0   2006.0  Mask of Zorro, The (1998)  Action|Comedy|Romance
   item_id
0   3006.0
   item_id                title          genres
0   3006.0  Insider, The (1999)  Drama|Thriller
   item_id
0   1704.0
   item_id                     title         genres
0   1704.0  Good Will Hunting (1997)  Drama|Romance
   item_id
0   1225.0
   item_id           title genres
0   1225.0  Amadeus (1984)  Drama
   item_id
0   4954.0
   item_id                                      title        genres
0   4954.0  Ocean's Eleven (a.k.a. Ocean's 11) (1960)  Comedy|Crime
   item_id
0    595.0
   item_id                        title  \
0    595.0  Beauty and the Beast (1991)   

                                            genres  
0  Animation|Children|Fantasy|Musical|Romance|IMAX  
   item_id
0  41573.0
   item_id                     title                genres
0  41573.0  Family Stone, The (2005)  Comedy|Drama|Romance
   item_id
0  40819.

0   8958.0
   item_id       title genres
0   8958.0  Ray (2004)  Drama
   item_id
0   3536.0
   item_id                     title                genres
0   3536.0  Keeping the Faith (2000)  Comedy|Drama|Romance
   item_id
0   4002.0
   item_id                                title  genres
0   4002.0  Planes, Trains & Automobiles (1987)  Comedy
   item_id
0  99636.0
   item_id                    title        genres
0  99636.0  English Vinglish (2012)  Comedy|Drama
   item_id
0   8644.0
   item_id            title                            genres
0   8644.0  I, Robot (2004)  Action|Adventure|Sci-Fi|Thriller
   item_id
0   4896.0
   item_id                                              title  \
0   4896.0  Harry Potter and the Sorcerer's Stone (a.k.a. ...   

                       genres  
0  Adventure|Children|Fantasy  
   item_id
0  27731.0
   item_id                                       title  \
0  27731.0  Cat Returns, The (Neko no ongaeshi) (2002)   

                               

0    266.0
   item_id                       title                     genres
0    266.0  Legends of the Fall (1994)  Drama|Romance|War|Western
    item_id
0  133545.0
    item_id                    title        genres
0  133545.0  Just Before I Go (2014)  Comedy|Drama
   item_id
0   1107.0
   item_id         title  genres
0   1107.0  Loser (1991)  Comedy
   item_id
0  69436.0
   item_id            title            genres
0  69436.0  Year One (2009)  Adventure|Comedy
   item_id
0    788.0
   item_id                        title                         genres
0    788.0  Nutty Professor, The (1996)  Comedy|Fantasy|Romance|Sci-Fi
   item_id
0   5040.0
   item_id                       title                    genres
0   5040.0  Conan the Destroyer (1984)  Action|Adventure|Fantasy
   item_id
0   3671.0
   item_id                   title          genres
0   3671.0  Blazing Saddles (1974)  Comedy|Western
   item_id
0   5464.0
   item_id                     title       genres
0   5464.0  Road 

0   3213.0
   item_id                                title              genres
0   3213.0  Batman: Mask of the Phantasm (1993)  Animation|Children
   item_id
0   6537.0
   item_id                                      title                   genres
0   6537.0  Terminator 3: Rise of the Machines (2003)  Action|Adventure|Sci-Fi
   item_id
0   2420.0
   item_id                   title genres
0   2420.0  Karate Kid, The (1984)  Drama
   item_id
0  60684.0
   item_id            title                                     genres
0  60684.0  Watchmen (2009)  Action|Drama|Mystery|Sci-Fi|Thriller|IMAX
   item_id
0  69844.0
   item_id                                          title  \
0  69844.0  Harry Potter and the Half-Blood Prince (2009)   

                                   genres  
0  Adventure|Fantasy|Mystery|Romance|IMAX  
   item_id
0    481.0
   item_id              title          genres
0    481.0  Kalifornia (1993)  Drama|Thriller
   item_id
0  39292.0
   item_id                        

   item_id                   title                                  genres
0  48774.0  Children of Men (2006)  Action|Adventure|Drama|Sci-Fi|Thriller
   item_id
0    543.0
   item_id                                title                   genres
0    543.0  So I Married an Axe Murderer (1993)  Comedy|Romance|Thriller
    item_id
0  126548.0
    item_id            title  genres
0  126548.0  The DUFF (2015)  Comedy
   item_id
0    858.0
   item_id                  title       genres
0    858.0  Godfather, The (1972)  Crime|Drama
   item_id
0   2313.0
   item_id                     title genres
0   2313.0  Elephant Man, The (1980)  Drama
   item_id
0   7482.0
   item_id                    title        genres
0   7482.0  Enter the Dragon (1973)  Action|Crime
   item_id
0   5103.0
   item_id                title                 genres
0   5103.0  Sandlot, The (1993)  Children|Comedy|Drama
   item_id
0   8798.0
   item_id              title                       genres
0   8798.0  Collateral 

   item_id        title       genres
0   4239.0  Blow (2001)  Crime|Drama
   item_id
0   1704.0
   item_id                     title         genres
0   1704.0  Good Will Hunting (1997)  Drama|Romance
   item_id
0   4896.0
   item_id                                              title  \
0   4896.0  Harry Potter and the Sorcerer's Stone (a.k.a. ...   

                       genres  
0  Adventure|Children|Fantasy  
   item_id
0    930.0
   item_id             title                      genres
0    930.0  Notorious (1946)  Film-Noir|Romance|Thriller
   item_id
0   2010.0
   item_id              title        genres
0   2010.0  Metropolis (1927)  Drama|Sci-Fi
   item_id
0   3635.0
   item_id                         title                     genres
0   3635.0  Spy Who Loved Me, The (1977)  Action|Adventure|Thriller
   item_id
0    274.0
   item_id                    title  genres
0    274.0  Man of the House (1995)  Comedy
    item_id
0  111146.0
    item_id                                  

Unnamed: 0,Recommender,RMSE,MRE,TRE
0,BaseRecommender,1.170155,0.349264,0.271796


### Implicit feedback

**Task 1.** Implement the following method for train-test split evaluation for implicit feedback.

In [12]:
def evaluate_train_test_split_implicit(recommender, interactions_df, items_df, seed=6789):
    rng = np.random.RandomState(seed=seed)
    # Split the dataset into train and test
    shuffle = np.arange(len(interactions_df))
    rng.shuffle(shuffle)
    shuffle = list(shuffle)
    train_test_split = 0.8
    split_index = int(len(interactions_df) * train_test_split)
    interactions_df_train = interactions_df.iloc[shuffle[:split_index]]
    interactions_df_test = interactions_df.iloc[shuffle[split_index:]]
    # Train the recommender
    
    recommender.fit(interactions_df_train, None, items_df)
    
    
    hr_1 = []
    hr_3 = []
    hr_5 = []
    hr_10 = []
    
    users_ids = interactions_df_test.loc[:, ['user_id']].drop_duplicates(subset='user_id')
    recommendations = recommender.recommend(users_ids, items_df, n_recommendations=10)
#     display(HTML(recommendations.to_html()))
    hr_1.append(hr(recommendations, interactions_df_test, n=1))
    hr_3.append(hr(recommendations, interactions_df_test, n=3))
    hr_5.append(hr(recommendations, interactions_df_test, n=5))
    hr_10.append(hr(recommendations, interactions_df_test, n=10))
    
    hr_1 = np.mean(hr_1)
    hr_3 = np.mean(hr_3)
    hr_5 = np.mean(hr_5)
    hr_10 = np.mean(hr_10)
    
    return hr_1, hr_3, hr_5, hr_10

recommender = TFIDFRecommender()

results = [['BaseRecommender'] + list(evaluate_train_test_split_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10'])

display(HTML(results.to_html()))

Unnamed: 0,Recommender,HR@1,HR@3,HR@5,HR@10
0,BaseRecommender,0.017897,0.044743,0.058166,0.09396


## Leave-one-out, leave-k-out, cross-validation

### Explicit feedback

**Task 2.** Implement the following method for leave-one-out evaluation for explicit feedback.

In [15]:
def evaluate_leave_one_out_explicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):
    rng = np.random.RandomState(seed=seed)
    
    # Prepare splits of the datasets
    kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)
    

    # For each split of the dataset train the recommender, generate recommendations and evaluate
    r_pred = []
    r_real = []
    n_eval = 1
    for train_index, test_index in kf.split(interactions_df.index):
        interactions_df_train = interactions_df.loc[interactions_df.index[train_index]]
        interactions_df_test = interactions_df.loc[interactions_df.index[test_index]]
                
        recommender.fit(interactions_df_train, None, items_df)
        item = interactions_df_test.loc[:, ['item_id']]
        item = pd.merge(item, items_df, on='item_id')
        recommendations = recommender.recommend(interactions_df_test.loc[:, ['user_id']], 
                                                item, 
                                                n_recommendations=1)
        
        r_pred.append(recommendations.iloc[0]['score'])
        r_real.append(interactions_df_test['rating'].tolist()[0])
        if n_eval == max_evals:
            break
        n_eval += 1
    
    r_real = np.array(r_real)
#     print(r_pred)
#     print(r_real)
    return rmse(r_pred, r_real), mre(r_pred, r_real), tre(r_pred, r_real)

recommender = LinearRegressionRecommender()
results = [['BaseRecommender'] + list(evaluate_leave_one_out_explicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df))]
results = pd.DataFrame(results, 
                       columns=['Recommender', 'RMSE', 'MRE', 'TRE'])
display(HTML(results.to_html()))

Unnamed: 0,Recommender,RMSE,MRE,TRE
0,BaseRecommender,1.039763,0.401878,0.240408


### Implicit feedback

# Linear Regression Recommender

For every movie we transform its genres into one-hot encoded features and then fit a linear regression model to those features and actual ratings.

In [14]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MultiLabelBinarizer

class LinearRegressionRecommender(object):
    """
    Base recommender class.
    """
    
    def __init__(self):
        """
        Initialize base recommender params and variables.
        """
        self.model = None
        self.mlb = None
    
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.
        
        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items 
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        """
        
        interactions_df = pd.merge(interactions_df, items_df, on='item_id')
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.replace("-", "_", regex=False)
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.replace(" ", "_", regex=False)
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.lower()
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.split("|")
        
        self.mlb = MultiLabelBinarizer()
        interactions_df = interactions_df.join(
            pd.DataFrame(self.mlb.fit_transform(interactions_df.pop('genres')),
                         columns=self.mlb.classes_,
                         index=interactions_df.index))
        
#         print(interactions_df.head())
        
        x = interactions_df.loc[:, self.mlb.classes_].values
        y = interactions_df['rating'].values
    
        self.model = LinearRegression().fit(x, y)
    
    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns 
        top n_recommendations for each user.
        
        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations 
            for each user.
        :rtype: pd.DataFrame
        """
        
        # Transform the item to be scored into proper features
        
        items_df = items_df.copy()
        items_df.loc[:, 'genres'] = items_df['genres'].str.replace("-", "_", regex=False)
        items_df.loc[:, 'genres'] = items_df['genres'].str.replace(" ", "_", regex=False)
        items_df.loc[:, 'genres'] = items_df['genres'].str.lower()
        items_df.loc[:, 'genres'] = items_df['genres'].str.split("|")
        
        items_df = items_df.join(
            pd.DataFrame(self.mlb.transform(items_df.pop('genres')),
                         columns=self.mlb.classes_,
                         index=items_df.index))
        
#         print(items_df)
        
        # Score the item
    
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        
        for ix, user in users_df.iterrows():
            score = self.model.predict(items_df.loc[:, self.mlb.classes_].values)[0]
                
            user_recommendations = pd.DataFrame({'user_id': [user['user_id']],
                                                 'item_id': items_df.iloc[0]['item_id'],
                                                 'score': score})

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

In [14]:
# Quick test of the recommender

lr_recommender = LinearRegressionRecommender()
lr_recommender.fit(ml_ratings_df, None, ml_movies_df)
recommendations = lr_recommender.recommend(pd.DataFrame([[1], [2]], columns=['user_id']), ml_movies_df, 1)

recommendations = pd.merge(recommendations, ml_movies_df, on='item_id')
display(HTML(recommendations.to_html()))

Unnamed: 0,user_id,item_id,score,title,genres
0,1,145,3.282778,Bad Boys (1995),Action|Comedy|Crime|Drama|Thriller
1,2,145,3.282778,Bad Boys (1995),Action|Comedy|Crime|Drama|Thriller


In [15]:
lr_recommender = LinearRegressionRecommender()

results = [['LinearRegressionRecommender'] + list(evaluate_train_test_split_explicit(
    lr_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df, seed=6789))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'RMSE', 'MRE', 'TRE'])

display(HTML(results.to_html()))

Unnamed: 0,Recommender,RMSE,MRE,TRE
0,LinearRegressionRecommender,1.019263,0.370077,0.245447


# TF-IDF Recommender
TF-IDF stands for term frequency–inverse document frequency. Typically Tf-IDF method is used to assign keywords (words describing the gist of a document) to documents in a corpus of documents.

In our case we will treat users as documents and genres as words.

Term-frequency is given by the following formula:
<center>
$$
    \text{tf}(g, u) = f_{g, u}
$$
</center>
where $f_{g, i}$ is the number of times genre $g$ appear for movies watched by user $u$.

Inverse document frequency is defined as follows:
<center>
$$
    \text{idf}(g) = \log \frac{N}{n_g}
$$
</center>
where $N$ is the number of users and $n_g$ is the number of users with $g$ in their genres list.

Finally, tf-idf is defined as follows:
<center>
$$
    \text{tfidf}(g, u) = \text{tf}(g, u) \cdot \text{idf}(g)
$$
</center>

In our case we will measure how often a given genre appears for movies watched by a given user vs how often it appears for all users. To obtain a movie score we will take the average of its genres' scores for this user.

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer

class TFIDFRecommender(object):
    """
    Recommender based on the TF-IDF method.
    """
    
    def __init__(self):
        """
        Initialize base recommender params and variables.
        """
        self.tfidf_scores = None
    
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.
        
        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items 
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        """
        
        self.tfidf_scores = defaultdict(lambda: 0.0)

        # Prepare the corpus for tfidf calculation
        
        interactions_df = pd.merge(interactions_df, items_df, on='item_id')
        user_genres = interactions_df.loc[:, ['user_id', 'genres']]
        user_genres.loc[:, 'genres'] = user_genres['genres'].str.replace("-", "_", regex=False)
        user_genres.loc[:, 'genres'] = user_genres['genres'].str.replace(" ", "_", regex=False)
        user_genres = user_genres.groupby('user_id').aggregate(lambda x: "|".join(x))
        user_genres.loc[:, 'genres'] = user_genres['genres'].str.replace("|", " ", regex=False)
#         print(user_genres)
        user_ids = user_genres.index.tolist()
        genres_corpus = user_genres['genres'].tolist()
        
        # Calculate tf-idf scores
        
        vectorizer = TfidfVectorizer()
        tfidf_scores = vectorizer.fit_transform(genres_corpus)
        
        # Transform results into a dict {(user_id, genre): score}
        
        for u in range(tfidf_scores.shape[0]):
            for g in range(tfidf_scores.shape[1]):
                self.tfidf_scores[(user_ids[u], vectorizer.get_feature_names()[g])] = tfidf_scores[u, g]
                
#         print(self.tfidf_scores)
    
    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns 
        top n_recommendations for each user.
        
        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations 
            for each user.
        :rtype: pd.DataFrame
        """
        
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        
        # Transform genres to a unified form used by the vectorizer
        
        items_df = items_df.copy()
        items_df.loc[:, 'genres'] = items_df['genres'].str.replace("-", "_", regex=False)
        items_df.loc[:, 'genres'] = items_df['genres'].str.replace(" ", "_", regex=False)
        items_df.loc[:, 'genres'] = items_df['genres'].str.lower()
        items_df.loc[:, 'genres'] = items_df['genres'].str.split("|")
                
        # Score items    
        
        for uix, user in users_df.iterrows():
            items = []
            for iix, item in items_df.iterrows():
                score = 0.0
                for genre in item['genres']:
                    score += self.tfidf_scores[(user['user_id'], genre)]
                score /= len(item['genres'])
                items.append((item['item_id'], score))
                
            items = sorted(items, key=lambda x: x[1], reverse=True)
            user_recommendations = pd.DataFrame({'user_id': user['user_id'],
                                                 'item_id': [item[0] for item in items][:n_recommendations],
                                                 'score': [item[1] for item in items][:n_recommendations]})

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

In [17]:
# Quick test of the recommender

tfidf_recommender = TFIDFRecommender()
tfidf_recommender.fit(ml_ratings_df, None, ml_movies_df)
recommendations = tfidf_recommender.recommend(pd.DataFrame([[1], [2]], columns=['user_id']), ml_movies_df, 3)

recommendations = pd.merge(recommendations, ml_movies_df, on='item_id')
display(HTML(recommendations.to_html()))

Unnamed: 0,user_id,item_id,score,title,genres
0,1,2690,0.452122,"Ideal Husband, An (1999)",Comedy|Romance
1,1,3614,0.452122,Honeymoon in Vegas (1992),Comedy|Romance
2,1,4796,0.452122,"Grass Is Greener, The (1960)",Comedy|Romance
3,2,145,0.0,Bad Boys (1995),Action|Comedy|Crime|Drama|Thriller
4,2,171,0.0,Jeffrey (1995),Comedy|Drama
5,2,228,0.0,Destiny Turns on the Radio (1995),Comedy


In [18]:
tfidf_recommender = TFIDFRecommender()

results = [['TFIDFRecommender'] + list(evaluate_leave_one_out_implicit(
    tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(results.to_html()))

Unnamed: 0,Recommender,HR@1,HR@3,HR@5,HR@10,NDCG@1,NDCG@3,NDCG@5,NDCG@10
0,TFIDFRecommender,0.006667,0.053333,0.123333,0.233333,0.006667,0.033491,0.062178,0.096151


In [16]:
def evaluate_leave_one_out_implicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):
    rng = np.random.RandomState(seed=seed)
    
    # Prepare splits of the datasets
    kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)
    
    hr_1 = []
    hr_3 = []
    hr_5 = []
    hr_10 = []
    ndcg_1 = []
    ndcg_3 = []
    ndcg_5 = []
    ndcg_10 = []
    
    # For each split of the dataset train the recommender, generate recommendations and evaluate
    
    n_eval = 1
    for train_index, test_index in kf.split(interactions_df.index):
        interactions_df_train = interactions_df.loc[interactions_df.index[train_index]]
        interactions_df_test = interactions_df.loc[interactions_df.index[test_index]]
                
        recommender.fit(interactions_df_train, None, items_df)
        recommendations = recommender.recommend(interactions_df_test.loc[:, ['user_id']], items_df, n_recommendations=10)
        
        hr_1.append(hr(recommendations, interactions_df_test, n=1))
        hr_3.append(hr(recommendations, interactions_df_test, n=3))
        hr_5.append(hr(recommendations, interactions_df_test, n=5))
        hr_10.append(hr(recommendations, interactions_df_test, n=10))
        ndcg_1.append(ndcg(recommendations, interactions_df_test, n=1))
        ndcg_3.append(ndcg(recommendations, interactions_df_test, n=3))
        ndcg_5.append(ndcg(recommendations, interactions_df_test, n=5))
        ndcg_10.append(ndcg(recommendations, interactions_df_test, n=10))
        
        if n_eval == max_evals:
            break
        n_eval += 1
        
    hr_1 = np.mean(hr_1)
    hr_3 = np.mean(hr_3)
    hr_5 = np.mean(hr_5)
    hr_10 = np.mean(hr_10)
    ndcg_1 = np.mean(ndcg_1)
    ndcg_3 = np.mean(ndcg_3)
    ndcg_5 = np.mean(ndcg_5)
    ndcg_10 = np.mean(ndcg_10)
    
    return hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10

# recommender = TFIDFRecommender()

# results = [['BaseRecommender'] + list(evaluate_leave_one_out_implicit(
#     recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]

# results = pd.DataFrame(results, 
#                        columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

# display(HTML(results.to_html()))

recommender = MostPopularRecommender()
results = [['MostPopularRecommender'] + list(evaluate_leave_one_out_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(results.to_html()))

Unnamed: 0,Recommender,HR@1,HR@3,HR@5,HR@10,NDCG@1,NDCG@3,NDCG@5,NDCG@10
0,MostPopularRecommender,0.026667,0.05,0.083333,0.163333,0.026667,0.040079,0.053997,0.079749


# Tasks

**Task 3.** Implement the MostPopularRecommender (check the slides for class 1), evaluate it with leave-one-out procedure for implicit feedback, print HR@1, HR@3, HR@5, HR@10, NDCG@1, NDCG@3, NDCG@5, NDCG@10.

In [15]:
class MostPopularRecommender(object):
    """
    Most popular recommender class.
    """
    
    def __init__(self):
        """
        Initialize base recommender params and variables.
        """
        self.most_popular = []
        pass
    
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.
        
        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items 
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        """
        grouped = interactions_df.groupby('item_id').size().reset_index(name='counts').sort_values(by=['counts'], ascending=False)
#         grouped = interactions_df.groupby('item_id').count().sort_values(by=['user_id'], ascending=False)
        self.most_popular = list(grouped['item_id'])
    
    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns 
        top n_recommendations for each user.
        
        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations 
            for each user.
        :rtype: pd.DataFrame
        """
        
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        
        for ix, user in users_df.iterrows():
            user_recommendations = pd.DataFrame({'user_id': user['user_id'],
                                                 'item_id': self.most_popular[:n_recommendations],
                                                 'score': [3.0] * n_recommendations})

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

recommender = MostPopularRecommender()
results = [['MostPopularRecommender'] + list(evaluate_leave_one_out_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(results.to_html()))

Unnamed: 0,Recommender,HR@1,HR@3,HR@5,HR@10,NDCG@1,NDCG@3,NDCG@5,NDCG@10
0,MostPopularRecommender,0.026667,0.05,0.083333,0.163333,0.026667,0.040079,0.053997,0.079749


**Task 4.** Implement the HighestRatedRecommender (check the slides for class 1), evaluate it with leave-one-out procedure for implicit feedback, print HR@1, HR@3, HR@5, HR@10, NDCG@1, NDCG@3, NDCG@5, NDCG@10.

In [56]:
class HighestRatedRecommender(object):
    """
    Highest rated recommender class.
    """
    
    def __init__(self):
        """
        Initialize base recommender params and variables.
        """
        self.highest_rated = []
        pass
    
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.
        
        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items 
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        """
        grouped = interactions_df.drop(columns=['user_id']).groupby('item_id').mean('rating').sort_values(by=['rating', 'item_id'], ascending=[False, True])
        self.highest_rated = list(grouped.index)
    
    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns 
        top n_recommendations for each user.
        
        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations 
            for each user.
        :rtype: pd.DataFrame
        """
        
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        
        for ix, user in users_df.iterrows():
            user_recommendations = pd.DataFrame({'user_id': user['user_id'],
                                                 'item_id': self.highest_rated[:n_recommendations],
                                                 'score': [3.0] * n_recommendations})

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

recommender = HighestRatedRecommender()
results = [['HighestRatedRecommender'] + list(evaluate_leave_one_out_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df, max_evals=300, seed=6789))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(results.to_html()))

Unnamed: 0,Recommender,HR@1,HR@3,HR@5,HR@10,NDCG@1,NDCG@3,NDCG@5,NDCG@10
0,HighestRatedRecommender,0.0,0.0,0.0,0.003333,0.0,0.0,0.0,0.001111


**Task 5.** Implement the RandomRecommender (check the slides for class 1), evaluate it with leave-one-out procedure for implicit feedback, print HR@1, HR@3, HR@5, HR@10, NDCG@1, NDCG@3, NDCG@5, NDCG@10.

In [19]:
class RandomRecommender(object):
    """
    Random recommender class.
    """
    
    def __init__(self):
        """
        Initialize base recommender params and variables.
        """
        self.shuffled = []
        self.rng = np.random.RandomState(seed=6789)
        pass
    
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.
        
        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items 
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        """
        shuffle = np.arange(len(items_df))
        self.rng.shuffle(shuffle)
        shuffle = list(shuffle)
        
        self.shuffled = [items_df.iloc[x]['item_id'] for x in shuffle]
    
    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns 
        top n_recommendations for each user.
        
        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations 
            for each user.
        :rtype: pd.DataFrame
        """
        
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        
        for ix, user in users_df.iterrows():
            user_recommendations = pd.DataFrame({'user_id': user['user_id'],
                                                 'item_id': self.shuffled[:n_recommendations],
                                                 'score': [3.0] * n_recommendations})

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

recommender = RandomRecommender()
recommender.fit(ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], None, ml_movies_df)
results = [['RandomRecommender'] + list(evaluate_leave_one_out_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df, max_evals=300, seed=6789))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(results.to_html()))

Unnamed: 0,Recommender,HR@1,HR@3,HR@5,HR@10,NDCG@1,NDCG@3,NDCG@5,NDCG@10
0,RandomRecommender,0.0,0.003333,0.006667,0.006667,0.0,0.001667,0.003102,0.003102


**Task 6.** Gather the results for TFIDFRecommender, MostPopularRecommender, HighestRatedRecommender, RandomRecommender in one DataFrame and print it.

In [54]:
tfidf_recommender = TFIDFRecommender()
tf_results = [['TFIDFRecommender'] + list(evaluate_leave_one_out_implicit(
    tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]

tf_results = pd.DataFrame(tf_results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])


recommender = MostPopularRecommender()
mp_results = [['MostPopularRecommender'] + list(evaluate_leave_one_out_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]

mp_results = pd.DataFrame(mp_results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])


recommender = HighestRatedRecommender()
hr_results = [['HighestRatedRecommender'] + list(evaluate_leave_one_out_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df, max_evals=300, seed=6789))]

hr_results = pd.DataFrame(hr_results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])


recommender = RandomRecommender()
r_results = [['RandomRecommender'] + list(evaluate_leave_one_out_implicit(
    recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df, max_evals=300, seed=6789))]

r_results = pd.DataFrame(r_results, 
                       columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])


result = pd.concat([tf_results, mp_results, hr_results, r_results])
result

[472, 26631, 63276, 2815, 31553, 46335, 1107, 42728, 4558, 77866, 150254, 146662, 2713, 7256, 139717, 112552, 145418, 6751, 132454, 48982, 2739, 27865, 1937, 76738, 1449, 6688, 150993, 1130, 95313, 5155, 99, 96964, 5384, 6342, 27134, 44613, 2313, 70697, 71732, 7832, 168846, 3912, 1777, 3451, 146684, 2307, 207, 3053, 61406, 103606, 1079, 2068, 62437, 979, 2059, 1704, 87869, 82684, 67618, 149011, 2878, 3479, 114818, 7991, 4629, 8581, 69640, 26622, 606, 837, 182715, 3564, 3557, 2504, 7071, 1322, 2301, 4978, 4135, 77191, 2574, 327, 41617, 3687, 189381, 175485, 875, 4573, 68650, 27644, 3668, 6223, 81191, 3394, 1914, 168252, 168456, 4987, 179953, 3628, 842, 89678, 3165, 8861, 3017, 112556, 3468, 163925, 7833, 161830, 3000, 88267, 2046, 4006, 68536, 89386, 8331, 175431, 5636, 113159, 423, 3797, 55272, 3439, 5450, 50356, 44195, 171701, 5093, 3846, 4967, 4259, 1021, 4255, 7988, 151315, 32460, 6720, 7451, 8753, 5752, 1033, 5489, 3981, 1707, 3211, 143365, 1891, 5231, 59018, 8366, 132488, 5041, 21

[108795, 4773, 5513, 8340, 7988, 6927, 95004, 5585, 2088, 1489, 34520, 143472, 274, 3053, 4745, 102716, 7179, 279, 55444, 26265, 69640, 3017, 6342, 7646, 48774, 813, 1151, 31553, 5562, 173209, 8581, 116529, 6193, 8238, 6720, 39292, 70946, 82767, 4947, 3327, 4139, 460, 3667, 27772, 33124, 95873, 2935, 472, 5231, 3507, 3640, 2716, 2815, 56801, 134334, 6216, 1760, 117529, 477, 152105, 145, 313, 750, 60408, 1707, 64501, 2964, 2932, 40819, 70451, 3899, 159061, 101360, 7340, 212, 1862, 346, 4927, 1996, 208, 1107, 3067, 172577, 2661, 423, 103606, 2275, 6755, 790, 70697, 77191, 8908, 7832, 61628, 3211, 8506, 5455, 3021, 135861, 1223, 1353, 4488, 1859, 3394, 875, 959, 3313, 165103, 100390, 3684, 139717, 3846, 2682, 6549, 59018, 3479, 121372, 280, 6301, 158872, 4787, 6808, 6713, 95508, 112818, 80126, 5464, 59604, 74510, 90746, 5212, 95313, 2295, 82167, 2671, 5668, 59336, 3219, 26357, 131578, 6390, 68536, 131656, 122892, 2690, 6039, 5710, 1040, 6884, 3341, 151763, 156553, 32799, 41573, 233, 6581,

[40966, 343, 31921, 4115, 7071, 25962, 280, 1893, 1963, 32294, 26133, 2082, 139747, 180, 1223, 4002, 56339, 103606, 99, 134334, 95147, 3768, 112818, 4616, 3799, 175485, 6187, 146662, 3591, 95165, 2594, 952, 37741, 173941, 2964, 64575, 6927, 134021, 60684, 53000, 5633, 2713, 8605, 994, 94867, 31909, 30822, 8987, 1172, 132796, 2517, 3330, 6058, 7714, 163925, 7116, 5954, 31867, 1439, 3640, 379, 3684, 93242, 39292, 869, 7051, 25752, 248, 4006, 4156, 5568, 92046, 59220, 7256, 1381, 606, 1891, 93982, 3839, 67799, 2806, 1942, 3289, 64010, 159061, 27036, 2521, 2818, 3040, 141816, 152970, 6185, 8798, 2574, 82767, 1040, 7438, 1500, 5625, 1937, 327, 51562, 1980, 1365, 173307, 6948, 788, 3786, 55729, 8581, 6581, 2212, 53138, 1777, 6342, 70305, 150993, 82684, 2180, 99437, 116138, 1322, 274, 46850, 979, 46335, 4081, 1862, 1417, 121372, 5456, 114762, 43912, 2698, 67734, 8331, 47099, 3406, 137218, 3771, 138208, 140850, 7319, 79428, 6616, 98239, 4663, 55272, 3926, 6881, 8644, 190207, 72701, 7832, 15944

[8894, 1489, 171749, 595, 2370, 140110, 2059, 55729, 2350, 121372, 406, 2170, 6669, 3690, 6927, 7311, 543, 2046, 4522, 171701, 66198, 5205, 8753, 4441, 3451, 7482, 141816, 4074, 41617, 131578, 2818, 4156, 405, 2886, 147282, 4509, 370, 71341, 6342, 780, 98160, 118270, 2071, 73804, 2725, 30822, 67799, 2420, 3839, 5155, 3017, 39292, 50601, 134184, 3598, 182749, 8405, 112818, 7831, 3639, 32799, 4243, 2100, 3801, 8132, 7988, 47810, 188, 3165, 606, 4573, 43912, 152105, 95313, 5093, 1893, 26237, 3134, 106144, 51884, 173209, 46347, 4006, 6141, 46335, 6557, 99437, 3768, 3327, 92008, 56801, 64501, 7071, 83349, 31309, 6654, 31737, 1976, 4773, 5127, 149566, 4863, 101360, 44761, 5538, 4483, 111146, 61406, 88267, 542, 49132, 27704, 26375, 31702, 26631, 4052, 76738, 3551, 26357, 5636, 119218, 193, 99636, 6537, 2988, 55272, 85394, 280, 2108, 3211, 2360, 4115, 7345, 1752, 46578, 5710, 156781, 161830, 142509, 73015, 3899, 5502, 89386, 2534, 79677, 53447, 175485, 47382, 48161, 55555, 3524, 104218, 2698, 

[41573, 145, 49274, 3756, 4239, 3994, 139855, 189381, 144352, 62437, 70565, 193, 46347, 6306, 175485, 5384, 4573, 4180, 26524, 126088, 150254, 3165, 32914, 26375, 1862, 6669, 7938, 4630, 26357, 1107, 790, 5568, 101360, 112421, 97230, 337, 757, 5237, 175431, 1704, 2295, 8605, 107338, 3557, 121372, 5127, 3286, 44397, 46578, 117133, 6808, 53972, 2921, 95, 5538, 1021, 25752, 139747, 30822, 2116, 2212, 53447, 5364, 2692, 1417, 5752, 3042, 41566, 91842, 98122, 3524, 94494, 3714, 3981, 122916, 4785, 6285, 3451, 72701, 57368, 6305, 69844, 3912, 1184, 67295, 140301, 3497, 842, 3359, 67618, 156675, 159161, 164540, 69436, 2110, 6192, 4663, 4787, 207, 2046, 4629, 91353, 4156, 1414, 126548, 6058, 6644, 42556, 125916, 3932, 170401, 158872, 5455, 5562, 2504, 26590, 2313, 2108, 95135, 6837, 8753, 27416, 4967, 5502, 2043, 49132, 2698, 100527, 146730, 3839, 3341, 2170, 3000, 2932, 50601, 3040, 6616, 7064, 61406, 5799, 95510, 4509, 2068, 2725, 5489, 185473, 4558, 33171, 1365, 63276, 6538, 5074, 2947, 699

[3912, 5733, 6654, 3823, 5512, 132362, 2692, 85565, 2517, 143525, 4785, 2574, 7225, 4441, 58103, 175387, 170401, 4704, 2262, 4967, 84847, 3986, 126142, 809, 5498, 3643, 92008, 47382, 1964, 174551, 44761, 5364, 207, 757, 6688, 4135, 74685, 70305, 3042, 7757, 39715, 73323, 3060, 93242, 73804, 41573, 1465, 1414, 87960, 2135, 33138, 5990, 2699, 37741, 6223, 3021, 6342, 8025, 2304, 97230, 157110, 113453, 8405, 7071, 126088, 5799, 5103, 37444, 26357, 6973, 7192, 8894, 5562, 66335, 4789, 175485, 1489, 1980, 6245, 8861, 85316, 2212, 5214, 3359, 3053, 4630, 2100, 5288, 34520, 80586, 96964, 3799, 33124, 46578, 2307, 3771, 70697, 4890, 27644, 4509, 7345, 3406, 7932, 1151, 140289, 104760, 6537, 47099, 2922, 5093, 1411, 54768, 280, 98160, 49132, 2713, 6669, 3639, 2190, 5231, 4031, 37830, 74668, 65037, 3289, 172577, 5962, 3302, 168846, 2082, 99145, 156675, 160978, 232, 5181, 44195, 3705, 6957, 7988, 55555, 48161, 2988, 62437, 6390, 5504, 2725, 7482, 670, 31921, 25752, 108795, 6720, 31702, 8605, 1027

[4953, 6185, 3013, 116138, 6808, 289, 952, 940, 55444, 2163, 3614, 3912, 869, 190207, 31921, 92391, 159441, 1365, 3089, 48774, 2116, 26357, 46335, 5212, 477, 1489, 164540, 6957, 26554, 5752, 2314, 33896, 58103, 96726, 65738, 6107, 6722, 31553, 3839, 71341, 1996, 95147, 4180, 4243, 41617, 124404, 94867, 7319, 50189, 70451, 50440, 7833, 63276, 6948, 426, 31737, 27731, 55555, 85565, 135861, 106072, 2970, 106144, 65130, 3213, 3652, 111146, 95135, 68886, 42556, 1760, 3473, 103606, 5468, 116529, 5346, 2357, 1955, 8462, 1799, 179813, 7831, 4522, 119145, 112334, 53972, 33154, 43912, 947, 2692, 3667, 112818, 182715, 5839, 8366, 3479, 147282, 586, 4052, 4704, 4798, 3846, 1439, 8238, 2815, 33171, 3567, 5788, 3932, 2370, 60363, 81191, 145, 8644, 3551, 4464, 152970, 47518, 4204, 3690, 44195, 7005, 2313, 119218, 98239, 327, 5074, 2123, 46850, 304, 61406, 101025, 4616, 82167, 160289, 5364, 27563, 4663, 73804, 4715, 69844, 2301, 2360, 7714, 27618, 2737, 94494, 4509, 61628, 8014, 27744, 7116, 4009, 128

[8656, 4792, 176371, 274, 58103, 3451, 3598, 952, 71304, 136359, 7005, 3327, 81191, 8958, 228, 7071, 33421, 6538, 145418, 99, 3159, 3628, 50189, 128512, 46347, 31867, 74275, 4117, 3899, 56775, 2713, 85394, 4616, 4715, 59220, 7833, 31804, 5288, 4785, 140110, 2110, 5464, 173941, 106072, 160289, 159976, 5093, 52579, 145935, 6187, 180, 426, 131656, 93831, 82767, 2860, 89678, 5888, 780, 56389, 159161, 140301, 3134, 137218, 3289, 1021, 2116, 2295, 27563, 7040, 149011, 88672, 46850, 6193, 26133, 1130, 6957, 26313, 117133, 6348, 7192, 170401, 6223, 1783, 1891, 7179, 4483, 103606, 147282, 6837, 2932, 788, 25752, 5990, 104218, 285, 2163, 5538, 4967, 4787, 8238, 95004, 179813, 606, 126142, 2463, 6509, 94867, 31909, 4294, 74685, 4509, 2304, 73015, 5461, 33896, 2301, 32914, 5543, 2504, 77191, 3639, 3342, 141890, 2989, 8908, 48161, 4397, 4794, 64010, 4558, 1151, 51562, 7451, 6945, 2824, 91974, 3912, 25959, 6342, 3771, 50356, 2065, 56801, 89087, 79428, 3068, 3536, 1449, 5797, 69844, 3597, 7319, 475, 

[3882, 2307, 114762, 875, 4782, 1225, 274, 406, 31367, 131656, 69640, 67618, 4505, 4395, 2878, 54910, 149011, 60684, 4745, 6654, 156553, 104760, 6722, 5612, 6973, 150993, 49132, 26237, 132454, 5074, 5636, 2932, 1996, 34520, 3507, 7319, 98239, 2824, 2357, 2964, 3751, 6193, 6994, 5788, 1752, 128914, 53129, 50601, 64957, 3524, 95004, 6751, 102716, 3912, 4074, 4522, 7051, 3289, 96964, 72701, 4488, 31804, 26133, 1353, 2373, 6305, 111146, 2952, 33779, 81191, 858, 5513, 151315, 2046, 112818, 41573, 1439, 89386, 32294, 4483, 670, 4573, 64114, 33124, 1185, 168252, 94867, 77191, 63808, 343, 6502, 3159, 55272, 4, 4259, 3211, 7833, 27644, 27731, 4987, 3302, 68536, 4255, 140301, 119218, 4947, 7411, 27480, 69606, 289, 2006, 1980, 2739, 5364, 3797, 947, 2100, 5489, 4349, 100527, 2682, 6216, 164655, 41627, 107846, 190207, 3394, 31702, 3536, 140289, 141890, 50, 157110, 34153, 542, 27773, 5585, 105250, 2145, 84952, 146684, 74275, 173209, 4715, 4006, 233, 132796, 3846, 117133, 3714, 34534, 2078, 132, 237

[1783, 146684, 2590, 5461, 3797, 3086, 136359, 1432, 6306, 5456, 6305, 5839, 121372, 1704, 80489, 26237, 55272, 1755, 2818, 212, 3591, 1862, 670, 93831, 102716, 3751, 1417, 2350, 158398, 143472, 3289, 2043, 68205, 117133, 117533, 44195, 51562, 172577, 477, 31921, 2301, 53129, 2108, 99437, 405, 6688, 7451, 3355, 26313, 34312, 5212, 8861, 145418, 248, 1893, 337, 182715, 55729, 175387, 107338, 3684, 6994, 6581, 460, 1707, 3264, 6285, 1130, 124404, 53138, 66783, 97230, 4629, 48738, 1777, 97643, 55555, 5562, 27744, 33677, 87960, 46850, 3423, 1172, 6216, 93242, 7646, 138208, 3628, 6927, 27704, 4055, 7311, 5498, 4792, 2521, 175485, 1414, 174551, 3302, 1496, 53956, 7051, 2736, 3524, 159441, 5364, 5346, 150993, 31553, 33421, 5237, 53447, 4239, 2311, 3823, 134021, 1322, 7938, 48262, 82167, 67734, 152105, 3690, 4876, 2100, 56389, 467, 67799, 150254, 184987, 112370, 7005, 6837, 2145, 60408, 3017, 44397, 60471, 3213, 7083, 3341, 2986, 4987, 165103, 100277, 5214, 158872, 4900, 33779, 2699, 61628, 30

[3799, 2311, 1223, 122916, 3327, 175387, 33162, 33677, 66915, 70451, 132362, 171749, 5181, 2983, 41627, 5512, 2078, 5954, 8366, 156781, 99114, 3053, 3342, 940, 2059, 5455, 7179, 70697, 72701, 586, 3516, 6755, 327, 3591, 2857, 66198, 1033, 66090, 70565, 74851, 6957, 858, 3564, 46850, 8605, 31804, 2262, 4794, 41617, 3926, 39292, 146028, 7051, 50, 750, 3479, 73804, 4055, 6538, 6058, 289, 32294, 5636, 8644, 3406, 7482, 33124, 114707, 2903, 60408, 73323, 53129, 179953, 183301, 1414, 128914, 2190, 74510, 95165, 190213, 80124, 112897, 1411, 180, 124404, 947, 145418, 44397, 2739, 27744, 2677, 2860, 2932, 68650, 460, 48262, 2986, 2716, 4978, 5074, 108795, 135137, 105250, 2815, 875, 6549, 435, 2301, 4042, 3667, 3597, 3165, 4863, 1914, 27644, 152079, 779, 7411, 181719, 543, 32666, 125916, 5502, 3567, 74275, 207, 5513, 3786, 5288, 3551, 2068, 2295, 1799, 5127, 34153, 4573, 165103, 1976, 6348, 53956, 1964, 5205, 2534, 156553, 106072, 4558, 34520, 4509, 7116, 5504, 64114, 1859, 146730, 85316, 111146

[208, 70545, 5710, 95510, 85394, 947, 87960, 27155, 80489, 5612, 55729, 4350, 126088, 26313, 6509, 31909, 2190, 5797, 3516, 280, 114184, 3668, 1184, 6033, 1346, 3021, 74, 6187, 3789, 67734, 89386, 4732, 48342, 3986, 47810, 56801, 2145, 159976, 3671, 70361, 48738, 2725, 6039, 5896, 481, 3264, 53127, 99145, 3912, 6342, 2123, 44761, 7005, 6611, 842, 5155, 1893, 3756, 3751, 106144, 26133, 54910, 158872, 8656, 837, 5231, 85316, 405, 95, 93982, 2590, 125916, 49274, 37830, 118530, 112556, 7832, 2135, 81819, 1439, 1040, 3932, 53138, 140289, 149011, 5585, 58103, 185473, 99, 71205, 1225, 2878, 6722, 51562, 3711, 2964, 27822, 757, 3359, 117133, 179813, 2065, 4663, 140110, 3524, 585, 39292, 47774, 2818, 6808, 3423, 4008, 147286, 4006, 2921, 49132, 7360, 5512, 160289, 7150, 93510, 6538, 4967, 4204, 3404, 116529, 4987, 82767, 5219, 50440, 2275, 4785, 5269, 143365, 116817, 4927, 2989, 42761, 627, 132454, 5502, 5461, 4042, 4947, 31367, 1172, 48982, 2357, 3406, 69606, 117529, 73015, 7071, 31921, 4900, 

[7988, 2082, 6722, 2275, 60363, 947, 6945, 176371, 6549, 257, 174551, 133545, 97230, 313, 2935, 159976, 4978, 3652, 171, 107846, 2517, 3567, 4509, 33421, 2110, 1891, 36931, 5962, 1381, 56389, 97643, 1996, 56801, 7360, 7005, 55555, 95313, 7482, 5455, 4663, 4792, 3628, 175431, 190207, 4139, 126548, 71619, 26704, 4789, 78, 100527, 3451, 4074, 81782, 152970, 3882, 2878, 5733, 5668, 190, 65130, 2065, 6713, 139747, 146684, 25962, 83349, 4794, 1862, 1322, 7051, 3286, 4745, 156706, 346, 2116, 4504, 3432, 53129, 266, 2123, 627, 2692, 44399, 122916, 5103, 6107, 72701, 5041, 3536, 69640, 1353, 30822, 6948, 4704, 274, 3327, 3396, 151315, 98160, 2190, 93242, 98607, 475, 4787, 3801, 158398, 151763, 67734, 147286, 32799, 31309, 1449, 1040, 6348, 2970, 4782, 33162, 3313, 1963, 26313, 4798, 92391, 219, 80124, 89678, 3211, 132, 8238, 2304, 4616, 70545, 96964, 6884, 1107, 2145, 141816, 116817, 289, 112334, 6720, 1365, 6669, 1018, 4505, 92046, 56775, 164540, 4773, 95147, 32657, 2006, 43460, 1489, 3423, 53

[61406, 98122, 101360, 5990, 3394, 145, 50, 467, 5888, 4509, 6185, 26133, 42556, 130452, 2006, 118270, 947, 7345, 3614, 100277, 1996, 2922, 139717, 108795, 3264, 670, 5752, 105250, 2088, 6688, 4927, 2145, 813, 5502, 2163, 179953, 1414, 132796, 26726, 2295, 6927, 1233, 788, 168846, 7282, 159161, 91974, 164540, 2052, 1955, 165103, 49274, 5839, 5668, 40819, 267, 132362, 8605, 757, 8908, 8238, 4663, 5788, 27563, 85565, 8462, 4441, 69640, 70361, 2180, 4715, 460, 8743, 141816, 132, 152970, 4773, 1172, 104837, 2903, 128914, 112334, 4464, 2190, 2886, 89386, 25962, 133780, 116529, 51884, 95147, 53127, 2010, 1381, 7833, 8025, 6538, 182715, 64010, 3687, 37830, 44397, 143525, 63276, 78103, 6342, 232, 97230, 5615, 2135, 142509, 102716, 3591, 7064, 5745, 114184, 3986, 4042, 2857, 2170, 2370, 280, 64501, 4890, 1980, 180, 1012, 423, 790, 2360, 5456, 70946, 161830, 164655, 55444, 66915, 57368, 7051, 32460, 979, 5214, 31309, 228, 112421, 152079, 3652, 6100, 173307, 184987, 68886, 346, 114028, 2860, 9963

[190, 1777, 6720, 5568, 3756, 2716, 112556, 7071, 48982, 138208, 71899, 4139, 2504, 27155, 139747, 173307, 3468, 3598, 90746, 27134, 33138, 80489, 82767, 51884, 179953, 49132, 145418, 4350, 54910, 4896, 8605, 3614, 6616, 3667, 44397, 869, 6039, 3926, 6058, 7179, 149011, 6581, 51088, 2677, 114184, 83349, 190207, 70697, 232, 7832, 2661, 55444, 8581, 105250, 4002, 181719, 6107, 2082, 2110, 3219, 1079, 132454, 172577, 66090, 34153, 151311, 33677, 27685, 3557, 1760, 4616, 1040, 77866, 43460, 1707, 1107, 31867, 171701, 1799, 2262, 6223, 4009, 4464, 97230, 37741, 6881, 3189, 7282, 3608, 168252, 64010, 132, 68205, 74851, 8753, 3768, 3159, 47518, 8238, 8743, 149566, 95313, 5288, 71867, 3423, 5562, 74, 27744, 89678, 343, 4798, 1489, 7064, 41627, 8506, 100277, 477, 6537, 6502, 133780, 5364, 3751, 4259, 26554, 4055, 4900, 121372, 26265, 2289, 55272, 67799, 2006, 95, 1914, 146028, 132362, 7345, 140110, 7757, 133545, 39292, 2692, 40962, 104837, 8987, 7833, 2295, 41617, 3396, 4863, 158398, 5450, 460,

[6713, 100390, 6141, 3567, 112552, 96726, 89087, 1172, 134021, 4704, 59220, 47382, 902, 3557, 4616, 670, 3053, 170401, 6039, 4239, 132454, 48262, 6306, 5346, 2818, 1862, 5041, 1891, 3684, 2006, 2699, 46335, 7071, 1185, 3355, 74851, 257, 2463, 151745, 48774, 115713, 143472, 3396, 139855, 4124, 105250, 7932, 27644, 3067, 460, 8861, 4558, 27416, 140110, 280, 4663, 5797, 1914, 74510, 5745, 121372, 50356, 1012, 7835, 5974, 27822, 114707, 26375, 2043, 65130, 55444, 78103, 6216, 979, 26242, 3062, 31702, 5710, 2275, 1976, 41617, 2135, 3628, 31952, 4522, 1500, 7340, 156781, 6185, 122916, 168252, 27826, 8331, 42191, 176371, 40819, 64957, 168456, 73804, 146662, 165103, 3846, 2373, 3597, 54768, 1223, 289, 4081, 46347, 98239, 26524, 3994, 96964, 1346, 5450, 6751, 7757, 68536, 842, 70697, 2986, 8753, 8636, 119218, 60363, 6348, 3635, 64010, 2052, 181719, 5896, 266, 858, 8462, 232, 50601, 44399, 84847, 7451, 3643, 95508, 2357, 7005, 3719, 1107, 182715, 327, 828, 4504, 33154, 46850, 4395, 7991, 26726, 

[1752, 2396, 952, 36931, 142509, 156553, 81191, 7951, 6581, 406, 158872, 34534, 4052, 159061, 136016, 48982, 7345, 34153, 71899, 3013, 93242, 172577, 3591, 3756, 59018, 2286, 4782, 33896, 5797, 3406, 3342, 171749, 49274, 2716, 140289, 3086, 42556, 1760, 2525, 3497, 3668, 125916, 5288, 3714, 152079, 1862, 70565, 2986, 4204, 2502, 3799, 156706, 267, 3684, 1417, 8366, 5673, 67734, 91974, 96964, 3330, 46347, 233, 3423, 140301, 112818, 59336, 3479, 2922, 1184, 1964, 7646, 979, 144352, 3786, 25959, 4794, 33677, 656, 8238, 1414, 6192, 26375, 151315, 3394, 6285, 188, 8987, 116817, 4704, 173941, 98160, 8014, 80124, 95004, 2275, 6107, 175485, 3289, 8644, 1496, 2698, 94867, 53972, 2970, 41617, 88744, 74275, 6502, 26242, 26357, 171701, 44195, 304, 6305, 73323, 59604, 84952, 139855, 69640, 48161, 170401, 4798, 3652, 160848, 70946, 6424, 4124, 3189, 113159, 4117, 80126, 2262, 79677, 1223, 89386, 3986, 1203, 59220, 159976, 5710, 56775, 5269, 2517, 132362, 8405, 6193, 27773, 106072, 190207, 55272, 369

[7311, 207, 3690, 179953, 3614, 115713, 3213, 107338, 6216, 5455, 2986, 828, 1107, 32914, 2059, 869, 5512, 140850, 31804, 99636, 4787, 26375, 1213, 6192, 3053, 8644, 140289, 185473, 180, 3639, 4009, 102716, 3406, 136016, 2145, 6688, 902, 74668, 3355, 149011, 5489, 4115, 257, 2815, 70361, 1151, 1964, 40966, 2289, 2068, 1225, 33162, 337, 132454, 3684, 4954, 3524, 33138, 4350, 8462, 2698, 44399, 55272, 77191, 5127, 27773, 3640, 3908, 2275, 74851, 95147, 88267, 5498, 355, 4704, 2892, 2065, 4927, 41573, 46850, 44195, 3597, 51088, 7225, 595, 93242, 3451, 2947, 3067, 979, 132, 27865, 481, 139717, 12, 130452, 4504, 2295, 2988, 122916, 4055, 3196, 5896, 7835, 6390, 87960, 69436, 31702, 4031, 31909, 31553, 128512, 1411, 813, 1760, 6948, 42556, 152079, 151315, 4483, 586, 1783, 27685, 3797, 6245, 2716, 8331, 3062, 2502, 3551, 3608, 5468, 122892, 4294, 93988, 141816, 3687, 56339, 1996, 26265, 92391, 56801, 5543, 3040, 2677, 5513, 5346, 3060, 116817, 4745, 423, 3839, 4008, 53956, 173209, 44613, 5074

[189381, 95165, 93988, 2000, 8025, 4745, 59604, 2170, 145, 82167, 157110, 6424, 135861, 7988, 3060, 4789, 1033, 2935, 67295, 3341, 2180, 27416, 2275, 3926, 595, 113453, 71899, 2574, 8366, 4630, 3053, 3640, 98607, 48161, 133545, 160848, 1107, 32460, 114818, 4792, 6342, 5513, 161830, 285, 59336, 64010, 627, 32799, 6216, 2059, 1414, 4663, 74851, 55729, 902, 79677, 6884, 6285, 152970, 53000, 279, 1151, 117533, 3711, 27618, 2286, 4785, 475, 228, 6390, 2970, 69640, 757, 343, 100277, 135137, 26726, 3394, 34520, 53129, 50, 4180, 93982, 3567, 26375, 2716, 152079, 5502, 4732, 6654, 82767, 131578, 31952, 26590, 1365, 146662, 1021, 2311, 132454, 190207, 3134, 136016, 113159, 3643, 134334, 7083, 4987, 49274, 27685, 2145, 232, 2357, 165103, 7832, 6881, 2690, 168252, 48262, 1500, 2071, 179953, 8636, 5214, 3432, 51884, 27134, 185473, 4573, 2818, 156675, 3667, 4773, 175387, 67618, 5489, 126548, 71619, 31223, 2671, 3882, 405, 158398, 5625, 5205, 66198, 147384, 25752, 5962, 89898, 1704, 1213, 4552, 31083

[31804, 84952, 1955, 2517, 89898, 71304, 78103, 3006, 55729, 5962, 1496, 33124, 145, 5504, 81782, 460, 4954, 4009, 4573, 1411, 2088, 6994, 99114, 8894, 8987, 132488, 64575, 379, 930, 790, 4889, 71619, 126088, 2725, 606, 3089, 25752, 5562, 4552, 174551, 104141, 2903, 65738, 6654, 130452, 151311, 7192, 3060, 6223, 4630, 140850, 143365, 2824, 5797, 7932, 103606, 37830, 2100, 1799, 3134, 95165, 44761, 2671, 8861, 8132, 8581, 1465, 6041, 6581, 7646, 67799, 2989, 3516, 1346, 27155, 170, 27036, 175485, 355, 3667, 50851, 3994, 87869, 4, 93510, 327, 6644, 164540, 4239, 7438, 208, 97230, 4715, 1963, 134021, 109723, 2212, 4002, 1107, 595, 4967, 4798, 27704, 266, 4782, 3286, 93988, 190207, 5464, 32914, 53956, 3668, 27731, 3714, 2370, 173209, 757, 1891, 7319, 4081, 207, 2110, 4055, 3017, 3771, 6957, 2350, 5127, 71205, 94494, 2071, 53138, 85565, 1021, 3021, 98607, 7116, 149011, 5468, 93242, 481, 3013, 47518, 3468, 27563, 2313, 117133, 6808, 3536, 2739, 26524, 80586, 141816, 477, 4522, 27772, 59018, 

[4504, 4255, 44399, 2878, 48262, 5826, 4889, 280, 3557, 2736, 4081, 41573, 6927, 2534, 1107, 65130, 6245, 190213, 2116, 2275, 84847, 3597, 5668, 112552, 33124, 2698, 95313, 163925, 2052, 6033, 6957, 2921, 70565, 1185, 130840, 2123, 5127, 50440, 3690, 114707, 107846, 7150, 171749, 4663, 2396, 69844, 4630, 4009, 4052, 56775, 180031, 139747, 5710, 71304, 2988, 952, 41566, 3327, 98160, 5562, 5237, 147286, 47774, 64575, 4204, 757, 6654, 122892, 780, 65037, 34534, 30822, 31952, 212, 91842, 3751, 2682, 64501, 3165, 5974, 2065, 7319, 95004, 6669, 74, 5607, 5346, 3786, 150993, 53127, 2517, 5093, 100277, 33896, 32294, 136016, 6751, 7311, 66198, 7482, 6390, 33162, 100527, 112556, 5954, 1760, 106144, 7646, 5839, 6616, 8331, 2989, 40819, 3359, 7438, 152079, 116718, 59018, 4787, 55444, 6881, 842, 99, 2100, 636, 3756, 274, 42761, 145, 74275, 164540, 1018, 3159, 4483, 147384, 6581, 5040, 114762, 6688, 88267, 3768, 266, 6107, 159976, 3628, 2135, 143472, 4896, 53972, 142509, 176371, 131578, 79677, 585, 

[7714, 1980, 172577, 176371, 3432, 33779, 146028, 57368, 266, 56775, 147282, 1417, 2935, 3089, 2502, 81819, 70697, 26704, 6837, 6644, 122916, 50, 37830, 2921, 8656, 1107, 93988, 2790, 148592, 994, 5041, 1760, 31952, 257, 39292, 1233, 12, 6390, 875, 61406, 51884, 117529, 103606, 31702, 60363, 2304, 6306, 3711, 112818, 92008, 66783, 3355, 56339, 145935, 3668, 8238, 101025, 4052, 5219, 44195, 4896, 5346, 170, 150254, 1322, 119145, 106072, 3196, 147384, 788, 139717, 3957, 112552, 61628, 44613, 3597, 4009, 2373, 33421, 85394, 70305, 4488, 7019, 97643, 27773, 116138, 7282, 3062, 67799, 6537, 4008, 4139, 5969, 150993, 91842, 5896, 3345, 43460, 33162, 4204, 156706, 4395, 64501, 95510, 2857, 4798, 1752, 136016, 2986, 5468, 1465, 68536, 1172, 140110, 2145, 189381, 85565, 6100, 2071, 99636, 27704, 1185, 33677, 4006, 131656, 3799, 30822, 6611, 97230, 41617, 85316, 121372, 2972, 27563, 4900, 26631, 53447, 7832, 126548, 668, 7071, 80489, 4785, 82684, 481, 7040, 2574, 3823, 7064, 27416, 160848, 98607

[3768, 3468, 41627, 27685, 1760, 4796, 636, 405, 7040, 112556, 6538, 779, 49132, 3042, 5212, 3598, 207, 54768, 73015, 135137, 31867, 1489, 148592, 6039, 3652, 3756, 53127, 158398, 26631, 97230, 119145, 4616, 8894, 6713, 3302, 4787, 5799, 813, 47423, 104141, 3394, 59604, 89087, 97643, 3451, 7833, 266, 25959, 60408, 33421, 2964, 3286, 4464, 1244, 7064, 4663, 6041, 3341, 1704, 2886, 3000, 106144, 26357, 147282, 6581, 3986, 2514, 172577, 109723, 6616, 85565, 66783, 91842, 2690, 89678, 3067, 6306, 140301, 51088, 2739, 3040, 8581, 5607, 87869, 65037, 4794, 156706, 5538, 2295, 146028, 4055, 3705, 2671, 27731, 858, 3551, 6100, 2699, 95873, 106072, 55250, 126548, 2661, 2100, 5962, 1322, 171, 94494, 4002, 4074, 5612, 175387, 135861, 98239, 5668, 31260, 95165, 132796, 6808, 27480, 32294, 183301, 2000, 34153, 3639, 56801, 3799, 116138, 107338, 40819, 3479, 355, 4978, 34520, 2682, 4846, 6881, 6502, 95313, 7482, 85316, 31909, 134021, 3017, 7360, 118270, 1233, 6141, 49274, 80, 3536, 37830, 74275, 549

[4, 3687, 7757, 88744, 41566, 266, 67734, 4987, 69436, 170, 66090, 3330, 84952, 3165, 3797, 656, 3211, 7938, 101025, 3473, 2590, 27685, 97230, 193, 54910, 2983, 93988, 2068, 25959, 4204, 47382, 2972, 114707, 257, 7179, 3719, 93831, 5384, 5568, 152105, 145935, 6033, 160289, 48982, 2739, 173941, 64501, 111146, 74075, 3986, 2170, 4294, 6957, 4978, 147282, 32294, 114818, 93982, 4505, 95313, 2163, 37830, 7835, 2071, 33138, 143365, 5456, 142509, 43460, 41617, 27773, 25752, 91842, 3524, 4900, 26237, 7714, 1914, 47423, 5040, 53000, 4115, 4522, 3189, 181719, 130452, 53127, 26265, 585, 228, 140289, 156781, 171701, 5839, 4789, 6538, 26242, 69606, 168252, 2110, 50440, 2574, 6245, 8581, 6185, 70361, 5625, 670, 139855, 4616, 170401, 146730, 108795, 3196, 4349, 2314, 190207, 2502, 7319, 5455, 6722, 68205, 36931, 95873, 6041, 435, 71304, 2790, 930, 89386, 1449, 179813, 6348, 2046, 355, 3926, 49132, 68358, 119145, 7646, 8894, 7282, 175485, 3981, 1233, 26524, 114184, 2065, 1937, 6537, 112897, 3516, 4052

[7932, 33421, 280, 171749, 145418, 6713, 627, 4052, 152105, 61628, 2373, 57368, 27772, 182749, 112818, 2952, 2360, 78, 172577, 2043, 27822, 940, 668, 55272, 4890, 81782, 71341, 70305, 3507, 8894, 132, 71899, 173941, 3264, 5502, 7071, 6305, 1980, 5745, 5888, 3882, 7064, 1223, 142509, 3342, 74851, 133780, 1432, 42761, 26554, 31223, 7319, 4616, 2525, 543, 5269, 2311, 33154, 157110, 5205, 100527, 82167, 32666, 813, 4294, 114184, 7714, 56339, 1346, 5543, 40966, 3899, 6973, 26622, 159061, 3797, 88267, 1213, 1976, 27480, 355, 2314, 25962, 42191, 790, 5464, 1937, 930, 837, 6837, 189381, 66783, 3189, 5710, 140110, 3635, 143472, 47382, 875, 8908, 47774, 3557, 93982, 140850, 4794, 3789, 179813, 5673, 53972, 4629, 74668, 3751, 150993, 1233, 107846, 8366, 656, 2964, 6881, 165103, 46335, 5512, 4259, 65130, 3932, 3000, 122916, 27036, 183301, 2082, 279, 3667, 2350, 3912, 2690, 3994, 6493, 8861, 2396, 26313, 1893, 2059, 51884, 141890, 6688, 99437, 43912, 76738, 97643, 67799, 1996, 5214, 7150, 2574, 194

[2947, 46347, 4785, 1799, 1213, 780, 3557, 5962, 4156, 82684, 4483, 4558, 7835, 3986, 4504, 595, 60471, 2952, 27826, 7991, 145, 5103, 2078, 59018, 4846, 171, 6245, 5788, 2100, 8798, 98607, 3341, 3473, 47774, 172577, 7064, 3667, 47099, 2286, 8656, 117529, 346, 304, 57368, 152079, 26590, 5710, 1432, 132454, 31083, 4294, 46335, 7832, 228, 140110, 176371, 4927, 3060, 3524, 7411, 5974, 1381, 2517, 3714, 3846, 6722, 423, 3687, 2935, 117133, 96964, 143472, 7831, 467, 4798, 5214, 280, 71867, 144352, 114818, 5093, 2857, 32666, 1244, 4876, 5673, 149566, 47810, 27704, 104837, 118270, 6185, 37741, 55444, 112556, 134184, 88744, 132488, 6509, 146028, 2145, 6285, 1012, 48738, 89898, 5636, 2010, 84952, 6192, 3286, 3327, 405, 26622, 27773, 5538, 95147, 4987, 2521, 119218, 2699, 7083, 26726, 3189, 152105, 145935, 2574, 2988, 40819, 67734, 481, 119145, 6616, 3406, 7482, 61406, 2052, 100527, 190207, 41566, 27644, 143525, 1976, 2180, 1223, 48342, 3628, 1411, 62437, 49132, 48262, 952, 5155, 1893, 3643, 3342

[1185, 3957, 6305, 27618, 6616, 36931, 27416, 6808, 1225, 4002, 304, 4978, 4255, 174551, 31702, 7646, 53129, 190, 7991, 92391, 31553, 95510, 6493, 53000, 64501, 8798, 4863, 7482, 3797, 3567, 3479, 6537, 4042, 3768, 3926, 7019, 46578, 149011, 2190, 46347, 5969, 1996, 70305, 89678, 4117, 31083, 3912, 126142, 59604, 53127, 27744, 6549, 2311, 2903, 81782, 1184, 5498, 32460, 147286, 7083, 2692, 104141, 156675, 2078, 151763, 3497, 183301, 4074, 31921, 146730, 3714, 2304, 126088, 346, 5181, 5269, 56389, 2010, 2286, 104837, 4732, 31909, 2463, 63276, 73323, 91842, 2108, 3067, 813, 180031, 7256, 3213, 50601, 2314, 47518, 49132, 1322, 2824, 99145, 156553, 2699, 56339, 7832, 8581, 2964, 66915, 3652, 160978, 106144, 2116, 3608, 111146, 2677, 138208, 3771, 4052, 1439, 145, 5826, 869, 3219, 8636, 56801, 32657, 110746, 6342, 52579, 6611, 8958, 26357, 3668, 5346, 79428, 26242, 39715, 68886, 2860, 435, 42728, 69606, 3705, 79677, 4349, 5103, 53447, 5450, 65130, 74685, 4504, 3017, 636, 3196, 5288, 164655,

[5990, 136359, 95004, 3013, 101360, 53956, 91842, 4006, 48982, 1914, 7835, 2295, 5543, 6301, 156553, 228, 82767, 168846, 66198, 100527, 7988, 93510, 74075, 81819, 7040, 33421, 6193, 8014, 5797, 4008, 4052, 2713, 88672, 6669, 48774, 46850, 117533, 7438, 2986, 132454, 4081, 1417, 135137, 3801, 138208, 2275, 7757, 100390, 5458, 46347, 3507, 6305, 133780, 42761, 96964, 6884, 147282, 8405, 146684, 2123, 6537, 2857, 49132, 1964, 257, 3643, 64114, 37444, 1955, 142509, 144352, 5464, 157110, 26704, 3640, 7319, 3846, 3789, 36931, 809, 47810, 31737, 7646, 7179, 5888, 3396, 179953, 53138, 50189, 139855, 4953, 31952, 337, 1233, 71205, 53127, 2000, 3591, 73015, 102716, 185473, 55555, 66915, 313, 26622, 1203, 3957, 190, 2534, 6187, 143365, 606, 94494, 70451, 87444, 586, 48161, 32666, 232, 1489, 2463, 31921, 164540, 4773, 184987, 875, 595, 2504, 670, 790, 56801, 132, 2065, 78, 95165, 837, 3211, 8340, 7938, 175485, 2594, 947, 27731, 1500, 33896, 2068, 74685, 3786, 4135, 26590, 131578, 5615, 152079, 506

[4008, 79428, 3756, 902, 26590, 50440, 6185, 41573, 113159, 1976, 2517, 27036, 2682, 6301, 2964, 7064, 8132, 5663, 33162, 6502, 6039, 103606, 25962, 3667, 1411, 4397, 3957, 4074, 8405, 4, 99437, 53972, 472, 6927, 2065, 49274, 4558, 70305, 2574, 2594, 4117, 131656, 355, 2313, 2301, 228, 89087, 5212, 4927, 132488, 2275, 190213, 56801, 668, 92008, 51884, 92046, 168846, 959, 188, 3342, 160848, 5797, 6305, 4204, 4483, 125916, 65130, 3067, 2357, 3994, 156781, 1942, 2903, 65037, 6537, 3687, 2932, 126548, 5498, 87869, 87444, 4987, 5458, 2824, 5607, 26704, 6884, 994, 52579, 8743, 47518, 50, 27563, 26622, 59018, 5364, 2690, 104837, 104141, 95, 3000, 139717, 7192, 1449, 30822, 32460, 6187, 60471, 132362, 2370, 69606, 26133, 71867, 7438, 1489, 55729, 4794, 59220, 1414, 6193, 4002, 27826, 27773, 8894, 4616, 7345, 145418, 4889, 435, 5468, 6957, 5636, 1752, 6644, 4789, 627, 6306, 42191, 1012, 115713, 467, 4239, 26524, 42728, 122916, 543, 159061, 89386, 3042, 2970, 3668, 2262, 5990, 7319, 147282, 2770

[2886, 4509, 8908, 1777, 55729, 44761, 6493, 3932, 42556, 2082, 146684, 72701, 140289, 3507, 124404, 92046, 171701, 31702, 189381, 112370, 1955, 207, 5745, 65738, 4156, 8656, 2123, 8753, 5231, 2043, 26133, 7225, 2006, 3406, 5512, 423, 106072, 26237, 34520, 6141, 8132, 788, 59220, 93510, 55250, 56801, 3608, 114818, 89087, 2921, 1489, 112897, 58103, 164655, 2690, 467, 78, 2824, 3289, 2661, 280, 71619, 27644, 6837, 64010, 7071, 135137, 3797, 8644, 150254, 757, 68650, 184987, 57368, 88744, 52579, 114028, 46347, 426, 1172, 133780, 7988, 8894, 116817, 2350, 26313, 7256, 44613, 4978, 2163, 3567, 68205, 2594, 1891, 5364, 112556, 6616, 3396, 6285, 3302, 1976, 3013, 6033, 4255, 8014, 3684, 87869, 3473, 2088, 71732, 979, 3068, 813, 2170, 6581, 4180, 172577, 6688, 149566, 2360, 95135, 74075, 5205, 3652, 83349, 152970, 7150, 2314, 188, 3908, 60684, 61406, 2068, 2932, 5455, 95165, 26590, 84952, 2262, 5636, 2420, 5607, 285, 53956, 750, 4927, 4782, 6884, 3912, 4139, 50440, 327, 156706, 27685, 2190, 30

[2725, 4953, 3711, 8025, 81819, 3219, 2857, 3608, 31702, 6688, 132454, 5039, 4483, 140850, 6957, 4785, 2504, 5212, 3557, 1040, 74275, 3751, 3053, 179953, 4488, 4156, 32294, 426, 52579, 48262, 2690, 101025, 67799, 43460, 2212, 151315, 1496, 33896, 65738, 175485, 44195, 7179, 1996, 5041, 25752, 83480, 1707, 116817, 3359, 6390, 6557, 3286, 104837, 31309, 60408, 2878, 5585, 3327, 2082, 2100, 4081, 31367, 160978, 32657, 2932, 6185, 139747, 141890, 170401, 3932, 47810, 2682, 48738, 8656, 156675, 280, 2163, 44399, 5962, 2059, 33162, 53127, 3786, 164655, 2952, 27480, 55555, 5668, 78103, 64501, 54910, 40819, 33154, 2313, 93242, 2311, 88267, 3536, 5538, 79428, 175431, 56775, 8366, 8238, 73323, 67618, 6549, 4715, 41566, 2190, 5450, 116138, 2396, 2088, 60471, 3768, 7835, 6301, 101360, 173209, 70946, 7282, 183301, 6881, 69436, 337, 7019, 122892, 1172, 3313, 6424, 2713, 837, 69844, 115713, 6722, 7225, 4115, 145418, 50, 164540, 140301, 2886, 26265, 173941, 95313, 93988, 586, 68536, 49132, 2373, 5103,

[41617, 87444, 171749, 80124, 3219, 7282, 2514, 6945, 53972, 100527, 6713, 70361, 6245, 136359, 2314, 3557, 3313, 33779, 107338, 50356, 4139, 138208, 71304, 105250, 1185, 140289, 5954, 104760, 3507, 189381, 152079, 8331, 2737, 219, 4552, 279, 3213, 147282, 1783, 81782, 4509, 5364, 99, 98122, 96726, 4031, 3614, 3006, 2504, 60684, 6669, 6644, 8644, 51884, 280, 27036, 6187, 64501, 131578, 2671, 31223, 59604, 146730, 207, 57368, 70946, 668, 87869, 3786, 6502, 55444, 173209, 66090, 4395, 8238, 1439, 2065, 104837, 1033, 143525, 1996, 7757, 460, 95873, 152105, 26590, 104218, 37444, 102716, 77191, 5562, 70545, 2690, 59220, 1465, 2350, 289, 188, 54910, 32799, 6301, 1225, 7438, 5607, 5673, 81819, 55729, 2370, 25959, 3067, 6424, 5568, 4890, 5633, 152970, 6755, 543, 3957, 6223, 3756, 3635, 159161, 6305, 426, 27685, 56339, 2790, 74275, 33124, 132362, 89898, 168252, 2725, 27416, 7040, 55250, 66783, 8405, 159061, 47518, 6751, 3705, 4294, 6688, 1704, 7225, 2357, 232, 6100, 7451, 4987, 55555, 212, 1171

[5562, 7451, 5788, 69640, 99437, 164540, 88267, 173209, 147286, 33896, 126142, 3912, 4139, 4552, 50356, 4789, 31553, 130840, 64114, 89386, 6245, 627, 44397, 87869, 5585, 4616, 71619, 3614, 3771, 5041, 112818, 80586, 979, 3507, 4863, 6502, 930, 1489, 65130, 47382, 6041, 2989, 6755, 1417, 147282, 135137, 32666, 6713, 55444, 108795, 4164, 135861, 4395, 8894, 47774, 3640, 6881, 3926, 5489, 42761, 2682, 5040, 31367, 26622, 70697, 112421, 636, 7150, 114184, 3068, 2521, 5093, 6751, 40966, 26242, 93982, 49274, 99145, 5456, 7019, 2818, 48982, 85316, 50601, 82684, 54768, 1955, 477, 34153, 175485, 46347, 4204, 32460, 4896, 2212, 750, 8340, 828, 110553, 126548, 2534, 842, 66783, 1777, 112552, 1704, 27416, 8506, 55555, 147384, 145935, 33162, 5974, 5710, 156675, 55272, 7646, 3932, 257, 6654, 1033, 3635, 71899, 1465, 55729, 91353, 132454, 4509, 2313, 95135, 5607, 73804, 3213, 6994, 67799, 266, 3189, 31867, 6549, 2815, 44399, 27772, 5752, 67734, 2737, 80489, 68650, 4488, 136359, 2892, 4987, 138208, 64

[62437, 6216, 1417, 3823, 140301, 133780, 1040, 89386, 1914, 47382, 145, 7451, 337, 2212, 159441, 26357, 46578, 4135, 69640, 180031, 175387, 121372, 126548, 143365, 130452, 110553, 34520, 3327, 2145, 8331, 173209, 68886, 128914, 757, 2699, 31867, 26590, 3981, 63808, 3302, 1760, 5538, 6973, 60684, 48342, 5450, 64114, 5093, 3341, 5364, 51088, 142509, 78, 66090, 542, 4732, 2514, 3564, 1891, 7150, 3159, 114184, 2818, 6193, 136359, 2116, 3768, 285, 5733, 5585, 257, 112818, 42556, 190213, 27036, 7951, 4663, 93242, 48161, 95165, 4204, 2311, 146730, 115713, 2000, 3756, 4009, 140110, 165103, 2713, 156675, 3219, 902, 98239, 1955, 3342, 3671, 1012, 56775, 160978, 4505, 4987, 27731, 2180, 84952, 3042, 1942, 5612, 87960, 47774, 66783, 93831, 26313, 6502, 64575, 48774, 6305, 7646, 69746, 114707, 6306, 1151, 3908, 3690, 91353, 32666, 143525, 26631, 33154, 4042, 101360, 5455, 4947, 1996, 6348, 70565, 139747, 2190, 4, 4846, 67799, 40966, 95135, 126088, 3957, 6581, 70946, 3423, 54910, 4927, 42728, 11853

KeyboardInterrupt: 

**Task 7\*.** Implement an SVRRecommender - one-hot encode genres and fit an SVR model to 

(genre_1, genre_2, ..., genre_N) -> rating

Tune params of the SVR model to obtain as good results as you can. 

To do tuning properly (although in practive people are often happy with leave-one-out and do not bother with dividing the set into training, validation and test sets):
    - divide the set into training, validation and test sets (randomly divide the dataset in proportions 60%-20%-20%),
    - train the model with different sets of tunable parameters on the training set, 
    - choose the best tunable params based on results on the validation set, 
    - provide the final evaluation metrics on the test set for the best model obtained during tuning.

Recommended method of tuning: use hyperopt. Install the package using the following command: `pip install hyperopt`
    
Print the RMSE and MAE on the test set generated with numpy with seed 6789.

In [20]:
from sklearn.svm import SVR
from sklearn.preprocessing import MultiLabelBinarizer

class SVRRecommender(object):
    
    def __init__(self):
        self.model = None
        self.mlb = None
    
    def fit(self, interactions_df, users_df, items_df): 
        print('in fit')
        interactions_df = pd.merge(interactions_df, items_df, on='item_id')
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.replace("-", "_", regex=False)
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.replace(" ", "_", regex=False)
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.lower()
        interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.split("|")
        
        self.mlb = MultiLabelBinarizer()
        interactions_df = interactions_df.join(
            pd.DataFrame(self.mlb.fit_transform(interactions_df.pop('genres')),
                         columns=self.mlb.classes_,
                         index=interactions_df.index))
        
        x = interactions_df.loc[:, self.mlb.classes_].values
        y = interactions_df['rating'].values
    
        self.model = SVR(kernel='rbf', C=1.0, epsilon=0.1, gamma='scale').fit(x, y)
        print('end fit')
    
    def recommend(self, users_df, items_df, n_recommendations=1):
        
        items_df = items_df.copy()
        items_df.loc[:, 'genres'] = items_df['genres'].str.replace("-", "_", regex=False)
        items_df.loc[:, 'genres'] = items_df['genres'].str.replace(" ", "_", regex=False)
        items_df.loc[:, 'genres'] = items_df['genres'].str.lower()
        items_df.loc[:, 'genres'] = items_df['genres'].str.split("|")
        
        items_df = items_df.join(
            pd.DataFrame(self.mlb.transform(items_df.pop('genres')),
                         columns=self.mlb.classes_,
                         index=items_df.index))
    
        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        
        for ix, user in users_df.iterrows():
            score = self.model.predict(items_df.loc[:, self.mlb.classes_].values)[0]
                
            user_recommendations = pd.DataFrame({'user_id': [user['user_id']],
                                                 'item_id': items_df.iloc[0]['item_id'],
                                                 'score': score})

            recommendations = pd.concat([recommendations, user_recommendations])

        return recommendations

In [None]:
svr_recommender = SVRRecommender()

results = [['SVRRecommender'] + list(evaluate_train_test_split_explicit(
    svr_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df, seed=6789))]

results = pd.DataFrame(results, 
                       columns=['Recommender', 'RMSE', 'MRE'])

display(HTML(results.to_html()))