# Top popular
This recomender uses the top popular strategy.

### Libraries and seed

In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy.sparse as sps
import matplotlib.pyplot as pyplot

from sklearn.model_selection import train_test_split

# Random seed for reproducibility
seed = 42

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)

### Load data

In [2]:
item_type = pd.read_csv('data/data_ICM_type.csv', usecols = ['item_id', 'feature_id'], dtype={0:int, 1:int})
item_length = pd.read_csv('data/data_ICM_length.csv', usecols = ['item_id', 'data'], dtype={0:int, 1:int})
interactions = pd.read_csv('data/interactions_and_impressions.csv', dtype={0:int, 1:int, 2:str, 3:int})
interactions = interactions.rename(columns={'UserID': 'user_id', 'ItemID': 'item_id', 'Data': 'data', 'Impressions': 'impressions'})

In [3]:
interactions

Unnamed: 0,user_id,item_id,impressions,data
0,0,11,012345678910111213141516171819,1
1,0,21,,0
2,0,21,,0
3,0,21,20212223242526272829,0
4,0,21,,1
...,...,...,...,...
5826501,41628,20448,,0
5826502,41628,20896,,1
5826503,41628,21506,,1
5826504,41628,22882,,0


### Preprocessing

In [4]:
views = interactions[interactions["data"] == 0].drop(['data', 'impressions'], axis=1)
views.head()

Unnamed: 0,user_id,item_id
1,0,21
2,0,21
3,0,21
37,0,124
42,0,808


In [5]:
views = views.drop_duplicates()
views.head()

Unnamed: 0,user_id,item_id
1,0,21
37,0,124
42,0,808
44,0,1326
49,0,1995


In [6]:
views_matrix = sps.coo_matrix((np.ones(len(views)), (views["user_id"].values, views["item_id"].values)))
# views_matrix = views_matrix.tocsr()
views_matrix

<41629x24507 sparse matrix of type '<class 'numpy.float64'>'
	with 1051828 stored elements in COOrdinate format>

In [7]:
item_popularity = np.sort(np.ediff1d(views_matrix.tocsc().indptr))
item_popularity

array([   3,    4,    5, ..., 4676, 5366, 7382], dtype=int32)

In [8]:
train_test_split = 0.80

train_mask = np.random.choice([True,False], views_matrix.nnz, p=[train_test_split, 1-train_test_split])
test_mask = np.logical_not(train_mask)

views_train = sps.csr_matrix((views_matrix.data[train_mask], (views_matrix.row[train_mask], views_matrix.col[train_mask])))
views_test = sps.csr_matrix((views_matrix.data[test_mask], (views_matrix.row[test_mask], views_matrix.col[test_mask])))

### Recomender

In [9]:
class TopPopRecommender(object):
    def __init__(self, URM_train):
        self.URM_train = URM_train

    def fit(self):
        item_popularity = np.ediff1d(self.URM_train.tocsc().indptr)
        self.popular_items = np.argsort(item_popularity)
        self.popular_items = np.flip(self.popular_items, axis = 0)
    
    
    def recommend(self, user_id, at=10):
        seen_items = self.URM_train.indices[self.URM_train.indptr[user_id]:self.URM_train.indptr[user_id+1]]
        unseen_items_mask = np.in1d(self.popular_items, seen_items, assume_unique=True, invert = True)
        unseen_items = self.popular_items[unseen_items_mask]
        recommended_items = unseen_items[0:at]
            
        return recommended_items


top_pop = TopPopRecommender(views_train)
top_pop.fit()

for user_id in range(10):
    print(top_pop.recommend(user_id, at=5))

[22 56 20 25 23]
[21 22 56 20 25]
[21 22 56 25 23]
[ 21  20  25  23 450]
[21 22 56 20 23]
[21 22 20 25 23]
[21 22 56 20 25]
[21 22 56 20 23]
[21 22 56 20 25]
[ 21  56  25  23 450]


### Evaluation

### Retrain and submission

In [11]:
final_recommender = TopPopRecommender(views)
final_recommender.fit()

AttributeError: 'DataFrame' object has no attribute 'tocsc'

In [None]:
from utils import create_submission

create_submission(final_recommender)