In [None]:
# default_exp models.itempop

# ItemPop
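> Implementations of two item-popularity recommenders: `ItemPop` (most-interacted items within a recent time window) and `ItemPop_v2` (items ranked by how often they occur in training sequences).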

In [None]:
#hide
from nbdev.showdoc import *
from fastcore.nb_imports import *
from fastcore.test import *

In [None]:
#export
import pandas as pd
from itertools import islice, cycle
import operator

## ItemPop

In [None]:
#export
class ItemPop():
    """
    Makes recommendations based on popular items.

    Popularity is the number of rows per item in the interaction log,
    counted over the `days` days preceding the most recent date in the log.

    Parameters
    ----------
    max_K : int
        Number of top items kept by `fit`. Ignored when `with_filter` is True.
    days : int
        Length (in days) of the look-back window used by `fit`.
    item_column, user_column, dt_column : str
        Column names of the item id, user id and interaction date.
    with_filter : bool
        When True, `fit` keeps `FILTER_POOL_SIZE` items instead of `max_K`,
        so that `recommend_with_filter` still has candidates left after
        dropping the items a user already interacted with.
    """

    # Number of top items kept when the model is created with with_filter=True.
    FILTER_POOL_SIZE = 300

    def __init__(self, max_K=10, days=30, item_column='item_id', user_column='user_id', dt_column='date', with_filter=False):
        self.max_K = self.FILTER_POOL_SIZE if with_filter else max_K
        self.days = days
        self.item_column = item_column
        self.user_column = user_column
        self.dt_column = dt_column
        self.recommendations = []

    def fit(self, df, ):
        """
        Store the `max_K` most frequent items of the recent window.

        The window starts `days` days before midnight of the latest date in
        `df[dt_column]` (exclusive). The ranked item ids are stored in
        `self.recommendations`, most popular first.
        """
        # Coerce so that date columns stored as strings are accepted as well;
        # this is a no-op for columns that already hold datetimes.
        dates = pd.to_datetime(df[self.dt_column])
        min_date = dates.max().normalize() - pd.DateOffset(days=self.days)
        recent_items = df.loc[dates > min_date, self.item_column]
        self.recommendations = recent_items.value_counts().head(self.max_K).index.values

    def recommend(self, users=None, N=10):
        """
        Return the top `N` stored items.

        With `users=None` a single ranked sequence is returned; otherwise a
        list with one entry per user, every entry being that same sequence.
        """
        recs = self.recommendations[:N]
        if users is None:
            return recs
        return [recs] * len(users)

    def recommend_with_filter(self, train, user_ids, top_K=10):
        """
        Recommend popular items, skipping items a user already has in `train`.

        Parameters
        ----------
        train : DataFrame
            Interaction log with `user_column` and `item_column`.
        user_ids : list-like
            Users to produce recommendations for.
        top_K : int
            Maximum number of items per user.

        Returns
        -------
        DataFrame
            Columns `user_column` and `item_column`. Users absent from
            `train` come first and receive the unfiltered top `top_K` slice
            of the stored ranking; users present in `train` follow and
            receive a list of the first `top_K` ranked items they have not
            interacted with. Index labels are those of `pd.Series(user_ids)`.
        """
        user_ids = pd.Series(user_ids)
        is_known = user_ids.isin(train[self.user_column])
        known_users = user_ids[is_known]
        new_users = user_ids[~is_known]

        full_recs = self.recommendations
        topk_recs = full_recs[:top_K]

        # Users without history: nothing to filter, everyone gets the same slice.
        new_recs = pd.DataFrame({self.user_column: new_users})
        new_recs[self.item_column] = [topk_recs] * len(new_users)

        known_recs = pd.DataFrame({self.user_column: known_users})
        filtered = []
        if len(known_users):
            # Only the history of the requested users is needed; sets make the
            # membership test below constant-time.
            history = train.loc[train[self.user_column].isin(known_users)]
            known_items = history.groupby(self.user_column)[self.item_column].apply(set).to_dict()
            no_items = frozenset()
            # Ranked items are unique, so scanning the whole pool and cutting
            # at top_K yields the first top_K unseen items for each user.
            filtered = [
                [item for item in full_recs
                 if item not in known_items.get(user_id, no_items)][:top_K]
                for user_id in known_users
            ]
        known_recs[self.item_column] = filtered

        # Concatenate only non-empty parts: a row-wise apply on an empty frame
        # (the previous approach) does not return a Series, and empty frames
        # can also disturb the dtypes of the concatenated result.
        parts = [part for part in (new_recs, known_recs) if len(part)]
        if not parts:
            return new_recs
        return pd.concat(parts, axis=0)

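Example: popular-item fallback recommendations with `recommend_with_filter`, which skips items a user has already interacted with.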

In [None]:
# Users to serve with the popularity recommender: ids from `random_items`
# that have no row in `boost_recs` (treated here as cold users).
idx_for_popular = list(
    set(pd.Series(random_items).unique()) - set(boost_recs['user_id'].unique())
)
idx_for_popular

[20000, 367658, 203823, 10000, 737202]

In [None]:
# Preview the interaction log that is passed to `ItemPop.fit` below.
# NOTE(review): `interactions_df` is not created in any visible cell — confirm where it is loaded.
interactions_df

Unnamed: 0,user_id,item_id,last_watch_dt,total_dur,watched_pct
0,917575,10353,2021-03-13,11131,58
1060,275080,15574,2021-03-13,670,11
1059,120517,9550,2021-03-13,32456,100
1058,15045,6115,2021-03-13,22830,100
1057,92904,10135,2021-03-13,3709,71
...,...,...,...,...,...
542914,484870,9157,2021-08-22,9435,6
542913,8428,5732,2021-08-22,6570,100
542912,818134,11505,2021-08-22,60,0
542923,314358,14111,2021-08-22,2590,35


In [None]:
# Rank items by interaction count over the 30 days before the latest
# `last_watch_dt`; with_filter=True keeps a 300-item candidate pool so that
# per-user filtering still leaves enough items.
pop_model = ItemPop(
    days=30,
    dt_column='last_watch_dt',
    with_filter=True,
)
pop_model.fit(interactions_df)

In [None]:
# Up to 10 popular items per requested user, leaving out items the user
# already has in `interactions_df`.
recs_popular = pop_model.recommend_with_filter(
    train=interactions_df,
    user_ids=idx_for_popular,
    top_K=10,
)
recs_popular

Unnamed: 0,user_id,item_id
3,10000,"[10440, 9728, 15297, 13865, 3734, 12192, 4151,..."
0,20000,"[10440, 9728, 15297, 13865, 3734, 12192, 4151,..."
1,367658,"[10440, 9728, 15297, 13865, 3734, 12192, 4151,..."
2,203823,"[10440, 9728, 15297, 13865, 3734, 12192, 4151,..."
4,737202,"[9728, 15297, 13865, 3734, 12192, 4151, 11863,..."


## ItemPop_v2

In [None]:
#export
class ItemPop_v2:
    """
    ItemPop_v2 simply recommends items ordered by their popularity
    in the training set.

    Popularity is the number of times an item occurs across all training
    sequences. The same ranking is returned for every user.
    """
    def __init__(self):
        super().__init__()
        # Empty ranking until fit() is called, so recommend() and
        # get_popular_list() work on an unfitted model.
        self.top = []

    def fit(self, train_data, seq_col='sequence'):
        """
        Count item occurrences and store the ranking in `self.top`.

        :param train_data: object whose `train_data[seq_col].values` is an
            iterable of item sequences (e.g. a pandas DataFrame)
        :param seq_col: name of the column holding the sequences
        """
        count_dict = {}
        for sequence in train_data[seq_col].values:
            for item in sequence:
                count_dict[item] = count_dict.get(item, 0) + 1

        # Stable sort: items with equal counts keep first-seen order.
        ranked = sorted(count_dict.items(), key=operator.itemgetter(1), reverse=True)
        # Each entry wraps the item id in a one-element list: ([item], count).
        self.top = [([item], count) for item, count in ranked]

    def recommend(self, user_profile, user_id=None):
        """
        Given the user profile return a list of recommendation
        :param user_profile: the user profile as a list of item identifiers
            (not used: the ranking is the same for every user)
        :param user_id: (optional) the user id (not used)
        :return: list of (items, score) pairs ordered by decreasing score,
            where score is the raw occurrence count, e.g. [([6], 8), ([2], 7)]
        """
        return self.top

    def get_popular_list(self):
        """Return the stored popularity ranking (same object as `recommend`)."""
        return self.top

    @staticmethod
    def get_recommendation_list(recommendation):
        """Extract the item part of every (items, score) pair."""
        return [entry[0] for entry in recommendation]

    @staticmethod
    def get_recommendation_confidence_list(recommendation):
        """Extract the score part of every (items, score) pair."""
        return [entry[1] for entry in recommendation]

> **References**
> - https://github.com/blondered/ods_MTS_RecSys_Challenge_solution