In [3]:
# Switch into the project data directory (-q: quiet, no directory echo).
# The path is relative, so this cell only works from the notebook's start
# directory; re-running it after a successful cd will fail.
%cd -q data/actr_reco

In [2]:
import pandas as pd
import datetime
import numpy as np

In [3]:
# Toy listening log as [user, item, timestamp] rows. Each tuple below is
# (user, item, hour of 2000-01-01, number of consecutive identical rows).
data = [
    [user, item, datetime.datetime(2000, 1, 1, hour)]
    for user, item, hour, repeats in [
        ("user1", "song1", 0, 2),
        ("user1", "song2", 1, 4),
        ("user1", "song1", 2, 1),
        ("user1", "song2", 2, 1),
        ("user1", "song1", 2, 1),
        ("user2", "song3", 2, 8),
    ]
    for _ in range(repeats)
]

In [4]:
# Turn the raw rows into an event log, then index it by user so one user's
# history can be selected with events.loc[user].
events = pd.DataFrame(data, columns=["user", "item", "timestamp"])
events = events.set_index("user")

In [5]:
# Fix the global NumPy seed so this cell and the reward/session cells after it
# produce the same synthetic values on every Restart & Run All.
np.random.seed(42)

# Uniform [0, 1) placeholder values for the three feature columns.
events["v"] = np.random.rand(len(events))
events["a"] = np.random.rand(len(events))
events["d"] = np.random.rand(len(events))

In [6]:
# Synthetic feedback per event, drawn from a pool of one -1 and nine +1 values
# (so a negative reward is picked about one time in ten).
events["reward"] = np.random.choice([-1] + 9*[1], len(events))

In [7]:
# Synthetic session ids: each event draws a "new session" flag from the pool
# [1, 0, 0]; the running sum of the flags is the session id. The counter runs
# over the whole frame and is not restarted per user.
events["session"] = np.random.choice([1] + 2*[0], len(events)).cumsum()

In [8]:
events.head()

Unnamed: 0_level_0,item,timestamp,v,a,d,reward,session
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
user1,song1,2000-01-01 00:00:00,0.082969,0.094969,0.864947,1,0
user1,song1,2000-01-01 00:00:00,0.838377,0.792869,0.027937,1,0
user1,song2,2000-01-01 01:00:00,0.098741,0.133426,0.643574,1,1
user1,song2,2000-01-01 01:00:00,0.492087,0.361749,0.614685,1,1
user1,song2,2000-01-01 01:00:00,0.92853,0.509011,0.841318,1,1


In [9]:
# Pick one user's history to exercise the recommenders below.
# NOTE(review): .loc with a single label presumably returns a DataFrame only
# because this user has several rows; the models index it like a DataFrame.
example_user = "user1"
user_events = events.loc[example_user]

# Algorithms

In [10]:
%%writefile baseline_models.py
import numpy as np

class MostRecent:
    """Baseline that recommends what the user interacted with most recently."""

    def __str__(self):
        """Return the class name, used as the model's display label."""
        return type(self).__name__

    def recommend_next(self, user_events):
        """Return the item of the last row in ``user_events``."""
        item_history = user_events["item"].values
        return item_history[-1]

    def recommend(self, user_events, topn):
        """Return up to ``topn`` distinct items, most recent first."""
        newest_first = user_events["item"].iloc[::-1]
        distinct_items = newest_first.unique()
        return distinct_items[:topn].tolist()

Writing baseline_models.py


In [None]:
# Execute the module written by the %%writefile cell above so MostRecent is
# defined in the notebook namespace.
%run baseline_models.py

mr = MostRecent()

In [None]:
# Next item prediction: must equal the item of the user's last event.
assert mr.recommend_next(user_events) == user_events["item"].values[-1]
mr.recommend_next(user_events)

In [None]:
# TopN item predictions: the first entry must agree with recommend_next.
assert mr.recommend(user_events, 3)[0] == mr.recommend_next(user_events)
mr.recommend(user_events, 3)

In [15]:
%%writefile transition_models.py
class UserBasedTransitionProbability:
    """Recommends the items that most often followed the user's current item.

    The current item is the item of the last row of ``user_events``. Every row
    that comes directly after an occurrence of that item in the same history
    counts as one observed transition.
    """

    def __str__(self):
        """Return the class name, used as the model's display label."""
        return type(self).__name__

    @staticmethod
    def _successor_items(user_events):
        """Return the items of the rows that directly follow the current item."""
        items = user_events["item"]
        if items.empty:
            # No history: there is no current item and hence no transition.
            return items
        is_cur_item = items == items.iloc[-1]
        # fill_value keeps the mask boolean. A plain shift() inserts NaN, which
        # turns the mask into object dtype and makes fillna(False) rely on
        # pandas' deprecated silent downcasting.
        follows_cur_item = is_cur_item.shift(fill_value=False)
        # Positional mask, so repeated index labels cannot affect the selection.
        return items[follows_cur_item.to_numpy()]

    def recommend_next(self, user_events):
        """Return the most frequent successor of the current item.

        When several successors tie, the last entry of ``Series.mode()`` is
        returned. Returns ``-1`` when no transition has been observed.
        """
        successors = self._successor_items(user_events)
        if successors.empty:
            # Return no recommendation
            return -1
        return successors.mode().values[-1]

    def recommend(self, user_events, topn):
        """Return up to ``topn`` successors of the current item, most frequent first.

        Returns an empty list when no transition has been observed.
        """
        successors = self._successor_items(user_events)
        return successors.value_counts().index.tolist()[:topn]

Writing transition_models.py


In [None]:
# Execute the module written by the %%writefile cell above so
# UserBasedTransitionProbability is defined in the notebook namespace.
%run transition_models.py

ubtp = UserBasedTransitionProbability()

In [None]:
# recommend_next signals "no recommendation" with -1, which is truthy, so a bare
# `assert` on the result can never fail; compare against the sentinel instead.
assert ubtp.recommend_next(user_events) != -1
ubtp.recommend_next(user_events)

In [18]:
# The top-ranked transition must agree with the single next-item prediction.
assert ubtp.recommend(user_events, 3)[0] == ubtp.recommend_next(user_events)
ubtp.recommend(user_events, 3)

(['song2', 'song1'], ['song2', 'song1'], ['song2'])

In [9]:
%%writefile emomem_model.py
import numpy as np
import pandas as pd
from scipy import stats, special
import operator

class DecayFitterMixin:
    """Adds ``fit``, which estimates ``self.decay`` from the gaps between repeats."""

    def fit(self, events):
        """Estimate the decay from the distribution of re-consumption gaps.

        For every (user, item) pair the gaps between consecutive events are
        measured in hours; gaps of zero are dropped because their logarithm is
        undefined. A straight line is then fitted to log10(frequency of a gap)
        against log10(gap), and ``self.decay`` is set to the negated slope.

        Parameters
        ----------
        events : pandas.DataFrame
            Event log with "user" and "item" as columns or index levels and a
            datetime column. The column name is taken from ``self.time_col``
            when the host class defines it, otherwise "timestamp".

        Returns
        -------
        float
            The fitted slope (the negative of the stored decay).
        """
        # Honour the host's configurable time column instead of hard-coding it.
        time_col = getattr(self, "time_col", "timestamp")

        delta = events.groupby(["user", "item"])[time_col].diff().dropna().dt.total_seconds() / 3600
        delta = delta[delta != 0]

        # How often each distinct gap length occurs.
        delta_bins = delta.value_counts()
        log_x = np.log10(delta_bins.index.to_numpy(dtype=float))
        log_y = np.log10(delta_bins.to_numpy(dtype=float))

        regression = stats.linregress(log_x, log_y)
        self.decay = -regression.slope
        return regression.slope
    
class ScoreToRecommenderMixin:
    """Turns a ``score(self, user_events)`` method into recommendations.

    The host class must provide ``score``; its result is used as a pandas
    Series of scores indexed by item.
    """

    def recommend_next(self, user_events):
        """Return the item with the highest score."""
        return self.score(user_events).idxmax()

    def recommend(self, user_events, topn):
        """Return the ``topn`` best-scoring items, best first."""
        best = self.score(user_events).nlargest(topn)
        return best.index.tolist()
    
class BaseLevelComponent(ScoreToRecommenderMixin, DecayFitterMixin):
    """Models occurrence: items consumed often and recently score higher.

    Parameters
    ----------
    decay : float
        Exponent of the power-law decay applied to the age of each event.
    time_col : str
        Name of the datetime column that holds the event time.
    """

    def __init__(self, decay=0.5, time_col="timestamp"):
        self.decay = decay
        self.time_col = time_col

    def __str__(self):
        """Return the class name, with the decay appended when it is not 0.5."""
        if self.decay == 0.5:
            return type(self).__name__
        else:
            return type(self).__name__ + str(self.decay)

    def score(self, user_events):
        """Return, per item, the sum of ``age ** -decay`` over the item's events.

        The age of an event is its distance in hours to the last row of
        ``user_events`` plus one hour, so the most recent event has age 1 and
        no age is zero. Items appear in order of first occurrence.
        """
        # Read both the reference time and the event times from self.time_col;
        # the reference used to come from a hard-coded "timestamp" column, which
        # broke any non-default time_col.
        event_times = user_events[self.time_col]
        ts_ref = event_times.iloc[-1]

        age_hours = ((ts_ref - event_times) + pd.Timedelta(hours=1)).dt.total_seconds() / 3600
        # assign() returns a new frame, so the caller's data stays untouched.
        aged_events = user_events.assign(ts_diff=age_hours)
        bll_scores = aged_events.groupby("item", sort=False)["ts_diff"].apply(
            lambda x: np.sum(np.power(x.values, -self.decay))
        )
        return bll_scores
        
class AssociativeComponent(ScoreToRecommenderMixin):
    """Models co-occurrence of items with the current item within sessions.

    Parameters
    ----------
    session_col : str
        Name of the column that holds the session id of each event.
    """

    def __init__(self, session_col="session"):
        self.session_col = session_col

    def __str__(self):
        """Return the class name, used as the component's display label."""
        return type(self).__name__

    def score(self, user_events):
        """Return, per item, P(item | context sessions) / P(item).

        The context item is the item of the last row. P(item) is the share of
        all sessions that contain the item; the conditional probability is the
        share of the context item's sessions that also contain the item.
        """
        sessions_by_item = user_events.groupby("item")[self.session_col]

        # Sessions in which the context item occurs.
        is_context_item = user_events["item"] == user_events["item"].iloc[-1]
        context_sessions = set(user_events[is_context_item][self.session_col].unique())

        session_total = user_events[self.session_col].nunique()
        item_probability = sessions_by_item.nunique() / session_total

        # Number of context sessions each item shares.
        shared_sessions = sessions_by_item.apply(
            lambda sessions: len(context_sessions.intersection(sessions.unique()))
        )
        conditional_probability = shared_sessions / len(context_sessions)

        return conditional_probability / item_probability
        
class PartialMatchingComponent(ScoreToRecommenderMixin):
    """Models similarity between each item's features and the current event's.

    Parameters
    ----------
    name : str, optional
        Display label; falls back to the class name.
    feature_cols : list of str
        Feature columns to compare. ``score`` indexes the frame with this
        value, so it has to be set before scoring.
    similarity_function : callable
        Called as ``f(candidate_matrix, context_features)``; ``np.dot`` by default.
    """

    def __init__(self, name=None, feature_cols=None, similarity_function=np.dot):
        self.name = name or type(self).__name__
        self.feature_cols = feature_cols
        self.similarity_function = similarity_function

    def __str__(self):
        """Return the configured display label."""
        return self.name

    def score(self, user_events):
        """Return the similarity of every distinct item to the last event.

        Each item is represented by the feature values of its first row in
        ``user_events``; the context is the feature values of the last row.
        """
        first_rows = user_events.drop_duplicates(subset=["item"])
        similarities = self.similarity_function(
            first_rows[self.feature_cols].values,
            user_events[self.feature_cols].iloc[-1],
        )
        return pd.Series(data=similarities, index=first_rows["item"].values)
        
class ValuationComponent(ScoreToRecommenderMixin):
    """Models affect as a running valuation of each item's rewards.

    Parameters
    ----------
    name : str, optional
        Display label; falls back to the class name.
    learning_rate : float
        Fraction of the gap to the new reward that each update closes.
    initial_valuation : float
        Valuation every item starts from.
    reward_col : str
        Name of the column that holds the reward of each event.
    """

    def __init__(self, name=None, learning_rate=0.2, initial_valuation=0, reward_col="reward"):
        self.name = name or type(self).__name__
        self.learning_rate = learning_rate
        self.initial_valuation = initial_valuation
        self.reward_col = reward_col

    def __str__(self):
        """Return the configured display label."""
        return self.name

    def score(self, user_events):
        """Return the valuation of every item after replaying its rewards in order."""
        learning_rate = self.learning_rate
        start_value = self.initial_valuation

        def replay_rewards(item_rewards):
            # Move the valuation a step towards each reward, in event order.
            valuation = start_value
            for reward in item_rewards.values:
                valuation = valuation + learning_rate * (reward - valuation)
            return valuation

        return user_events.groupby("item")[self.reward_col].apply(replay_rewards)
    
class NoiseComponent(ScoreToRecommenderMixin):
    """Adds randomness: every distinct item gets a uniform random score.

    Parameters
    ----------
    seed : int
        Seed of the component's own NumPy random generator.
    """

    def __init__(self, seed=42):
        self.rng = np.random.default_rng(seed)

    def __str__(self):
        """Return the class name, used as the component's display label."""
        return type(self).__name__

    def score(self, user_events):
        """Return a fresh random score in [0, 1) per distinct item.

        Every call draws new numbers from the generator, so repeated calls give
        different scores. Missing item values are ignored.
        """
        # One pass for both the labels and their count. nunique() skips missing
        # values while unique() keeps them, so combining the two produced a
        # length mismatch as soon as an item was missing.
        items = user_events["item"].dropna().unique()
        return pd.Series(data=self.rng.random(len(items)), index=items)
    
class ActrRecommender(ScoreToRecommenderMixin):
    """Combines multiple components into one weighted score per item.

    Parameters
    ----------
    components : list
        Objects with a ``score(user_events)`` method returning a pandas Series
        indexed by item.
    weights : sequence of numbers, optional
        One weight per component; every component gets weight 1 when omitted
        or empty.
    softmax : bool
        Whether each component's scores are passed through a softmax before
        weighting.
    name : str, optional
        Display label; defaults to the class name followed by the component
        labels in parentheses.
    use_normalize_trick : bool
        Whether the component maximum is subtracted before the softmax.
    """

    def __init__(self, components, weights=None, softmax=True, name=None, use_normalize_trick=False):
        self.components = components
        # Test for None/empty explicitly: the truth value of a NumPy array is
        # ambiguous, so `if weights` raised for array weights.
        if weights is None or len(weights) == 0:
            weights = [1] * len(components)
        self.weights = weights
        self.softmax = softmax
        self.name = name if name else type(self).__name__ + "(" + ",".join(map(str, self.components)) + ")"
        self.use_normalize_trick = use_normalize_trick

    def __str__(self):
        """Return the configured display label."""
        return self.name

    def score(self, user_events):
        """Return the weighted sum of all component scores per item.

        An item that a component does not score contributes 0 for that
        component.
        """
        # Explicit dtype: an empty Series without one is object-typed (and
        # deprecated) in current pandas.
        scores = pd.Series(dtype=float)

        for comp, w_c in zip(self.components, self.weights):
            comp_scores = comp.score(user_events)
            if self.softmax:
                if self.use_normalize_trick:
                    # https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
                    comp_scores = comp_scores - np.max(comp_scores)
                # special.softmax may hand back a bare ndarray; re-attach the
                # item index so the scores still align by item below.
                comp_scores = pd.Series(
                    special.softmax(comp_scores.to_numpy(dtype=float)),
                    index=comp_scores.index,
                )
            comp_scores = comp_scores * w_c
            scores = scores.combine(comp_scores, operator.add, 0)
        return scores

Overwriting emomem_model.py


In [22]:
# Execute the module written by the %%writefile cell above so the component
# classes are defined in the notebook namespace.
%run emomem_model.py

# For each deterministic component, the next-item pick must lead its top-N list.
bll_new = BaseLevelComponent(decay=2)
assert bll_new.recommend_next(user_events) == bll_new.recommend(user_events, 3)[0]

assoc = AssociativeComponent()
assert assoc.recommend_next(user_events) == assoc.recommend(user_events, 3)[0]

emo_new = PartialMatchingComponent(feature_cols=["v", "a", "d"])
assert emo_new.recommend_next(user_events) == emo_new.recommend(user_events, 3)[0]

valu = ValuationComponent()
assert valu.recommend_next(user_events) == valu.recommend(user_events, 3)[0]

# No consistency assert for the noise component: each score() call draws new
# random numbers, so two calls need not agree.
noise = NoiseComponent()

# Show the top-3 lists of all components side by side.
assoc.recommend(user_events, 3), bll_new.recommend(user_events, 3), emo_new.recommend(user_events, 3), valu.recommend(user_events, 3), noise.recommend(user_events, 3), 

(['song1', 'song2'],
 ['song1', 'song2'],
 ['song1', 'song2'],
 ['song2', 'song1'],
 ['song1', 'song2'])

In [23]:
# Combine the four deterministic components; the base-level component counts double.
actr = ActrRecommender([bll_new, assoc, emo_new, valu], weights=[2, 1, 1, 1], softmax=True)
print(actr)
# The next-item pick must lead the top-N list.
assert actr.recommend_next(user_events) == actr.recommend(user_events, 3)[0]

actr.recommend(user_events, 3)

ActrRecommender(BaseLevelComponent,AssociativeComponent,PartialMatchingComponent,ValuationComponent)


['song1', 'song2']

In [26]:
def valuation(prev, reward=1, lr=0.05):
    """Return ``prev`` moved a fraction ``lr`` of the way towards ``reward``."""
    prediction_error = reward - prev
    return prev + lr * prediction_error

# Replay 100 valuation updates starting from 0 and print the final valuation.
val = 0
normal_reward = 1
alt_reward = -1
# Pool to draw the "use the alternative reward" flag from; False disables it.
alt_sim = False
# alt_sim = 8*[0]+2*[1]
for i in range(100):
    if alt_sim and np.random.choice(alt_sim):  # simulate negative rewards
        val = valuation(val, alt_reward)
        print("alt: " + str(val))
    else:
        val = valuation(val, normal_reward)
print(val)

0.9940794707796661


In [27]:
# Accumulate t ** -0.5 over t = 1..999, then print the sum and its natural log.
ts = range(1, 1000)

tas = 0
for t in ts:
    tas = tas + np.power(t, -0.5)
print(tas)
print(np.log(tas))

61.7693859886415
4.123407869374147


In [28]:
np.power(3, -np.log(tas)), tas

(0.010780389779679721, 61.7693859886415)

In [29]:
def valuation(prev, reward):
    """Return ``prev`` moved 5% of the way towards ``reward``."""
    prediction_error = reward - prev
    return prev + 0.05 * prediction_error

# 100 random rewards drawn from a pool of one -1 and nine +1 values.
rew_list = np.random.choice([-1] + [1] * 9, 100)
# Wrap the two-argument update as a NumPy ufunc so it can be folded over an array.
valuation_ufunc = np.frompyfunc(valuation, 2, 1)

In [30]:
# Fold the valuation update over the whole reward sequence.
# NOTE(review): without `initial=`, ufunc.reduce presumably uses rew_list[0] as
# the first `prev` instead of starting from a valuation of 0 — confirm intended.
valuation_ufunc.reduce(rew_list)

0.9205833149740941