In [108]:
!pip install recogym



## Offline

In [2]:
import gym, recogym

# env_0_args is a dictionary of default parameters (i.e. number of products)
from recogym import env_1_args, Configuration

# You can overwrite environment arguments here:
env_1_args['random_seed'] = 42

# Initialize the gym for the first time by calling .make() and .init_gym()
env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)

# .reset() env before each episode (one episode per user).
env.reset()
done = False

# Counting how many steps.
i = 0

observation, reward, done = None, 0, False
while not done:
    action, observation, reward, done, info = env.step_offline(observation, reward, done)
    print(f"Step: {i} - Action: {action} - Observation: {observation.sessions()} - Reward: {reward}")
    i += 1

Step: 0 - Action: None - Observation: [{'t': 0, 'u': 0, 'z': 'pageview', 'v': 0}] - Reward: None
Step: 1 - Action: {'t': 1, 'u': 0, 'a': 3, 'ps': 0.1, 'ps-a': ()} - Observation: [] - Reward: 0
Step: 2 - Action: {'t': 2, 'u': 0, 'a': 4, 'ps': 0.1, 'ps-a': ()} - Observation: [] - Reward: 0
Step: 3 - Action: {'t': 3, 'u': 0, 'a': 5, 'ps': 0.1, 'ps-a': ()} - Observation: [] - Reward: 0


## Online

In [3]:
# Create list of hard coded actions.
actions = [None] + [1, 2, 3, 4, 5]

# Reset env and set done to False.
env.reset()
done = False

# Counting how many steps.
i = 0

while not done and i < len(actions):
    action = actions[i]
    observation, reward, done, info = env.step(action)
    print(f"Step: {i} - Action: {action} - Observation: {observation.sessions()} - Reward: {reward}")
    i += 1

Step: 0 - Action: None - Observation: [{'t': 0, 'u': 0, 'z': 'pageview', 'v': 1}] - Reward: None
Step: 1 - Action: 1 - Observation: [] - Reward: 0
Step: 2 - Action: 2 - Observation: [] - Reward: 0
Step: 3 - Action: 3 - Observation: [] - Reward: 0
Step: 4 - Action: 4 - Observation: [] - Reward: 0
Step: 5 - Action: 5 - Observation: [{'t': 6, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 7, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 8, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 9, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 10, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 11, 'u': 0, 'z': 'pageview', 'v': 6}, {'t': 12, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 13, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 14, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 15, 'u': 0, 'z': 'pageview', 'v': 6}, {'t': 16, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 17, 'u': 0, 'z': 'pageview', 'v': 1}, {'t': 18, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 19, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 20, 'u': 0, 'z': 'pageview', 'v': 4}, {'t': 21, '

## Agents

In [4]:
import numpy as np
from numpy.random import choice
from recogym.agents import Agent

In [5]:
class PopularityAgent(Agent):
    def __init__(self, config):
        # Set number of products as an attribute of the Agent.
        super(PopularityAgent, self).__init__(config)

        # Track number of times each item viewed in Organic session.
        self.organic_views = np.zeros(self.config.num_products)

    def train(self, observation, action, reward, done):
        """Train method learns from a tuple of data.
            this method can be called for offline or online learning"""

        # Adding organic session to organic view counts.
        if observation:
            for session in observation.sessions():
                #print(session)
                self.organic_views[session['v']] += 1

    def act(self, observation, reward, done):
        """Act method returns an action based on current observation and past
            history"""

        # Choosing action randomly in proportion with number of views.
        prob = self.organic_views / sum(self.organic_views)
        action = choice(self.config.num_products, p = prob)

        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': prob[action]
            }
        }

    def __str__(self):
        return "Popularity Agent"

    def __repr__(self):
        return str(self)

In [6]:
from recogym import env_1_args

from copy import deepcopy

env_1_args['random_seed'] = 42
env_1_args['num_products'] = 10

env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)

# Import the random agent.
from recogym.agents import RandomAgent, random_args

# Create the two agents.
num_products = env_1_args['num_products']
popularity_agent = PopularityAgent(Configuration(env_1_args))
agent_rand = RandomAgent(Configuration({
    **env_1_args,
    **random_args,
}))

In [7]:
    
# Credible interval of the CTR median and 0.025 0.975 quantile.
recogym.test_agent(deepcopy(env), deepcopy(popularity_agent), 100, 100)

Organic Users: 100%|██████████| 100/100 [00:00<00:00, 2289.29it/s]
Users:   0%|          | 0/100 [00:00<?, ?it/s]

START: Agent Training #0
START: Agent Training @ Epoch #0


Users: 100%|██████████| 100/100 [00:01<00:00, 58.08it/s]
Organic Users: 0it [00:00, ?it/s]
Users:   8%|▊         | 8/100 [00:00<00:01, 72.84it/s]

END: Agent Training @ Epoch #0 (1.76804518699646s)
START: Agent Evaluating @ Epoch #0


Users: 100%|██████████| 100/100 [00:01<00:00, 50.44it/s]

END: Agent Evaluating @ Epoch #0 (2.003143548965454s)





(0.013802502037802979, 0.011412602818724443, 0.01649998372466488)

In [8]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

class UserSimilarityAgent(Agent):
    def __init__(self, config):
        # Set number of products as an attribute of the Agent.
        super(UserSimilarityAgent, self).__init__(config)

        # Track number of times each item viewed in Organic session.
        self.user_items = pd.DataFrame(columns=range(self.config.num_products))
        
        self.organic_views = np.zeros(self.config.num_products)


    def train(self, observation, action, reward, done):

        # Adding organic session to organic view counts.
        if observation:
            for session in observation.sessions():
                self.organic_views[session['v']] += 1


        if observation is not None and len(observation.sessions()) > 0:

            user = observation.current_sessions[-1]['u']
            if not user in self.user_items.index:
                self.user_items.loc[user] = np.zeros((self.config.num_products))

            for elt in observation.sessions():
                item = elt['v']
                self.user_items.loc[[user], [item]] += 1

            

    def act(self, observation, reward, done):
        """Act method returns an action based on current observation and past
            history"""
            
        cos_similarity = cosine_similarity(self.user_items)

        prob = self.organic_views / sum(self.organic_views)

        if observation is not None and len(observation.current_sessions) > 0:
            current_user = observation.current_sessions[-1]['u']
            if not current_user in self.user_items.index:
                action = choice(self.config.num_products, p = prob)
            else:
                np.fill_diagonal(cos_similarity, 0)
                indexes = list(self.user_items.index)
                
                most_similar_user = np.argmax(cos_similarity[indexes.index(current_user), :])
                most_similar_user_items = self.user_items.iloc[most_similar_user]
                action = np.argmax(most_similar_user_items, axis=1)
                print(action)
        else:
            action = choice(self.config.num_products, p = prob)

        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': 1#prob[action]
            }
        }

        def __str__(self):
            return "User Similarity Agent"

        def __repr(self):
            return str(self)

In [9]:
user_similarity_agent = UserSimilarityAgent(Configuration(env_1_args))

In [10]:
recogym.test_agent(deepcopy(env), deepcopy(user_similarity_agent), 10, 10)

Organic Users:   0%|          | 0/100 [00:00<?, ?it/s]

START: Agent Training #0
START: Agent Training @ Epoch #0


Organic Users: 100%|██████████| 100/100 [00:01<00:00, 83.56it/s]
Users: 100%|██████████| 10/10 [00:00<00:00, 20.71it/s]
Organic Users: 0it [00:00, ?it/s]
The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return bound(*args, **kwds)
The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return bound(*args, **kwds)
The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(va

END: Agent Training @ Epoch #0 (1.6821050643920898s)
START: Agent Evaluating @ Epoch #0
0
1
1
1
9
9
9
9
9
9
9
4
4
4
4
4
4
3
3
3
9
9
9
9
9
9
9
9


The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return bound(*args, **kwds)
The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return bound(*args, **kwds)
The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return bound(*args, **kwds)
The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead

9
9
9
9
9
9
9
9
9
9
9
9
4
9
1
1
1
1
1
4
4
4
4
4
4
END: Agent Evaluating @ Epoch #0 (0.3655250072479248s)





(0.010769031843008092, 0.004558382632979687, 0.02097758313786202)

In [11]:
import pandas as pd
from sklearn.cluster import KMeans

class ClusteringAgent(Agent):
    def __init__(self, config, nb_clusters):
        # Set number of products as an attribute of the Agent.
        super(ClusteringAgent, self).__init__(config)

        # Track number of times each item viewed in Organic session.
        self.user_items = pd.DataFrame(columns=range(self.config.num_products))
        
        self.organic_views = np.zeros(self.config.num_products)
        self.nb_clusters = nb_clusters
        self.kmeans = None
        self.nb_sessions = 0    #count the sessions
        self.training_rate = 10 #train each training_rate session to allow scaling

    def train(self, observation, action, reward, done):

        # Adding organic session to organic view counts.
        if observation:
            for session in observation.sessions():
                self.organic_views[session['v']] += 1


        if observation is not None and len(observation.sessions()) > 0:

            user = observation.current_sessions[-1]['u']
            if not user in self.user_items.index:
                self.user_items.loc[user] = np.zeros((self.config.num_products))

            for elt in observation.sessions():
                item = elt['v']
                self.user_items.loc[[user], [item]] += 1

        if self.user_items.shape[0] > self.nb_clusters and self.nb_sessions % 10 == 0:#if enought data and not trained since a few sessions
            self.kmeans = KMeans(n_clusters=self.nb_clusters, random_state=0).fit(self.user_items)

        self.nb_sessions += 1

    def act(self, observation, reward, done):
        """Act method returns an action based on current observation and past
            history"""
            
        prob = self.organic_views / sum(self.organic_views)

        if observation is not None and len(observation.current_sessions) > 0 and self.kmeans:
            history = np.zeros((self.config.num_products))
            for sess in observation.current_sessions:
                history[sess['v']] += 1
            
            history = history.reshape((1, history.shape[0]))#reshape because single data prediction
        
            pred = self.kmeans.predict(history)
            centroid = self.kmeans.cluster_centers_[pred]
            action = np.argmax(centroid)

        else:
            action = choice(self.config.num_products, p = prob)

        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': 1#prob[action]
            }
        }

        def __str__(self):
            return "User Similarity Agent"

        def __repr(self):
            return str(self)

In [12]:
clustering_agent = ClusteringAgent(Configuration(env_1_args), nb_clusters=10)

In [13]:
recogym.test_agent(deepcopy(env), deepcopy(clustering_agent), 10, 10)

Organic Users:   0%|          | 0/100 [00:00<?, ?it/s]

START: Agent Training #0
START: Agent Training @ Epoch #0


Organic Users: 100%|██████████| 100/100 [00:01<00:00, 64.74it/s]
Users: 100%|██████████| 10/10 [00:02<00:00,  3.61it/s]
Organic Users: 0it [00:00, ?it/s]
Users: 100%|██████████| 10/10 [00:00<00:00, 58.54it/s]

END: Agent Training @ Epoch #0 (4.319154739379883s)
START: Agent Evaluating @ Epoch #0
END: Agent Evaluating @ Epoch #0 (0.17881321907043457s)





(0.009009764104843622, 0.0035084439191863173, 0.018452446640405684)

In [111]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class FeedForward(torch.nn.Module):
        def __init__(self, input_size, hidden_size, num_classes):
            super(FeedForward, self).__init__()
            self.input_size = input_size
            self.hidden_size  = hidden_size
            self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
            self.relu = torch.nn.ReLU()
            self.fc2 = torch.nn.Linear(self.hidden_size, num_classes)
            #self.softmax = torch.nn.Softmax()

        def forward(self, x):
            hidden = self.fc1(x)
            relu = self.relu(hidden)
            output = self.fc2(relu)
            #output = self.softmax(output)
            return output


def get_model(max_length, hidden, num_classes):
    net = FeedForward(max_length, hidden, num_classes)
    return net

def criterion(out, label):
    return (label - out)**2

def train_model(model, x_train, y_train, optimizer, criterion):
    for epoch in range(100):
        for idx, (x, y) in enumerate(zip(x_train, y_train)):
            print(f"ID({id}), X = {x}, y = {y}")
            x = Variable(torch.FloatTensor([x]), requires_grad=True)
            y = Variable(torch.FloatTensor([y]), requires_grad=False)
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred.squeeze(), y)
            loss.backward()
            optimizer.step()
            if (i % 10 == 0):
                print("Epoch {} - loss: {}".format(epoch, loss.data[0]))

In [114]:
import torch
import tensorflow.keras

class MLPAgent(Agent):
    def __init__(self, config, max_length, epochs=100):
        # Set number of products as an attribute of the Agent.
        super(MLPAgent, self).__init__(config)

        # Track number of times each item viewed in Organic session.
        self.user_items = pd.DataFrame(columns=range(self.config.num_products))
        
        self.organic_views = np.zeros(self.config.num_products)
        self.nb_sessions = 0    #count the sessions
        self.training_rate = 10 #train each training_rate session to allow scaling
        self.max_length = max_length
        self.epochs = epochs
        self.model = get_model(self.max_length, 256, self.config.num_products)
        self.trained = False
        self.sequences = []
        self.labels = []

    def train(self, observation, action, reward, done):
        # Adding organic session to organic view counts.
        if observation:
            for session in observation.sessions():
                self.organic_views[session['v']] += 1


        if observation is not None and len(observation.sessions()) > 0:
            user = observation.current_sessions[-1]['u']
            if not user in self.user_items.index:
                self.user_items.loc[user] = np.zeros((self.config.num_products))
            seq = np.zeros(self.max_length)
            idx = 0
            sessions = observation.sessions()
            #print(sessions)
            for elt in sessions[-(self.max_length + 1):-1]:  
                item = elt['v'] # Because we need 0 as a padding value
                seq[idx] = item + 1
                idx += 1 # We want to update the position, but we want to keep last
                self.user_items.loc[[user], [item]] += 1
            idx = min(self.max_length - 1, idx)
            label = sessions[-1]["v"]
            #print(seq, "=", label)

            seq[idx] = 0 # set to mask to remove last
            self.sequences.append(seq)
            self.labels.append(label)

        self.nb_sessions += 1

    def act(self, observation, reward, done):
        """Act method returns an action based on current observation and past
            history"""
        if not self.trained:
            x_train = torch.FloatTensor(self.sequences)
            #print(x_train.size())
            y_train = torch.LongTensor(self.labels)
            criterion = torch.nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(self.model.parameters(), lr = 0.01)   
            self.model.train()
            for epoch in range(self.epochs):
                optimizer.zero_grad()    # Forward pass
                y_pred = self.model(x_train)    # Compute Loss
                loss = criterion(y_pred.squeeze(), y_train)
                #print('Epoch {}: train loss: {}'.format(epoch, loss.item()))    # Backward pass
                loss.backward()
                optimizer.step()
            self.model.eval()
            self.trained = True
            
            
        prob = self.organic_views / sum(self.organic_views)

        if observation is not None and len(observation.current_sessions) > 0:# and self.kmeans:
            history = np.zeros((self.max_length))
            #history = []
            sessions = observation.sessions()
            for idx, elt in enumerate(sessions[-(self.max_length):]):  
                item = elt['v'] # Because we need 0 as a padding value
                history[idx] = item + 1
            action = self.model(torch.tensor(history,  dtype=torch.float32, requires_grad=False))
            
            #history = history.reshape((1, history.shape[0]))#reshape because single data prediction
        
            #pred = self.kmeans.predict(history)
            #centroid = self.kmeans.cluster_centers_[pred]
            action = np.argmax(action.detach().numpy())

        else:
            action = choice(self.config.num_products, p = prob)

        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': 1#prob[action]
            }
        }

        def __str__(self):
            return "User Similarity Agent"

        def __repr(self):
            return str(self)

In [115]:
rnn_agent = MLPAgent(Configuration(env_1_args), max_length=10, epochs=100)

In [116]:
recogym.test_agent(deepcopy(env), deepcopy(rnn_agent), 10, 10)

Organic Users:   9%|▉         | 9/100 [00:00<00:01, 82.14it/s]

START: Agent Training #0
START: Agent Training @ Epoch #0


Organic Users: 100%|██████████| 100/100 [00:00<00:00, 127.28it/s]
Users: 100%|██████████| 10/10 [00:00<00:00, 30.13it/s]
Organic Users: 0it [00:00, ?it/s]
Users:  30%|███       | 3/10 [00:00<00:00, 28.45it/s]

END: Agent Training @ Epoch #0 (1.1242835521697998s)
START: Agent Evaluating @ Epoch #0


Users: 100%|██████████| 10/10 [00:00<00:00, 47.16it/s]

END: Agent Evaluating @ Epoch #0 (0.22292566299438477s)





(0.007215591865689798, 0.0025138757590767572, 0.01575313602523143)

## Benchmark for our agents

In [None]:
import matplotlib.pyplot as plt

#simple class that run nb_simulations with a given number of users and plot the resulting curves
#just to see which agent is better on the long run

class Benchmark:
    def __init__(self, agents):
        self.agents = agents
        self.agents_simu_res = {}
        self.agents_scale_res = {}
        self.scales = None

    def simulate(self, nb_simulations=10, num_offline_users=1000, num_online_users=1000):
        self.nb_simulations = nb_simulations

        for ag in self.agents:
            results = []
            for i in range(self.nb_simulations):
                
                quantiles = recogym.test_agent(deepcopy(env), deepcopy(user_similarity_agent), num_online_users, num_offline_users)
                results.append(quantiles)

            self.agents_simu_res[str(ag)] = results

    def plot_simu_medians(self, quantile=2):#plot median by default
        if len(self.agents_simu_res) == 0:
            print("you need to simulate first")
            return

        for k, v in self.agents_simu_res.items():
            medians = []

            for med in v:
                medians.append(med[quantile-1])
        
            plt.plot(range(1, self.nb_simulations+1), medians, label=k)

    def check_scale(self, scales, num_offline_users=1000, num_online_users=1000):
        self.scales = scales
        for ag in self.agents:
            results = []
            for nb_users in scales:
                quantiles = recogym.test_agent(deepcopy(env), deepcopy(user_similarity_agent), num_online_users, num_offline_users)
                results.append(quantiles)
            self.agents_scale_res[str(ag)] = results

    def plot_scale_res(self, quantile=2):#plot median by default
        if len(self.agents_scale_res) == 0:
            print("you need to check scale first")
            return
        
        for k, v in self.agents_scale_res.items():
            medians = []

            for med in v:
                medians.append(med[quantile-1])
        
            plt.plot(self.scales, medians, label=k)

In [None]:
bench = Benchmark([popularity_agent, user_similarity_agent, clustering_agent])

In [None]:
%%capture
bench.simulate(nb_simulations=15, num_offline_users=100, num_online_users=100)

In [None]:
bench.plot_simu_medians()

In [None]:
%%capture
bench.check_scale([10, 20, 50, 100, 500, 1000], num_offline_users=100, num_online_users=100)

In [None]:
bench.plot_scale_res()