In [379]:
# implementing OPE of the IPWLearner using synthetic bandit data
from sklearn.linear_model import LogisticRegression
# import open bandit pipeline (obp)
from obp.dataset import SyntheticBanditDataset, logistic_reward_function
from obp.policy import IPWLearner
from obp.ope import (
    OffPolicyEvaluation,
    RegressionModel,
    InverseProbabilityWeighting as IPW,
    DirectMethod as DM,
    DoublyRobust as DR,
    ReplayMethod as RM,
    SelfNormalizedInverseProbabilityWeighting as SNIP
)

from src.custom_estimators import (
    NaiveMethod as NW,
    ReplayProbabilistic as RP,
    MAP
)


import numpy as np
from copy import deepcopy

import matplotlib.pyplot as plt
import pandas as pd
import random 

from typing import Iterable


from src.libs.dataclass import DataClass
from src.libs.preprocessors import IDEncoder, random_split
from src.models.contentbased import CBF

In [2]:
def index_of_n_true(bool_array, n):
    """Return the position of the ``n``-th truthy entry of ``bool_array``.

    Args:
        bool_array: Boolean (or truthy/falsy) array-like; expected to be 1-D,
            since only the first-axis indices from ``np.where`` are used.
        n: 1-based rank of the truthy entry to locate.

    Returns:
        The 0-based index of the ``n``-th truthy entry, or ``None`` when ``n``
        is smaller than 1 or ``bool_array`` has fewer than ``n`` truthy entries.
    """
    true_indices = np.where(bool_array)[0]

    # Reject n < 1 explicitly: ``true_indices[n - 1]`` would otherwise wrap
    # around (n == 0 silently returned the *last* truthy index).
    if 1 <= n <= len(true_indices):
        return true_indices[n - 1]

    return None

In [167]:
# Size of the synthetic problem.
n_items = 1000
n_users = 100
# Dimensionality of the latent ("true") user and item embeddings.
dim_user_emd = 128
dim_item_emd = 128
# Must be a plain string: the previous trailing comma turned this into the
# 1-tuple ("binary",).
reward_type = "binary"
# Reward function for the synthetic data (obp's logistic reward model).
reward_function = logistic_reward_function

# Number of liked items a user must encounter before uniform exploration stops.
n_like_explore = 10

In [168]:
# Fix NumPy's global RNG so the latent embeddings are reproducible.
np.random.seed(1)

# Standard-normal latent embeddings, one row per item / per user.
# Items are drawn before users; keep this order to reproduce the same values.
items_emd_shape = (n_items, dim_item_emd)
users_emd_shape = (n_users, dim_user_emd)
items_emd_true = np.random.normal(size=items_emd_shape)
users_emd_true = np.random.normal(size=users_emd_shape)

In [169]:
# Synthetic bandit environment: every item is an action whose context is its
# latent embedding, and user embeddings play the role of the bandit context.
dataset = SyntheticBanditDataset(
    n_actions=n_items,
    dim_context=dim_user_emd,
    action_context=items_emd_true,
    reward_type="binary",
    reward_function=logistic_reward_function,
    random_state=2,
)

In [170]:
# Full user x item reward table. For each item, every user is paired with that
# single item and `dataset.sample_reward` is called once; the per-item results
# are stacked and transposed so rows are users and columns are items.
# NOTE(review): `sample_reward` presumably draws from the dataset's RNG, so the
# table is only fixed for a given `random_state` and call order - confirm.
true_reward_deterministic = np.array(
    [
        dataset.sample_reward(users_emd_true, np.repeat(item, n_users))
        for item in range(n_items)
    ]
).T

In [420]:
# Overall fraction of positive rewards across all user/item pairs.
true_reward_deterministic.mean()

0.48328

In [171]:
# `random.sample` uses the stdlib RNG, which the earlier `np.random.seed` call
# does not touch; seed it so the exposure order survives a kernel restart.
random.seed(1)

# ranks[u] is a uniformly random permutation of all item ids: the order in
# which items are shown to user u under the uniform logging policy.
ranks = np.array([random.sample(range(n_items), n_items) for _ in range(n_users)])

# reorder[u, k] is the true reward user u gives to the k-th item shown to them.
reorder = true_reward_deterministic[np.arange(true_reward_deterministic.shape[0])[:, np.newaxis], ranks]

In [172]:
# Observed-feedback matrix (users x items), initialised to NaN; entries are
# filled in only for the user/item pairs that get exposed.
feedback_uniform = np.full((n_users, n_items), np.nan)


In [173]:
# Simulate uniform exploration: each user walks through their random item order
# until they have liked `n_like_explore` items; only those items get feedback.
for i in range(n_users):
    idx = index_of_n_true(reorder[i]==1, n_like_explore)

    # `index_of_n_true` returns None when the user likes fewer than
    # `n_like_explore` items in total; in that case they see every item
    # instead of crashing on `None + 1`.
    n_shown = reorder.shape[1] if idx is None else idx + 1

    rec_items = ranks[i][:n_shown]
    rewards = reorder[i][:n_shown]

    feedback_uniform[i, rec_items] = rewards

In [315]:
# Long-format interaction table built from the observed (non-NaN) entries of
# `feedback_uniform`. Row order is kept as before: all 0-feedback pairs first,
# then all 1-feedback pairs. Each mask is scanned once instead of twice, and the
# frame is built in one shot rather than by assigning tuples into an empty one.
zero_users, zero_items = np.where(feedback_uniform == 0)
one_users, one_items = np.where(feedback_uniform == 1)

feedback_uniform_df = pd.DataFrame({
    'user_id': np.concatenate([zero_users, one_users]),
    'item_id': np.concatenate([zero_items, one_items]),
    'feedback': np.repeat([0, 1], [len(zero_users), len(one_users)]),
})

In [414]:
# Generate observed user / item features

feature_obs_dim = 32
# NOTE(review): the latent embeddings are unit-variance, so noise with
# scale=100000 makes the observed features almost pure noise - confirm intended.
noise_level = 100000

np.random.seed(42)

# Only the first `feature_obs_dim` latent dimensions are observed, each with
# additive Gaussian noise. Item noise is drawn before user noise; keep this
# order to reproduce the same values.
items_emd_visible = items_emd_true[:, :feature_obs_dim]
items_emd_obs = items_emd_visible + np.random.normal(scale=noise_level, size=items_emd_visible.shape)
users_emd_visible = users_emd_true[:, :feature_obs_dim]
users_emd_obs = users_emd_visible + np.random.normal(scale=noise_level, size=users_emd_visible.shape)

emd_columns = [f'emd_{i}' for i in range(feature_obs_dim)]

# Feature tables: the positional row index becomes the id column.
items_emd_obs_df = (
    pd.DataFrame(items_emd_obs, columns=emd_columns)
    .rename_axis('item_id')
    .reset_index()
)
users_emd_obs_df = (
    pd.DataFrame(users_emd_obs, columns=emd_columns)
    .rename_axis('user_id')
    .reset_index()
)

# Bundle interactions with user and item features.
data_full = DataClass(feedback_uniform_df, users_emd_obs_df, items_emd_obs_df)

# Train / test split
# NOTE(review): no seed is passed to `random_split` here - confirm the split is
# reproducible.
data_train, data_test = random_split(data_full)


In [343]:
# Fit the id encoder on the training split only, then apply the same mapping to
# the test split.
enc = IDEncoder()

data_train_enc = enc.fit_transform(data=data_train)
data_test_enc = enc.transform(data=data_test)

In [346]:
# Quick probe of the fitted encoder on a few raw ids.
enc.transform([1,2,3])

array([1, 2, 3])

In [400]:
from scipy import sparse
from implicit.als import AlternatingLeastSquares


class ALS:
    """Thin wrapper around ``implicit``'s ``AlternatingLeastSquares`` recommender.

    The wrapper owns an ``IDEncoder`` that is fitted on the training data, builds
    the sparse user x item feedback matrix, trains the ALS model on it and
    returns top-N recommendations.

    Attributes set by ``fit``:
        seen_items: per-user lists of encoded item ids, ordered by encoded user id.
        _feedback_sprs: CSR user x item feedback matrix the model was trained on.
    """

    def __init__(self, n_factors=32) -> None:
        """Create an unfitted model.

        Args:
            n_factors: Number of latent factors of the ALS model.
        """

        self.n_factors = n_factors

        self.id_encoder = IDEncoder()

        self._model = AlternatingLeastSquares(
            factors=self.n_factors,
            random_state=42,
        )

    def _preprocess(self, data: DataClass) -> sparse.csr_matrix:
        """Encode ids and build the sparse user x item feedback matrix.

        Fits ``self.id_encoder`` on ``data`` and records ``self.seen_items`` as a
        side effect.

        Args:
            data (DataClass): Training data; its encoded ``interactions`` frame
                must provide ``user_id``, ``item_id`` and ``feedback`` columns.

        Returns:
            sparse.csr_matrix: Feedback matrix where each row is a user and each
                column is an item (both in encoded id space).
        """

        data_enc = self.id_encoder.fit_transform(data)
        interactions = data_enc.interactions

        self.seen_items = interactions.groupby('user_id')['item_id'].agg(list).sort_index().values.tolist()

        # NOTE(review): feedback values of 0 are stored as explicit entries in
        # the matrix - confirm this is how negatives should reach the model.
        feedback_sprs = sparse.csr_matrix((interactions['feedback'],
                                         (interactions['user_id'], interactions['item_id'])
                                         ))

        return feedback_sprs


    def fit(self, data: DataClass):
        """Fit the id encoder and the ALS model on ``data``."""

        self._feedback_sprs = self._preprocess(data)
        self._model.fit(self._feedback_sprs)


    def recommend(self, user_id=None, top_n=20):
        """Return the top-``top_n`` recommended items for the requested users.

        Args:
            user_id: ``None`` for every user known to the encoder, an iterable of
                raw user ids, or a single raw user id.
            top_n: Number of items to recommend per user.

        Returns:
            Array of recommended item indices, one row per requested user.
            NOTE(review): these are indices in the encoder's item space, not raw
            item ids - confirm callers expect that.
        """

        user_idx = self._process_user_id(user_id)

        # implicit expects one `user_items` row per entry of `userid`, so slice
        # the training matrix to the requested users instead of passing all rows
        # (for the "all users" case the slice is the full matrix).
        # Assumes `user_idx` holds integer row indices - TODO confirm for the
        # encoder's output dtype.
        user_items = self._feedback_sprs[user_idx]

        recs, scores = self._model.recommend(
            userid=user_idx,
            user_items=user_items,
            N=top_n,
            filter_already_liked_items=True,
        )

        return recs #, scores

    def _process_user_id(self, user_id):
        """Translate raw user id(s) into an array of encoded user indices.

        Args:
            user_id: ``None`` (all users), an iterable of raw ids, or one raw id.

        Returns:
            Array of encoded user indices.
        """

        if user_id is None:
            return np.array(range(len(self.id_encoder.user_encoder.categories_[0])))

        # Strings are iterable but represent a single id, so treat them as scalars.
        if isinstance(user_id, Iterable) and not isinstance(user_id, (str, bytes)):
            return self.id_encoder.user_encoder.transform(user_id)

        # Single id: the previous code read an unassigned `user_idx` here and
        # raised UnboundLocalError. Encode it through the same path as a list.
        return self.id_encoder.user_encoder.transform([user_id])


In [401]:
# Train the ALS recommender on the training split.
als = ALS()
als.fit(data=data_train)


100%|██████████| 15/15 [00:00<00:00, 194.56it/s]


In [404]:
# Top-N ALS recommendations for every user (default arguments).
rec_items_pi1 = als.recommend()

In [405]:
# True reward of every recommended item: row u of `rec_items_pi1` is looked up
# in row u of the reward table (the column of user indices broadcasts across
# the recommended slots).
# NOTE(review): assumes recommendation rows and item indices line up with the
# raw user / item ids of `true_reward_deterministic` - confirm.
reward_pi1 = true_reward_deterministic[np.arange(n_users)[:, np.newaxis], rec_items_pi1]
reward_pi1.mean()

0.4905

In [415]:
# Content-based model trained on the same split; its top-20 lists are the
# second policy's recommendations.
model = CBF()
model.fit(data_train)
rec_items_pi0 = model.recommend(top_n=20)

In [416]:
# True reward of every item recommended by the content-based model: row u of
# `rec_items_pi0` is looked up in row u of the reward table.
reward_pi0 = true_reward_deterministic[np.arange(n_users)[:, np.newaxis], rec_items_pi0]

In [417]:
# Mean true reward over all recommended user/item slots of the content-based model.
reward_pi0.mean()

0.488

In [66]:
# Example inputs for the packet/port exercise (unrelated to the OPE experiment).
# NOTE(review): the same values are assigned again right before `sentTimes` is
# called; this cell looks redundant.
numberOfPorts = 3
transmissionTIme = 2
packetIds = [4, 7, 10, 6]




In [72]:
def sentTimes(numberOfPorts, transmissionTIme, packetIds):
    """Assign each incoming packet to an output port.

    Packet ``k`` (0-based position in ``packetIds``) arrives at time ``k + 1``
    and is first offered to port ``packetId % numberOfPorts``. If that port is
    still transmitting, the next ports are tried in cyclic order. A port that
    accepts a packet stays busy for ``transmissionTIme`` time units.

    Args:
        numberOfPorts: Number of available ports (must be >= 1).
        transmissionTIme: Time a port is busy after accepting a packet.
        packetIds: Packet ids in arrival order.

    Returns:
        list[int]: The port each packet was finally sent to, in arrival order.

    Raises:
        ValueError: If ``numberOfPorts`` is smaller than 1.
    """
    if numberOfPorts < 1:
        raise ValueError("numberOfPorts must be a positive integer")

    # port_free_at[p] is the earliest time port p can accept a new packet.
    # Every port is free when the first packet arrives at t=1.
    port_free_at = [1] * numberOfPorts

    final_dest = []  # Final destination port of each packet.
    for arr_time, packet_id in enumerate(packetIds, start=1):
        first_choice = int(packet_id) % numberOfPorts

        # Ports in the cyclic order in which this packet tries them.
        candidates = [(first_choice + step) % numberOfPorts for step in range(numberOfPorts)]

        port = next((p for p in candidates if port_free_at[p] <= arr_time), None)
        if port is None:
            # Every port is busy at arrival time. The original loop spun forever
            # here; instead the packet waits for the port that frees up first
            # (ties go to the earliest port in cyclic order).
            # NOTE(review): waiting semantics are an assumption - confirm against
            # the problem statement.
            port = min(candidates, key=port_free_at.__getitem__)

        # The port becomes busy from the moment transmission actually starts,
        # not from its previous (possibly long past) finish time.
        start_time = max(arr_time, port_free_at[port])
        port_free_at[port] = start_time + transmissionTIme
        final_dest.append(port)

    return final_dest

In [73]:
# Worked example: 3 ports, each busy for 2 time units per packet.
numberOfPorts = 3
transmissionTIme = 2
packetIds = [4, 7, 10, 6]

# Displays the port chosen for each packet, in arrival order.
sentTimes(numberOfPorts, transmissionTIme, packetIds)

[1, 2, 1, 0]

In [65]:
# Scratch check: the loop body never runs because the condition is False.
while False:
    print(1)

In [71]:
from sklearn.preprocessing import OneHotEncoder


# Toy action embeddings: 4 actions x 3 features (rows 1 and 2 are identical).
action_embed = np.array([
    [.1,.2,.3],
    [.2,.5,.6],
    [.2,.5,.6],
    [.2,.4,.6]
])

# scikit-learn renamed `sparse` to `sparse_output` in 1.2 and removed the old
# keyword in 1.4; try the new name first and fall back for older releases.
try:
    one_hot_encoder = OneHotEncoder(
                sparse_output=False,
                drop="first",
            )
except TypeError:
    one_hot_encoder = OneHotEncoder(
                sparse=False,
                drop="first",
            )

# Each column's distinct values are one-hot encoded as categories, with the
# first category of every column dropped.
c = one_hot_encoder.fit_transform(action_embed)

In [76]:
from sklearn.linear_model import LogisticRegression

# NOTE(review): this rebinds `model`, which earlier held the fitted CBF
# recommender.
model = LogisticRegression()

# Toy design matrix: 4 samples x 3 features (rows 1 and 2 are identical).
x = np.array([
    [.1,.2,.3],
    [.2,.5,.6],
    [.2,.5,.6],
    [.2,.4,.6]
])

# One class label per sample, kept as a column vector.
a = np.array([[0],[1],[2],[3]])

# scikit-learn expects a 1-D target; flattening avoids the DataConversionWarning
# raised for an (n_samples, 1) column vector.
model.fit(x, a.ravel())

  y = column_or_1d(y, warn=True)


In [78]:
# Per-class probabilities for each training row (one column per class label).
model.predict_proba(x)

array([[0.27373277, 0.24051313, 0.24051313, 0.24524097],
       [0.24050587, 0.2539727 , 0.2539727 , 0.25154873],
       [0.24050587, 0.2539727 , 0.2539727 , 0.25154873],
       [0.24524816, 0.25156219, 0.25156219, 0.25162747]])