In [60]:
import numpy as np
import pandas as pd

# Number of contexts (rows) that are sampled, with replacement, from the mushroom table below.
num_contexts = 2000

In [28]:
def one_hot(df, cols):
    """Replace each column named in ``cols`` by its one-hot indicator columns.

    Columns are processed in the order given. For every column the original
    is removed and its indicator columns (named ``<col>_<value>``) are
    appended on the right. The frame passed in is not modified.

    Args:
        df: source ``pandas.DataFrame``.
        cols: iterable of column labels to encode.

    Returns:
        A ``DataFrame`` holding the untouched columns followed by the
        indicator columns of each encoded column.
    """
    encoded = df
    for name in cols:
        indicators = pd.get_dummies(encoded[name], prefix = name, drop_first = False)
        remaining = encoded.drop(name, axis = 1)
        encoded = pd.concat([remaining, indicators], axis = 1)
    return encoded

In [30]:
# Preview the first rows after one-hot encoding every column of `data`.
# NOTE(review): this cell (In [30]) appears before the cell that loads `data` (In [61]),
# so it relies on `data` already being present in the kernel — confirm it survives
# Restart & Run All.
one_hot(data, data.columns).head() # one-hot vectorized data

Unnamed: 0,0_e,0_p,1_b,1_c,1_f,1_k,1_s,1_x,2_f,2_g,...,21_s,21_v,21_y,22_d,22_g,22_l,22_m,22_p,22_u,22_w
0,0,1,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,1,0
1,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0,1,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,1,0
4,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0


In [61]:
# Read the header-less mushroom table, then swap every column for its one-hot indicators.
data = pd.read_csv("mushroom.data", header=None)
data = one_hot(data, data.columns)

# Draw `num_contexts` row positions uniformly at random, with replacement.
ind = np.random.choice(range(len(data)), size=num_contexts, replace=True)

# Columns 0 and 1 are kept out of the context features; later cells read them from `data`.
contexts = data.iloc[ind, 2:]

In [62]:
# Reward settings for the mushroom bandit (used by the reward-construction cell below).
r_noeat = 0  # reward filled into the "no eat" column for every context
r_eat_safe = 5  # reward for eating when indicator column 0 of `data` is set
r_eat_poison_bad = -35  # "bad" outcome when eating and indicator column 1 of `data` is set
r_eat_poison_good = 5  # "good" outcome in that same case
prob_poison_bad = 0.5  # probability of drawing the bad outcome rather than the good one

In [63]:
# Reward of the "no eat" action: the constant r_noeat for every sampled context, as a column vector.
no_eat_reward = r_noeat * np.ones((num_contexts, 1))
# One random outcome per context: good with probability 1 - prob_poison_bad, bad otherwise.
# It stays 1-D so that it multiplies element-wise with the 1-D indicator column below;
# no reshape is needed at this point.
random_poison = np.random.choice([r_eat_poison_good, r_eat_poison_bad],
                                 p = [1 - prob_poison_bad, prob_poison_bad],
                                 size = num_contexts) # should i reshape

# Reward of the "eat" action. Column 0 of `data` is the "fit to eat" indicator
# (`0_e` in the preview output above); column 1 (`0_p`) is presumably the poisonous one.
eat_reward = r_eat_safe * data.iloc[ind, 0] # fit to eat
# Rows flagged by column 1 receive the randomly drawn outcome instead.
eat_reward += np.multiply(random_poison, data.iloc[ind, 1])
eat_reward = eat_reward.to_numpy().reshape((num_contexts, 1))

# Expected reward of eating a mushroom flagged by column 1.
exp_eat_poison_reward = prob_poison_bad * r_eat_poison_bad + (1 - prob_poison_bad) * r_eat_poison_good

# Best achievable expected reward per context: r_eat_safe for column-0 rows, otherwise the
# better of not eating and the expected reward of eating.
opt_exp_reward = data.iloc[ind, 0] * r_eat_safe + data.iloc[ind, 1] * max(r_noeat, exp_eat_poison_reward)

if r_noeat > exp_eat_poison_reward:
    # actions : no_eat = 0, eat = 1
    # Eating is optimal exactly where the column-0 indicator is set.
    opt_actions = data.iloc[ind, 0].to_numpy().reshape((num_contexts, 1))
else:
    # Eating has at least the expected reward of not eating, so "eat" (1) is optimal everywhere.
    opt_actions = np.ones((num_contexts, 1)) 
    
# (optimal expected rewards as a 1-D array, optimal actions as a column vector)
opt_vals = (opt_exp_reward.values, opt_actions)

# Row layout of the bandit table: [context features | no-eat reward | eat reward].
dataset = np.hstack((contexts, no_eat_reward, eat_reward))

In [65]:
# First entry of `opt_vals`: the optimal expected reward of each sampled context.
opt_rewards = opt_vals[0]
# Two reward columns are stacked into `dataset`: no-eat (action 0) and eat (action 1).
num_actions = 2
# NOTE(review): hard-coded; ContextualBandit.feed_data raises ValueError unless
# context_dim + num_actions equals dataset.shape[1]. Presumably this should equal
# contexts.shape[1] — confirm.
context_dim = 117

In [67]:
class ContextualBandit(object):
    """Replays a fixed table of contexts and per-action rewards as a bandit.

    Every row of the fed table is laid out as
    ``[context features (context_dim values) | reward of each action (num_actions values)]``.
    Rows are served through ``self.order``, an indirection from the step
    number to the row index, so that ``reset`` can reshuffle the presentation
    order without touching the table itself.
    """

    def __init__(self, context_dim, num_actions):
        """Store the number of context features and of actions per row."""
        self._context_dim = context_dim
        self._num_actions = num_actions

    def feed_data(self, data):
        """Attach the table to replay and serve its rows in their stored order.

        Args:
            data: 2-D array with ``context_dim + num_actions`` columns.

        Raises:
            ValueError: if the number of columns does not match.
        """
        if data.shape[1] != self._context_dim + self._num_actions:
            raise ValueError("Data dimensions do not match")

        self._number_contexts = data.shape[0]
        self.data = data
        # Identity mapping until reset() shuffles it.
        self.order = range(self._number_contexts)

    def reset(self):
        """Randomly permute the order in which the rows are served."""
        self.order = np.random.permutation(self._number_contexts)

    def context(self, number):
        """Return the context features of the row served at step ``number``."""
        return self.data[self.order[number]][:self.context_dim]

    def reward(self, number, action):
        """Return the reward of ``action`` for the row served at step ``number``."""
        return self.data[self.order[number]][self.context_dim + action]

    def optimal(self, number):
        """Return the index of the highest-reward action at step ``number``."""
        return np.argmax(self.data[self.order[number]][self.context_dim:])

    @property
    def context_dim(self):
        return self._context_dim

    @property
    def num_actions(self):
        return self._num_actions

    @property
    def number_contexts(self):
        # Only available after feed_data() has been called.
        return self._number_contexts

In [68]:
def run_contextual_bandit(context_dim, num_actions, dataset, algos):
    """Run every algorithm over the dataset, one context at a time.

    At each step all algorithms see the same context, each picks an action,
    receives the reward of its own action, and is updated with the resulting
    (context, action, reward) triplet.

    Args:
        context_dim: number of context feature columns in ``dataset``.
        num_actions: number of reward columns in ``dataset``.
        dataset: 2-D array whose rows are ``[context | reward per action]``.
        algos: sequence of objects exposing ``action(context)`` and
            ``update(context, action, reward)``.

    Returns:
        ``(h_actions, h_rewards)``: two float arrays of shape
        ``(num_contexts, len(algos))`` with the chosen actions and the
        rewards obtained, one row per step and one column per algorithm.
    """
    num_contexts = dataset.shape[0]

    cmab = ContextualBandit(context_dim, num_actions)
    cmab.feed_data(dataset)

    # Collect rows in lists and convert once, instead of re-stacking arrays every step.
    action_rows = []
    reward_rows = []

    for i in range(num_contexts):
        context = cmab.context(i)
        actions = [algo.action(context) for algo in algos]
        # The reward depends on both the current step and the chosen action.
        rewards = [cmab.reward(i, action) for action in actions]

        # Update each algorithm with the (context, action, reward) triplet it just saw.
        for algo, action, reward in zip(algos, actions, rewards):
            algo.update(context, action, reward)

        action_rows.append(actions)
        reward_rows.append(rewards)

    history_shape = (num_contexts, len(algos))
    h_actions = np.array(action_rows, dtype=float).reshape(history_shape)
    h_rewards = np.array(reward_rows, dtype=float).reshape(history_shape)

    return h_actions, h_rewards

In [70]:
class BayesianNN(object):
    """Skeleton of a Bayesian neural network; no method is implemented yet."""

    def __init__(self, optimizer):
        """Placeholder constructor: ``optimizer`` is accepted but not stored."""

    def build_model(self):
        """Placeholder: does nothing and returns None."""

    def train(self, data):
        """Placeholder: ``data`` is ignored; returns None."""

    def sample(self, steps):
        """Placeholder: ``steps`` is ignored; returns None."""

class BanditAlgorithm(object):
    """Skeleton of a bandit algorithm; both methods are unimplemented stubs."""

    def action(self, context):
        """Placeholder: ``context`` is ignored; returns None."""

    def update(self, context, action, reward):
        """Placeholder: the (context, action, reward) triplet is ignored; returns None."""

In [77]:
class ContextualDataset(object):
    """Growing store of the (context, action, reward) triplets seen so far.

    Contexts are kept as one 2-D array (one row per observation), rewards as
    a 2-D array with one column per action (only the played action's column
    is filled in; the others are zero), and the played actions as a list.

    Args:
        context_dim: number of features in each context.
        num_actions: number of available actions.
        buffer_s: ``-1`` to sample mini-batches from all observations,
            otherwise only the last ``buffer_s`` observations are sampled.
        intercept: if True, a constant ``1.0`` is appended to every context.
    """

    def __init__(self, context_dim, num_actions, buffer_s = -1, intercept = False):
        self._context_dim = context_dim
        self._num_actions = num_actions
        self._contexts = None
        self._rewards = None
        self.actions = []
        self.buffer_s = buffer_s
        self.intercept = intercept

    def add(self, context, action, reward):
        """Append one (context, action, reward) triplet to the store."""
        if self.intercept:
            c = np.append(np.array(context), 1.0).reshape((1, self.context_dim + 1))
        else:
            c = np.array(context).reshape((1, self.context_dim))

        if self.contexts is None:
            self.contexts = c
        else:
            self.contexts = np.vstack((self.contexts, c))

        # Use this instance's action count, not a notebook-level global.
        r = np.zeros((1, self.num_actions))
        r[0, action] = reward  # rewards of the actions not played are taken to be zero

        if self.rewards is None:
            self.rewards = r
        else:
            self.rewards = np.vstack((self.rewards, r))

        self.actions.append(action)

    def replace_data(self, contexts = None, actions = None, rewards = None):
        """Overwrite whichever of contexts / actions / rewards is not None."""
        if contexts is not None:
            self.contexts = contexts
        if actions is not None:
            self.actions = actions
        if rewards is not None:
            self.rewards = rewards

    def _sample_indices(self, batch_size):
        """Draw ``batch_size`` row indices with replacement, honouring ``buffer_s``."""
        n = self.contexts.shape[0]
        if self.buffer_s == -1:
            # Use all the data.
            return np.random.choice(range(n), batch_size)
        # Use only the buffer (last buffer_s observations).
        return np.random.choice(range(max(0, n - self.buffer_s), n), batch_size)

    def get_batch(self, batch_size):
        """Return a random mini-batch as ``(contexts, rewards)``."""
        ind = self._sample_indices(batch_size)
        return self.contexts[ind, :], self.rewards[ind, :]

    def get_data(self, action):
        """Return ``(contexts, rewards)`` of the observations where ``action`` was played.

        When the action was never played, both arrays have zero rows.
        """
        n = self.contexts.shape[0]
        played = np.asarray(self.actions)[:n]
        # flatnonzero always yields an integer index array, even when nothing matches.
        ind = np.flatnonzero(played == action)
        return self.contexts[ind, :], self.rewards[ind, :]

    def get_data_with_weights(self):
        """Returns all observations with one-hot weights for actions."""
        weights = np.zeros((self.contexts.shape[0], self.num_actions))
        played = np.asarray(self.actions, dtype=int)
        weights[np.arange(played.shape[0]), played] = 1.0
        return self.contexts, self.rewards, weights

    def get_batch_with_weights(self, batch_size):
        """Returns a random mini-batch with one-hot weights for actions."""
        ind = self._sample_indices(batch_size)

        weights = np.zeros((batch_size, self.num_actions))
        sampled_actions = np.asarray(self.actions, dtype=int)[ind]
        weights[np.arange(batch_size), sampled_actions] = 1.0
        return self.contexts[ind, :], self.rewards[ind, :], weights

    def num_points(self, f=None):
        """Returns number of points in the buffer (after applying function f).

        An empty store (nothing added yet) counts as zero points.
        """
        n = 0 if self.contexts is None else self.contexts.shape[0]
        if f is not None:
            return f(n)
        return n

    @property
    def context_dim(self):
        return self._context_dim

    @property
    def num_actions(self):
        return self._num_actions

    @property
    def contexts(self):
        return self._contexts

    @contexts.setter
    def contexts(self, value):
        self._contexts = value

    @property
    def actions(self):
        return self._actions

    @actions.setter
    def actions(self, value):
        self._actions = value

    @property
    def rewards(self):
        return self._rewards

    @rewards.setter
    def rewards(self, value):
        self._rewards = value

In [1]:
import numpy as np

# Load a precomputed array from disk; the file name suggests TF-IDF features reduced
# to 25 PCA components — TODO confirm how the file was produced.
x = np.load("tfidf_pca_25_inc.npy")
# Display the array shape as the cell output.
x.shape

(69226, 25)

In [3]:
import pandas as pd

# Load the CSV tables from the local `delicious_csv/` directory, one DataFrame per file.
# Bookmarks, bookmark-to-tag links and the tag list:
book = pd.read_csv("delicious_csv/bookmarks.csv")
book_tags = pd.read_csv("delicious_csv/bookmark_tags.csv")
tags = pd.read_csv("delicious_csv/tags.csv")
# User contacts; the `_t` variable holds the file whose name ends in `_timestamps`.
user_contacts = pd.read_csv("delicious_csv/user_contacts.csv")
user_contacts_t = pd.read_csv("delicious_csv/user_contacts_timestamps.csv")
# Bookmarks tagged by users; again the `t`-suffixed variable holds the `_timestamps` file.
user_taggedb = pd.read_csv("delicious_csv/user_taggedbookmarks.csv")
user_taggedbt = pd.read_csv("delicious_csv/user_taggedbookmarks_timestamps.csv")

In [18]:
# Display the user/bookmark/tag table with timestamps (the rendered output below is truncated).
user_taggedbt

Unnamed: 0,userID,bookmarkID,tagID,timestamp
0,8,1,1,1289255362000
1,8,2,1,1289255159000
2,8,7,1,1289238901000
3,8,7,6,1289238901000
4,8,7,7,1289238901000
...,...,...,...,...
437588,108035,30993,193,1277495315000
437589,108035,30993,673,1277495315000
437590,108035,30994,130,1277223715000
437591,108035,30994,267,1277223715000


In [None]:
# Display the user-contacts table loaded from the `_timestamps` file.
# NOTE(review): this cell has no execution count (In [None]), so no output is recorded for it.
user_contacts_t