In [None]:
import pandas as pd
import pickle as pkl
import numpy as np
import rpy2.robjects as robjects
from collections import OrderedDict
import scipy.stats as stats
import scipy.linalg as linalg

In [None]:
# Input files. load_data() unpickles PKL_DATA_PATH and load_priors() loads
# PRIOR_DATA_PATH through R.
# NOTE(review): both are absolute paths into one user's home directory, so the
# notebook presumably only runs on that machine -- consider a configurable data dir.
PKL_DATA_PATH = "/Users/sghosh/Dropbox (Harvard University)/HeartStepsV2V3/Susobhan/all91.pkl"
PRIOR_DATA_PATH = "/Users/sghosh/Dropbox (Harvard University)/HeartStepsV2V3/Susobhan/bandit-prior.RData"
# Simulation dimensions: the main loop iterates NUSERS users, NDAYS days per user
# and NTIMES decision times per day; the storage arrays hold NDAYS * NTIMES slots per user.
NDAYS = 90
NUSERS = 91
NTIMES = 5

# Multiplier applied to the previous dosage in determine_user_state().
LAMBDA = 0.95

# Names (and order) of the entries of the fs and gs feature vectors built in
# determine_user_state(). Their lengths also define how get_priors_alpha_beta()
# slices the posterior mean and covariance.
F_KEYS = ["intercept", "dosage", "engagement", "other_location", "variation"]
G_KEYS = ["intercept", "dosage", "engagement", "other_location", "variation", "temperature", "logpresteps", "sqrt_totalsteps"]

# Bounds used by clip(): values are limited to the interval [E1, 1 - E0].
E0 = 0.2
E1 = 0.1

In [None]:
# Load data
def load_data():
    '''Unpickle and return the object stored in the file at PKL_DATA_PATH.

    NOTE: unpickling can execute arbitrary code, so only point PKL_DATA_PATH
    at files from a trusted source.
    '''
    with open(PKL_DATA_PATH, "rb") as handle:
        return pkl.load(handle)

In [None]:
def determine_user_state(data, dosage):
    '''Build the state of one user at one decision time.

    Parameters
    ----------
    data : indexable record for a single (user, timeslot); read by position
        (indices 2, 7, 8, 10, 11, 12, 13 and 14 are used).
    dosage : dosage value carried over from the previous decision time.

    Returns
    -------
    (availability, fs, gs, dosage) where fs / gs are arrays holding the
    features named in F_KEYS / G_KEYS (in that order) and dosage is the
    updated dosage, LAMBDA * dosage + data[14].
    '''
    availability = data[2]

    prior_anti = data[14]
    updated_dosage = LAMBDA * dosage + prior_anti

    state = {
        "intercept": 1,
        "dosage": updated_dosage,
        "engagement": data[7],
        "other_location": data[8],
        # "work_location": data[9],
        "variation": data[10],
        "temperature": data[11],
        "logpresteps": data[12],
        "sqrt_totalsteps": data[13],
        "prior_anti": prior_anti,
    }

    # Assemble the two feature vectors in the order given by the key lists
    fs = np.array([state[name] for name in F_KEYS])
    gs = np.array([state[name] for name in G_KEYS])

    return availability, fs, gs, updated_dosage

In [None]:
def load_priors():
    '''Load the bandit prior from the RData file at PRIOR_DATA_PATH.

    Reads the R object ``bandit.prior`` and stacks its components in the
    order [alpha, beta, beta]: the returned mean is the concatenation of
    mu1, mu2, mu2 and the returned matrix is block-diagonal with blocks
    Sigma1, Sigma2, Sigma2.

    Returns
    -------
    (prior_sigma, prior_mu, sigma) where sigma is the first element of the
    ``sigma`` entry converted to float.
    '''
    robjects.r['load'](PRIOR_DATA_PATH)
    bandit_prior = robjects.r['bandit.prior']

    def _field(name):
        # Convert one named component of the R list to a numpy array
        return np.array(bandit_prior.rx2(name))

    alpha_mean = _field("mu1")
    alpha_block = _field("Sigma1")
    beta_mean = _field("mu2")
    beta_block = _field("Sigma2")
    sigma = float(bandit_prior.rx2("sigma")[0])

    # The beta prior is used twice: once for the middle block and once for the last
    prior_mu = np.concatenate([alpha_mean, beta_mean, beta_mean])
    prior_sigma = linalg.block_diag(alpha_block, beta_block, beta_block)

    return prior_sigma, prior_mu, sigma

In [None]:
def get_priors_alpha_beta(post_mu, post_sigma):
    '''Split a stacked mean/covariance into its alpha and beta parts.

    alpha is the leading len(G_KEYS) entries (top-left block of the
    covariance); beta is the trailing len(F_KEYS) entries (bottom-right
    block). Means are returned flattened to 1-D.

    Returns (alpha_pmean, alpha_psd, beta_pmean, beta_psd).
    '''
    head = slice(None, len(G_KEYS))
    tail = slice(-len(F_KEYS), None)

    return (
        post_mu[head].flatten(),
        post_sigma[head, head],
        post_mu[tail].flatten(),
        post_sigma[tail, tail],
    )

In [None]:
def sample_lr_params(alpha_pmean, alpha_psd, beta_pmean, beta_psd, sigma):
    '''Draw regression coefficients and a noise term for the BLR reward model.

    alpha0 is drawn from N(alpha_pmean, alpha_psd); alpha1 and beta are two
    independent draws from N(beta_pmean, beta_psd); et is a scalar draw from
    a zero-mean normal with standard deviation sqrt(sigma**2).

    Returns (alpha0, alpha1, beta, et).
    '''
    mvn = np.random.multivariate_normal

    # Draw order is kept fixed so results are reproducible under a given seed
    alpha0 = mvn(alpha_pmean, alpha_psd)
    alpha1, beta = mvn(beta_pmean, beta_psd), mvn(beta_pmean, beta_psd)

    noise_scale = np.sqrt(sigma**2)
    et = np.random.normal(0, noise_scale)

    return alpha0, alpha1, beta, et

In [None]:
def clip(x, eta = 0):
    '''Limit x to the interval [E1, 1 - E0]. The eta argument is not used.'''
    lower_bound, upper_bound = E1, 1 - E0
    raised = max(x, lower_bound)
    return min(upper_bound, raised)

In [None]:
def calculate_post_prob(fs, post_mu, post_sigma, eta = 0):
    '''Clipped posterior probability that fs . beta exceeds eta.

    Under the posterior, fs . beta is normal with mean fs^T beta_mean and
    variance fs^T beta_cov fs; the upper-tail mass above eta is computed from
    the normal cdf and then passed through clip().
    '''
    # Only the beta part of the posterior is needed here
    beta_pmean, beta_psd = get_priors_alpha_beta(post_mu, post_sigma)[2:]

    # Moments of the scalar fs . beta
    projected_mean = fs.T.dot(beta_pmean)
    projected_var = fs.T @ beta_psd @ fs

    # Pr(fs . beta > eta) = 1 - cdf(eta)
    upper_tail = 1 - stats.norm.cdf(eta, projected_mean, np.sqrt(projected_var))

    return clip(upper_tail)

In [None]:
def calculate_reward(post_mu, post_sigma, fs, gs, sigma, action, prob):
    '''Generate a reward for the given action from sampled model parameters.

    Parameters are drawn with sample_lr_params() from the alpha/beta parts of
    (post_mu, post_sigma), and the reward is
        gs . alpha0 + prob * (fs . alpha1) + (action - prob) * (fs . beta) + noise.
    '''
    # Split the stacked posterior, then sample coefficients and noise from it
    alpha0, alpha1, beta, et = sample_lr_params(
        *get_priors_alpha_beta(post_mu, post_sigma), sigma
    )

    baseline_term = gs @ alpha0
    prob_term = prob * (fs @ alpha1)
    centered_action_term = (action - prob) * (fs @ beta)

    return baseline_term + prob_term + centered_action_term + et

In [None]:
def calculate_phi(prob_matrix, action_matrix, fs_matrix, gs_matrix):
    '''Build the stacked feature vector phi for every time point.

    Each row is [gs, prob * fs, (action - prob) * fs]; a trailing axis of
    length 1 is appended so the result has shape (rows, features, 1).
    '''
    prob_column = prob_matrix.reshape(-1, 1)
    centered_action_column = (action_matrix - prob_matrix).reshape(-1, 1)

    stacked = np.hstack((
        gs_matrix,
        fs_matrix * prob_column,
        fs_matrix * centered_action_column,
    ))

    return stacked[:, :, np.newaxis]

In [None]:
def calculate_post_sigma(prior_sigma, sigma, availability_matrix, Phi):
    '''Posterior covariance of the Bayesian linear regression.

    Computes inv( inv(prior_sigma) + sum_t avail_t * phi_t phi_t^T / sigma^2 ),
    where Phi stacks the phi_t as an array of shape (T, d, 1).
    '''
    # Broadcasting (T, d, 1) against (T, 1, d) gives the per-row outer products
    outer_products = Phi * Phi.transpose(0, 2, 1)

    # Availability-weighted sum over time, scaled by the noise variance
    weights = availability_matrix.reshape(-1, 1, 1)
    data_precision = (weights * outer_products).sum(axis=0) / (sigma**2)

    prior_precision = np.linalg.inv(prior_sigma)

    return np.linalg.inv(prior_precision + data_precision)

In [None]:
def calculate_post_mu(prior_sigma, prior_mu, sigma, availability_matrix, reward_matrix, Phi, post_sigma):
    '''Calculate the posterior mean of the Bayesian linear regression.

    Computes
        post_sigma @ ( inv(prior_sigma) @ prior_mu
                       + sum_t avail_t * phi_t * reward_t / sigma^2 ).

    Only the data term is divided by the noise variance sigma^2, matching the
    precision used in calculate_post_sigma(). The prior term must not be
    scaled: with no available observations the result is exactly prior_mu.

    Parameters
    ----------
    prior_sigma : (d, d) prior covariance.
    prior_mu : prior mean with d entries.
    sigma : noise standard deviation.
    availability_matrix : T availability weights (0/1), one per time point.
    reward_matrix : T rewards, one per time point.
    Phi : array of shape (T, d, 1) holding the feature vectors.
    post_sigma : (d, d) posterior covariance for the same data.

    Returns
    -------
    Posterior mean as a (d, 1) column vector.
    '''

    # Product of prior sigma inverse and prior mu (prior precision-weighted mean)
    sig_mu = (np.linalg.inv(prior_sigma) @ prior_mu.T).reshape(-1, 1)

    # Product of Phi and reward, per time point
    Phi_reward = np.multiply(Phi, reward_matrix.reshape(-1, 1, 1))

    # Availability-weighted sum over time, scaled by the noise variance
    avail_phi_reward_sum = np.sum(
        np.multiply(availability_matrix.reshape(-1, 1, 1), Phi_reward), axis=0
    ) / (sigma ** 2)

    # Posterior mu
    post_mu = post_sigma @ (sig_mu + avail_phi_reward_sum)

    return post_mu

In [None]:
def calculate_posterior(prior_sigma, prior_mu, sigma, availability_matrix, prob_matrix, reward_matrix, action_matrix, fs_matrix, gs_matrix):
    '''Compute the posterior mean and covariance from the history so far.

    Builds the feature array with calculate_phi(), then derives the posterior
    covariance and, from it, the posterior mean.

    Returns (post_mu, post_sigma).
    '''
    # phi(s, a) for every time point
    features = calculate_phi(prob_matrix, action_matrix, fs_matrix, gs_matrix)

    # The covariance comes first because the mean is computed from it
    posterior_cov = calculate_post_sigma(prior_sigma, sigma, availability_matrix, features)
    posterior_mean = calculate_post_mu(
        prior_sigma, prior_mu, sigma, availability_matrix, reward_matrix, features, posterior_cov
    )

    return posterior_mean, posterior_cov


In [None]:
def select_action(p):
    '''Draw one Bernoulli sample: returns 1 with probability p, otherwise 0.'''
    drawn_action = stats.bernoulli.rvs(p)
    return drawn_action

In [None]:
# Load data
data = load_data()

# Load priors
prior_sigma, prior_mu, sigma = load_priors()

# Per-user, per-timeslot storage for availability, probabilities, rewards,
# actions and the two feature vectors. Slots where the user is unavailable stay zero.
# NOTE(review): no random seed is set anywhere, so runs are not reproducible.
availability_matrix = np.zeros((NUSERS, NDAYS * NTIMES))
prob_matrix = np.zeros((NUSERS, NDAYS * NTIMES))
reward_matrix = np.zeros((NUSERS, NDAYS * NTIMES))
action_matrix = np.zeros((NUSERS, NDAYS * NTIMES))
fs_matrix = np.zeros((NUSERS, NDAYS * NTIMES, len(F_KEYS)))
gs_matrix = np.zeros((NUSERS, NDAYS * NTIMES, len(G_KEYS)))


for user in range(NUSERS):

    # Initializing dosage to first dosage value (can be non-zero if user was already in the trial)
    dosage = data[user][0][6]

    # Posterior initialized using priors (copies, so the priors are never modified)
    post_sigma, post_mu = np.copy(prior_sigma), np.copy(prior_mu)

    for day in range(NDAYS):

        # loop for each decision time during the day
        for time in range(NTIMES):

            # Get the current timeslot. Uses NTIMES (not a literal 5) so the index
            # stays consistent with the array sizes and the loop bound above.
            ts = day * NTIMES + time

            # State of the user at time ts; the returned dosage is carried to the next slot.
            # NOTE(review): the carried dosage is LAMBDA * dosage + data[14]; the action
            # sampled below is not fed back into it -- confirm this is intended.
            availability, fs, gs, dosage = determine_user_state(data[user][ts], dosage)

            # Save user's availability
            availability_matrix[user, ts] = availability

            # If user is available
            if availability == 1:

                # Calculate (clipped) probability of (fs x beta) > eta
                prob_fsb = calculate_post_prob(fs, post_mu, post_sigma)

                # Sample action with probability prob_fsb from bernoulli distribution
                action = select_action(prob_fsb)

                # Bayesian LR to estimate reward
                reward = calculate_reward(post_mu, post_sigma, fs, gs, sigma, action, prob_fsb)

                # Save probability, features, action and reward
                fs_matrix[user, ts] = fs
                gs_matrix[user, ts] = gs
                prob_matrix[user, ts] = prob_fsb
                action_matrix[user, ts] = action
                reward_matrix[user, ts] = reward

                # print(user, day, time, prob_fsb, action_matrix[user, ts])

        # Update posterior once per day, from the priors and this user's full
        # history up to and including the day's last timeslot (ts).
        post_mu, post_sigma = calculate_posterior(prior_sigma, prior_mu, sigma, availability_matrix[user][:ts + 1], prob_matrix[user][:ts + 1],
                                                    reward_matrix[user][:ts + 1], action_matrix[user][:ts + 1], fs_matrix[user][:ts + 1], gs_matrix[user][:ts + 1])

In [None]:
# data = load_data()

# import pandas as pd

# df = pd.DataFrame(columns=["Last Dosage", "Last action", "AS", "Next Dosage"])

# for user in range(1,2):
#     for day in range(NDAYS):
#         for time in range(NTIMES):
#             ts = (day * 5) + time
#             dosage = data[user][ts][6]
#             if ts != 0:
#                 last_dosage = data[user][ts - 2][6]
#                 anti_sedentary = data[user][ts][14]
#                 last_action = data[user][ts - 1][4]

#                 # print(last_dosage, last_action, anti_sedentary, dosage)
#                 df.loc[-1] = [last_dosage, last_action, anti_sedentary, dosage]
#                 df.index = df.index + 1
#                 newdosage = 0.95 * last_dosage
#                 if last_action == 1. or anti_sedentary == 1.:
#                     newdosage += 1.
#                 print(ts, dosage, newdosage)
#             # print(data[user][ts])
#     break

In [None]:
# pd.set_option('display.max_rows', None)
# print(df)

In [None]:
# pd.DataFrame(data[1, :100])[[4, 14, 6]]