# **Team**

> Balestrieri Niccolò - 10936955 <br>
  Bertogalli Andrea - 10702303 <br>
  Cavalieri Francesco - 11020855    
  Tombini Nicolò - 10912627


## Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from scipy import stats
from scipy import optimize

## Constants

In [None]:
class CampainSettings:
  """Bundle of every constant shared by the pricing and bidding experiments."""

  def __init__(self):
    # --- auction side -------------------------------------------------------
    self.n_advertisers = 4
    self.ctrs = np.ones(self.n_advertisers)  # one click-through rate per advertiser
    self.value_per_click = 1
    # Our advertiser is index 0; its valuation is ctr * value per click.
    self.my_valuation = self.ctrs[0]*self.value_per_click
    self.B = 135  # bidding budget handed to the pacing agents

    # --- horizons -----------------------------------------------------------
    self.T = 250            # rounds of the stand-alone pricing experiment
    self.n_customers = 1500  # customers per pricing round / auctions per bidding run
    self.days = 10
    self.n_trials = 10

    # --- pricing side -------------------------------------------------------
    self.min_price, self.max_price = 0, 1
    self.K = 50             # size of the price / bid grid
    self.cost = 0.25
    self.prices_UCB = np.linspace(self.min_price, self.max_price, self.K)
    # Separate array (not an alias) with the same grid for the clairvoyant.
    self.prices_clairvoyant = np.linspace(self.min_price, self.max_price, self.K)
    self.conversion_probability = lambda p: 1-p

# Single shared settings instance used by the cells below.
c = CampainSettings()

## Pricing

### Environment definition

In [None]:
class PricingEnvironment:
    """Stochastic pricing environment with binomial demand.

    `conversion_probability` maps a price to the probability that a single
    customer buys; `cost` is the per-unit cost subtracted from the price.
    """

    def __init__(self, conversion_probability, cost):
        self.cost = cost
        self.conversion_probability = conversion_probability

    def round(self, p_t, n_t):
        """Play price `p_t` against `n_t` customers.

        Returns a pair `(d_t, r_t)`: the number of sales drawn from
        Binomial(n_t, conversion_probability(p_t)) and the resulting profit
        `(p_t - cost) * d_t`.
        """
        buy_probability = self.conversion_probability(p_t)
        sales = np.random.binomial(n_t, buy_probability)
        unit_margin = p_t - self.cost
        return sales, unit_margin*sales

### Agents definition

In [None]:
class RBFGaussianProcess:
    """Gaussian-process regressor with an RBF kernel and incremental fitting.

    The kernel is `exp(-scale * (a - b)^2)`; `reg` is added to the diagonal
    of the Gram matrix. The inverse Gram matrix is cached in `k_xx_inv` and
    extended one observation at a time by `fit`.
    """

    def __init__(self, scale=1, reg=1e-2):
        self.scale = scale
        self.reg = reg
        self.k_xx_inv = None  # None until the first call to fit()

    def rbf_kernel_incr_inv(self, B, C, D):
        """Return the inverse of the block matrix [[K, B], [C, D]].

        Uses the cached inverse of K (`self.k_xx_inv`) and the inverse of the
        Schur complement `D - C K^-1 B`, so K itself is never re-inverted.
        """
        k_inv = self.k_xx_inv
        schur_inv = np.linalg.inv(D - C @ k_inv @ B)
        upper_left = k_inv + k_inv @ B @ schur_inv @ C @ k_inv
        upper_right = - k_inv @ B @ schur_inv
        lower_left = - schur_inv @ C @ k_inv
        upper = np.concatenate((upper_left, upper_right), axis=1)
        lower = np.concatenate((lower_left, schur_inv), axis=1)
        return np.concatenate((upper, lower), axis=0)

    def rbf_kernel(self, a, b):
        """Return the (len(a), len(b)) matrix of RBF kernel values."""
        column = a.reshape(-1, 1)
        row = b.reshape(1, -1)
        # Broadcasting yields every pairwise squared difference at once.
        squared_diff = np.power(column - row, 2).astype(np.float64)
        return np.exp(-self.scale * squared_diff)

    def fit(self, x=np.array([]), y=np.array([])):
        """Add observations and refresh the cached inverse Gram matrix.

        The first call builds and inverts the full Gram matrix of `x` (which
        may be empty). Later calls take the incremental path, which passes a
        single-entry `D = 1 + reg` block and therefore handles one new
        observation per call. Returns `self`.
        """
        x, y = np.array(x), np.array(y)
        if self.k_xx_inv is not None:
            # Cross-kernel must be computed against the *old* inputs.
            cross = self.rbf_kernel(self.x, x)
            self.x = np.vstack((self.x, x))
            self.y = np.vstack((self.y, y))
            self.k_xx_inv = self.rbf_kernel_incr_inv(cross, cross.T, np.array([1 + self.reg]))
            return self

        self.y = y.reshape(-1, 1)
        self.x = x.reshape(-1, 1)
        gram = self.rbf_kernel(self.x, self.x) + self.reg * np.eye(self.x.shape[0])
        self.k_xx_inv = np.linalg.inv(gram)
        return self

    def predict(self, x_predict):
        """Return `(mu_hat, sigma_hat)` at the points `x_predict`.

        `mu_hat = k K^-1 y` and `sigma_hat = 1 - diag(k K^-1 k^T)`, both
        flattened to 1-D, where `k` is the kernel between `x_predict` and the
        stored inputs.
        """
        k = self.rbf_kernel(x_predict, self.x)
        projected = k @ self.k_xx_inv

        mu_hat = projected @ self.y
        sigma_hat = 1 - np.diag(projected @ k.T)

        return mu_hat.ravel(), sigma_hat.ravel()

In [None]:
class GPUCBAgent:
    """GP-UCB bandit over `discretization` evenly spaced arms in [0, 1].

    A `RBFGaussianProcess(scale=2)` models the reward of each arm; the arm
    maximising `mu + beta(t) * sigma` is played at every step.
    """

    def __init__(self, T, discretization=100):
        self.T = T
        self.t = 0
        self.arms = np.linspace(0, 1, discretization)
        self.N_pulls = np.zeros(discretization)
        self.a_t = None  # index of the arm chosen by the latest pull_arm()
        self.action_hist = np.array([])
        self.reward_hist = np.array([])
        # Start from a GP fitted on no data so predict() works immediately.
        self.gp = RBFGaussianProcess(scale=2).fit()
        self.mu_t = np.zeros(discretization)
        self.sigma_t = np.zeros(discretization)
        self.gamma = lambda t: np.log(t+1)**2
        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))

    def pull_arm(self):
        """Choose the arm with the highest upper confidence bound and return its value."""
        self.mu_t, self.sigma_t = self.gp.predict(self.arms)
        exploration_bonus = self.beta(self.t) * self.sigma_t
        self.a_t = np.argmax(self.mu_t + exploration_bonus)
        return self.arms[self.a_t]

    def update(self, r_t):
        """Record reward `r_t` for the last pulled arm and refit the GP on it."""
        played_arm = self.arms[self.a_t]
        self.N_pulls[self.a_t] += 1
        self.action_hist = np.append(self.action_hist, played_arm)
        self.reward_hist = np.append(self.reward_hist, r_t)
        self.gp = self.gp.fit(played_arm, r_t)
        self.t += 1

### Pricing iterations

#### Profit Curve

In [None]:
# Linear demand curve: a customer buys at price p with probability 1 - p.
def conversion_probability(p):
    return 1-p


# Profit of selling n_sales units at `price`, net of the per-unit cost.
def reward_function(price, n_sales):
    return (price-c.cost)*n_sales


# Profit if every customer bought at the highest price on the grid.
maximum_profit = reward_function(max(c.prices_clairvoyant), c.n_customers)

# Expected profit of each candidate price, and the clairvoyant's best choice.
profit_curve = reward_function(
    c.prices_clairvoyant,
    c.n_customers*conversion_probability(c.prices_clairvoyant),
)
best_price_index = np.argmax(profit_curve)
best_price = c.prices_clairvoyant[best_price_index]
# The clairvoyant earns the same expected profit in each of the T rounds.
expected_clairvoyant_rewards = np.repeat(profit_curve[best_price_index], c.T)

In [None]:
# Keep only the prices that yield a strictly positive expected profit.
positive_indices = profit_curve > 0
positive_prices = c.prices_clairvoyant[positive_indices]
positive_profits = profit_curve[positive_indices]

fig = go.Figure(data=[
    # Expected profit as a function of price.
    go.Scatter(
        x=positive_prices,
        y=positive_profits,
        mode='lines',
        name='Profit Curve',
    ),
    # Vertical dashed marker at the clairvoyant's best price.
    go.Scatter(
        x=[best_price, best_price],
        y=[min(positive_profits), max(positive_profits)],
        mode='lines',
        line=dict(color='green', dash='dash'),
        name='Optimal Price',
    ),
])

fig.update_layout(
    title="Profit Curve",
    xaxis_title='Price',
    yaxis_title='Profit',
    width=700,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()

In [None]:

# Step function: 1 on [0, 1), 0 from 1 onwards, sampled on 500 points of [0, 3].
x = np.linspace(0, 3, 500)
y = np.where(x < 1, 1, 0)

fig = go.Figure(data=[
    go.Scatter(x=x, y=y, mode='lines', name='y = 1 for x < 1, y = 0 for x > 1'),
])

fig.update_layout(
    title="Random uniform distribution",
    xaxis_title="x",
    yaxis_title="y",
)

fig.show()


#### Run algorithm

In [None]:
# Per-trial histories collected from the GP-UCB agent.
actions = []
rewards = []
regret_per_trial = []

for seed in range(c.n_trials):
    # One seed per trial keeps every run reproducible.
    np.random.seed(seed)
    env = PricingEnvironment(conversion_probability=conversion_probability, cost=c.cost)
    ucb_agent = GPUCBAgent(c.T, c.K)

    # Raw (un-normalised) profit of each round, filled in place.
    agent_rewards = np.zeros(c.T)

    for t in range(c.T):
        pi_t = ucb_agent.pull_arm()
        d_t, r_t = env.round(pi_t, c.n_customers)

        # The agent learns on the per-customer profit; regret uses the raw one.
        ucb_agent.update(r_t/c.n_customers)
        agent_rewards[t] = r_t

    actions.append(ucb_agent.action_hist)
    rewards.append(ucb_agent.reward_hist)

    cumulative_regret = np.cumsum(expected_clairvoyant_rewards-agent_rewards)
    regret_per_trial.append(cumulative_regret)

regret_per_trial = np.array(regret_per_trial)

average_regret = regret_per_trial.mean(axis=0)
regret_sd = regret_per_trial.std(axis=0)

# Band of one standard error of the mean around the average regret.
standard_error = regret_sd / np.sqrt(c.n_trials)
lower_bound = average_regret - standard_error
upper_bound = average_regret + standard_error

In [None]:
t_values = np.arange(c.T)

# Closed polygon (forward along the lower bound, back along the upper bound).
uncertainty_band = go.Scatter(
    x=np.concatenate([t_values, t_values[::-1]]),
    y=np.concatenate([lower_bound, upper_bound[::-1]]),
    fill='toself',
    fillcolor='rgba(0, 0, 255, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    hoverinfo='skip',
    showlegend=True,
    name='Uncertainty',
)

mean_curve = go.Scatter(
    x=t_values,
    y=average_regret,
    mode='lines',
    line=dict(color='blue'),
    name='Average Regret',
)

fig = go.Figure(data=[uncertainty_band, mean_curve])

fig.update_layout(
    title='Cumulative regret of GP-UCB',
    xaxis_title='t',
    yaxis_title='Regret',
    width=700,
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
    hovermode='x unified',
)

fig.show()

#### Final estimated profit curve

In [None]:
# Average, round by round, the prices played and rewards observed across trials.
actions = np.array(actions)
actions_mean = actions.mean(axis=0)
rewards = np.array(rewards)
rewards_mean = rewards.mean(axis=0)

prices = actions_mean
profits = rewards_mean

# Fit a fresh GP on the averaged samples and evaluate it on the price grid.
gp = RBFGaussianProcess(scale=2)
gp.fit(prices, profits)
x_vals = np.linspace(0, 1, c.K)
mu, sigma = gp.predict(x_vals)

fig = go.Figure(data=[
    # Band between mu - sigma and mu + sigma.
    go.Scatter(
        x=np.concatenate([x_vals, x_vals[::-1]]),
        y=np.concatenate([mu - sigma, (mu + sigma)[::-1]]),
        fill='toself',
        fillcolor='rgba(0, 0, 255, 0.3)',
        line=dict(color='rgba(255,255,255,0)'),
        hoverinfo="skip",
        showlegend=True,
        name='Uncertainty',
    ),
    # GP mean.
    go.Scatter(
        x=x_vals,
        y=mu,
        mode='lines',
        line=dict(color='blue'),
        name='Average Normalized Profit',
    ),
    # Averaged samples the GP was fitted on.
    go.Scatter(
        x=prices,
        y=profits,
        mode='markers',
        marker=dict(color='orange'),
        name='Samples',
    ),
    # Vertical marker at the clairvoyant's best price.
    go.Scatter(
        x=[best_price, best_price],
        y=[min(mu - sigma), max(mu + sigma)],
        mode='lines',
        line=dict(color='red', dash='dash'),
        name='Optimal Price',
    ),
])

fig.update_layout(
    title='GPUCB - Final estimated profit curve (normalized)',
    xaxis_title='Price',
    yaxis_title='Profit',
    width=900,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    hovermode='x unified',
)
fig.show()

## Bidding

### Environment definition

In [None]:
class Auction:
    """Skeleton of an auction: subclasses supply winner and payment rules."""

    def __init__(self, *args, **kwargs):
        """Accept and ignore any constructor arguments."""

    def get_winners(self, bids):
        """Placeholder; subclasses return a `(winners, values)` pair."""
        return None

    def get_payments_per_click(self, winners, values, bids):
        """Placeholder; subclasses return the payment(s) per click."""
        return None

    def round(self, bids):
        """Run one auction and return `(winners, payments_per_click)`."""
        winners, values = self.get_winners(bids)
        return winners, self.get_payments_per_click(winners, values, bids)

In [None]:
class SecondPriceAuction(Auction):
    """Single-slot second-price auction ranking advertisers by ctr * bid."""

    def __init__(self, ctrs):
        self.ctrs = ctrs
        self.n_adv = len(ctrs)

    def get_winners(self, bids):
        """Return `(winner, adv_values)`: the top-ranked index and all ctr * bid values."""
        adv_values = self.ctrs*bids
        # Last position of the ascending argsort is the highest value.
        winner = np.argsort(adv_values)[-1]
        return winner, adv_values

    def get_payments_per_click(self, winners, values, bids):
        """Return the second-highest value divided by the winner's ctr, rounded to 2 decimals."""
        runner_up = np.argsort(values)[-2]
        price = values[runner_up]/self.ctrs[winners]
        return price.round(2)

### Agent definition

In [None]:
class Agent:
  """Abstract agent interface; every method must be overridden."""

  def __init__(self):
    """Not implemented in the base class."""
    raise NotImplementedError

  def pull_arm(self):
    """Choose the next action. Not implemented in the base class."""
    raise NotImplementedError

  def update(self, r_t):
    """Incorporate the observed reward `r_t`. Not implemented in the base class."""
    raise NotImplementedError

In [None]:
class SecondPriceMultiplicativePacingAgent:
    """Budget-paced bidder that shades its valuation by a multiplier `lmbd`.

    `rho = budget / T` is the target spend per round; `lmbd` moves by `eta`
    times the deviation of the actual cost from `rho` and is kept within
    [0, 1/rho].
    """

    def __init__(self, valuation, budget, T, eta):
        self.valuation = valuation
        self.budget = budget
        self.T = T
        self.eta = eta
        self.rho = budget/T
        self.lmbd = 1
        self.t = 0

    def bid(self):
        """Return `valuation / (lmbd + 1)`, or 0 once the remaining budget is below 1."""
        return 0 if self.budget < 1 else self.valuation/(self.lmbd+1)

    def update(self, f_t, c_t):
        """Update the multiplier from the paid cost `c_t` and deduct it from the budget.

        `f_t` (the utility) is accepted for interface symmetry but not used.
        """
        step = self.eta*(self.rho-c_t)
        self.lmbd = np.clip(self.lmbd-step, a_min=0, a_max=1/self.rho)
        self.budget -= c_t

In [None]:
class UCBLikeAgent(Agent):
  """Budget-constrained UCB agent over K arms.

  After playing every arm once, each round solves a linear program for the
  distribution over arms that maximises the optimistic profit subject to the
  optimistic (lower-bounded) expected cost not exceeding `rho`, then samples
  an arm from that distribution.
  """

  def __init__(self, K, T, range=1):
    self.K = K
    self.T = T
    self.range = range  # scale of the confidence radius
    self.t = 0
    self.a_t = None
    self.N_pulls = np.zeros(K)
    self.average_profit = np.zeros(K)
    self.average_cost = np.zeros(K)
    self.profit_ucbs = np.zeros(K)
    self.cost_ucbs = np.zeros(K)
    self.arm_probs = np.zeros(K)

  def pull_arm(self, rho):
    """Return the index of the arm to play given the per-round budget `rho`."""
    # Initial sweep: play each arm once, in order.
    if self.t < self.K:
      self.a_t = self.t
      return self.a_t

    confidence_radius = self.range*np.sqrt(2*np.log(self.T)/self.N_pulls)
    self.profit_ucbs = self.average_profit + confidence_radius
    self.cost_ucbs = self.average_cost - confidence_radius

    # linprog minimises, so the profit bounds are negated.
    solution = optimize.linprog(
        -self.profit_ucbs,
        A_ub=[self.cost_ucbs],
        b_ub=[rho],
        A_eq=[np.ones(self.K)],
        b_eq=[1],
        bounds=(0,1),
    )

    self.arm_probs = solution.x
    self.a_t = np.random.choice(self.K, p=self.arm_probs)
    return self.a_t

  def update(self, f_t, c_t):
    """Fold utility `f_t` and cost `c_t` into the running means of the last arm."""
    arm = self.a_t
    self.N_pulls[arm] += 1
    pulls = self.N_pulls[arm]
    self.average_profit[arm] += (f_t - self.average_profit[arm])/pulls
    self.average_cost[arm] += (c_t - self.average_cost[arm])/pulls

    self.t += 1

In [None]:
class FirstPriceMultiplicativePacingUCBLikeAgent(Agent):
  """Bidder that delegates the choice among `bids_set` to a `UCBLikeAgent`.

  The inner agent is created with `range=0.01` and is queried with the
  per-round budget `rho = budget / T`.
  """

  def __init__(self, bids_set, budget, T, eta):
    self.bids_set = bids_set
    self.K = len(bids_set)
    self.T = T
    self.budget = budget
    self.eta = eta
    self.rho = budget/T
    self.t = 0
    self.ucb = UCBLikeAgent(self.K, T, range=0.01)

  def bid(self):
    """Return the bid picked by the inner agent, capped at the remaining budget."""
    chosen = self.bids_set[self.ucb.pull_arm(self.rho)]
    return self.budget if self.budget < chosen else chosen

  def update(self, f_t, c_t):
    """Forward the outcome to the inner agent and deduct the cost from the budget."""
    self.ucb.update(f_t, c_t)
    self.budget -= c_t

### Utils


In [None]:
def get_clairvoyant_truthful(B, my_valuation, m_t, n_users):
    """Greedy clairvoyant benchmark for truthful auctions.

    Rounds are visited in decreasing order of utility
    `(my_valuation - m_t) * (my_valuation >= m_t)`; each visited round is won
    at price `m_t` until the accumulated payment exceeds `B - 1` or `n_users`
    rounds have been taken.

    Returns three length-`n_users` arrays: win indicators, utilities and
    payments (zero for the rounds not taken).
    """
    utility = (my_valuation-m_t)*(my_valuation>=m_t)
    best_first = np.argsort(utility)[::-1]

    clairvoyant_bids = np.zeros(n_users)
    clairvoyant_utilities = np.zeros(n_users)
    clairvoyant_payments = np.zeros(n_users)

    spent = 0
    for position in range(n_users):
        if spent > B-1:
            break
        chosen_round = best_first[position]
        price = m_t[chosen_round]
        clairvoyant_bids[chosen_round] = 1
        clairvoyant_utilities[chosen_round] = utility[chosen_round]
        clairvoyant_payments[chosen_round] = price
        spent += price
    return clairvoyant_bids, clairvoyant_utilities, clairvoyant_payments

In [None]:
class LogsBiddingTrials:
  """Empty attribute bag; the simulation functions attach their results to it."""

  def __init__(self):
    """Create the bag with no attributes."""

### Bidding iterations

In [None]:
def bidding_algorithm(campain_settings, atype):
  """Simulate repeated auctions for one bidding agent and aggregate the logs.

  For each of `campain_settings.n_trials` trials (seeded with the trial
  index) the agent bids in `n_customers` second-price auctions against
  `n_advertisers - 1` competitors bidding uniformly in [0, 1]. When the
  agent wins it pays the highest competing bid `m_t` and earns
  `my_valuation - m_t`.

  Args:
    campain_settings: settings object providing `n_trials`, `n_customers`,
      `n_advertisers`, `ctrs`, `my_valuation`, `B` and `prices_UCB`.
    atype: `'mpa'` selects `SecondPriceMultiplicativePacingAgent`; any other
      value selects `FirstPriceMultiplicativePacingUCBLikeAgent`.

  Returns:
    A `LogsBiddingTrials` whose attributes hold per-trial matrices
    (`all_bids`, `trials_bids`, `all_utilities`, `cum_all_payments`,
    `cum_all_regrets`, `all_clairvoyant_utilities`), the flat array of
    utilities of won auctions (`trial_wins_logs`), and the across-trial
    mean/std of regret, payments, bids, wins and rewards.
  """
  settings = campain_settings
  n_rounds = settings.n_customers
  eta = 1/np.sqrt(n_rounds)

  logs = LogsBiddingTrials()
  all_regrets = []
  cum_all_payments = []
  all_bids = []
  all_wins = []
  all_rewards = []
  all_utilities = []
  all_clairvoyant_utilities = []
  trial_wins_logs = np.array([])

  for trial in range(settings.n_trials):
    np.random.seed(trial)
    auction = SecondPriceAuction(settings.ctrs)
    # Build the agent from the settings passed in, not from a module global.
    if atype == 'mpa':
      agent = SecondPriceMultiplicativePacingAgent(valuation=settings.my_valuation,
                                                   budget=settings.B,
                                                   T=n_rounds,
                                                   eta=eta)
    else:
      agent = FirstPriceMultiplicativePacingUCBLikeAgent(bids_set=settings.prices_UCB,
                                                         budget=settings.B,
                                                         T=n_rounds,
                                                         eta=eta)

    # Competitors: i.i.d. uniform bids; m_t is the highest competing bid per round.
    other_bids = np.random.uniform(0, 1, size=(settings.n_advertisers-1, n_rounds))
    m_t = other_bids.max(axis=0)

    _, clairvoyant_utilities, _ = get_clairvoyant_truthful(
        settings.B, settings.my_valuation, m_t, n_rounds)
    all_clairvoyant_utilities.append(clairvoyant_utilities)

    # Preallocated per-round logs (avoids quadratic np.append in the loop).
    utilities = np.zeros(n_rounds)
    my_bids = np.zeros(n_rounds)
    my_payments = np.zeros(n_rounds)
    my_wins = np.zeros(n_rounds)

    for u in range(n_rounds):
      my_bid = agent.bid()
      bids = np.append(my_bid, other_bids[:, u])
      winner, _ = auction.round(bids=bids)
      my_win = int(winner == 0)
      f_t, c_t = (settings.my_valuation-m_t[u])*my_win, m_t[u]*my_win
      agent.update(f_t, c_t)

      utilities[u] = f_t
      my_bids[u] = my_bid
      my_payments[u] = c_t
      my_wins[u] = my_win

    # Utilities of the auctions actually won in this trial, in round order.
    trial_wins_logs = np.append(trial_wins_logs, utilities[my_wins == 1])

    # Exactly one regret curve per trial.
    all_regrets.append(np.cumsum(clairvoyant_utilities-utilities))
    cum_all_payments.append(np.cumsum(my_payments))
    all_bids.append(my_bids)
    all_wins.append(np.cumsum(my_wins))
    all_rewards.append(np.cumsum(utilities))
    all_utilities.append(utilities)

  all_regrets = np.vstack(all_regrets)
  all_bids = np.vstack(all_bids)
  all_utilities = np.vstack(all_utilities)
  cum_all_payments = np.vstack(cum_all_payments)
  all_clairvoyant_utilities = np.vstack(all_clairvoyant_utilities)
  all_wins = np.array(all_wins)
  all_rewards = np.array(all_rewards)

  logs.__dict__.update({
    'avg_regret': all_regrets.mean(axis=0),
    'std_regret': all_regrets.std(axis=0),
    'avg_payments': cum_all_payments.mean(axis=0),
    'std_payments': cum_all_payments.std(axis=0),
    'avg_bids': all_bids.mean(axis=0),
    'std_bids': all_bids.std(axis=0),
    'avg_wins': all_wins.mean(axis=0),
    'std_wins': all_wins.std(axis=0),
    'avg_rewards': all_rewards.mean(axis=0),
    # Stored under its own key so it no longer overwrites the mean.
    'std_rewards': all_rewards.std(axis=0),
    'trials_bids': all_bids,
    'cum_all_payments': cum_all_payments,
    'cum_all_regrets': all_regrets,
    'all_bids': all_bids,
    'all_utilities': all_utilities,
    'trial_wins_logs': trial_wins_logs,
    'all_clairvoyant_utilities': all_clairvoyant_utilities,
  })

  return logs

#### Bidding plots

In [None]:
def plot_winned_auctions_distribution(logs, title):
  """Show a histogram of the strictly positive values in `logs` with their median.

  `logs` is flattened with `np.hstack`; `title` is used as the figure title.
  """
  flattened = np.hstack(logs)
  positive_mean_values = flattened[flattened > 0]
  median_reward = np.median(positive_mean_values)
  median_label = f'Median: {median_reward:.2f}'

  histogram = go.Histogram(
    x=positive_mean_values,
    nbinsx=30,
    marker=dict(color='blue', line=dict(color='black', width=1)),
    name='Frequency',
  )
  fig = go.Figure(data=[histogram])

  # Full-height vertical line at the median (y is in paper coordinates).
  fig.add_shape(
    type="line",
    x0=median_reward, x1=median_reward,
    y0=0, y1=1,
    xref='x', yref='paper',
    line=dict(color="red", width=2, dash="dash"),
    name=median_label,
  )

  # Single-point trace whose only purpose is to give the median a legend entry.
  fig.add_trace(go.Scatter(
    x=[median_reward], y=[0],
    mode='lines',
    line=dict(color="red", width=2, dash="dash"),
    name=median_label,
    showlegend=True,
  ))

  fig.update_layout(
    title=title,
    xaxis_title='Reward Value (Utility)',
    yaxis_title='Frequency',
    legend=dict(x=0.7, y=0.95),
  )

  fig.show()

In [None]:
def plot_cumulative_payments(logs, B):
  """Plot the column-wise mean of `logs` with a one-std band and a line at budget `B`.

  `logs` is a 2-D array; statistics are taken over axis 0.
  """
  steps = np.arange(logs.shape[1])
  mean_curve = logs.mean(axis=0)
  spread = logs.std(axis=0)

  fig = go.Figure()

  # Horizontal reference line at the budget.
  fig.add_shape(type="line",
              x0=0, x1=logs.shape[1] - 1,
              y0=B, y1=B,
              line=dict(color="red", width=2),
              name='Budget',
              showlegend=True)

  fig.add_trace(go.Scatter(
    x=steps,
    y=mean_curve,
    mode='lines',
    name='Mean',
    line=dict(color='blue'),
  ))

  # Closed polygon: forward along mean - std, back along mean + std.
  fig.add_trace(go.Scatter(
    x=np.concatenate([steps, steps[::-1]]),
    y=np.concatenate([mean_curve - spread, (mean_curve + spread)[::-1]]),
    fill='toself',
    fillcolor='rgba(0, 100, 250, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    name='Uncertainty',
    showlegend=True,
  ))

  fig.update_layout(
    title='Cumulative Payment',
    xaxis_title='$c_t$',
    yaxis_title='Cumulative payment',
    legend=dict(x=0.8, y=0.2),
  )

  fig.show()

In [None]:
def plot_chosen_bids(logs):
  """Plot the column-wise mean of the 2-D array `logs` with a one-std band."""
  steps = np.arange(logs.shape[1])
  mean_curve = logs.mean(axis=0)
  spread = logs.std(axis=0)

  fig = go.Figure()

  fig.add_trace(go.Scatter(
      x=steps,
      y=mean_curve,
      mode='lines',
      name='Mean',
      line=dict(color='blue'),
  ))

  # Closed polygon: forward along mean - std, back along mean + std.
  fig.add_trace(go.Scatter(
      x=np.concatenate([steps, steps[::-1]]),
      y=np.concatenate([mean_curve - spread, (mean_curve + spread)[::-1]]),
      fill='toself',
      fillcolor='rgba(0, 100, 250, 0.2)',
      line=dict(color='rgba(255,255,255,0)'),
      name='Uncertainty',
      showlegend=True,
  ))

  fig.update_layout(
      title='Chosen Bids',
      xaxis_title='$c_t$',
      yaxis_title='Bids',
  )

  fig.show()

In [None]:
def plot_regret(logs):
  """Plot the column-wise mean of the 2-D regret array `logs` with a one-std band."""
  steps = np.arange(logs.shape[1])
  mean_curve = logs.mean(axis=0)
  spread = logs.std(axis=0)

  fig = go.Figure()

  fig.add_trace(go.Scatter(
    x=steps,
    y=mean_curve,
    mode='lines',
    name='Mean',
    line=dict(color='blue'),
  ))

  # Closed polygon: forward along mean - std, back along mean + std.
  fig.add_trace(go.Scatter(
    x=np.concatenate([steps, steps[::-1]]),
    y=np.concatenate([mean_curve - spread, (mean_curve + spread)[::-1]]),
    fill='toself',
    fillcolor='rgba(0, 100, 250, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    name='Uncertainty',
    showlegend=True,
  ))

  fig.update_layout(
    title='Cumulative Regret',
    xaxis_title='$c_t$',
    yaxis_title='$R_t$',
    legend_title="Legend",
  )

  fig.show()

In [None]:
def plot_profit_trials(logs):
  """Plot the column-wise mean of the 2-D profit array `logs` with a one-std band."""
  steps = np.arange(logs.shape[1])
  mean_curve = logs.mean(axis=0)
  spread = logs.std(axis=0)

  fig = go.Figure()

  fig.add_trace(go.Scatter(
    x=steps,
    y=mean_curve,
    mode='lines',
    name='Mean',
    line=dict(color='blue'),
  ))

  # Closed polygon: forward along mean - std, back along mean + std.
  fig.add_trace(go.Scatter(
    x=np.concatenate([steps, steps[::-1]]),
    y=np.concatenate([mean_curve - spread, (mean_curve + spread)[::-1]]),
    fill='toself',
    fillcolor='rgba(0, 100, 250, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    name='Uncertainty',
    showlegend=True,
  ))

  fig.update_layout(
    title='Profits',
    xaxis_title='$t$',
    yaxis_title='$b_t$',
    legend=dict(x=0.8, y=0.2),
  )

  fig.show()

In [None]:
def plot_prices_trials(logs):
  """Plot the column-wise mean of the 2-D price array `logs` with a one-std band."""
  steps = np.arange(logs.shape[1])
  mean_curve = logs.mean(axis=0)
  spread = logs.std(axis=0)

  fig = go.Figure()

  fig.add_trace(go.Scatter(
    x=steps,
    y=mean_curve,
    mode='lines',
    name='Mean',
    line=dict(color='blue'),
  ))

  # Closed polygon: forward along mean - std, back along mean + std.
  fig.add_trace(go.Scatter(
    x=np.concatenate([steps, steps[::-1]]),
    y=np.concatenate([mean_curve - spread, (mean_curve + spread)[::-1]]),
    fill='toself',
    fillcolor='rgba(0, 100, 250, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    name='Uncertainty',
    showlegend=True,
  ))

  fig.update_layout(
    title='Prices Trials',
    xaxis_title='$t$',
    yaxis_title='$b_t$',
    legend=dict(x=0.8, y=0.2),
  )

  fig.show()

#### Bidding algorithm

In [None]:
# Run the bidding simulation with the multiplicative-pacing agent ('mpa' branch).
logs_mpa = bidding_algorithm(c, 'mpa')

In [None]:
# Histogram of the strictly positive clairvoyant utilities over all trials.
plot_winned_auctions_distribution(logs_mpa.all_clairvoyant_utilities, "Distribution of utilies - clairvoyant")

In [None]:
# Histogram of the strictly positive utilities the agent earned on won auctions.
plot_winned_auctions_distribution(logs_mpa.trial_wins_logs, "Distribution of utilies - algorithm")

In [None]:
# Mean cumulative payment across trials against the budget line.
plot_cumulative_payments(logs_mpa.cum_all_payments, c.B)

In [None]:
# Mean bid per auction across trials.
plot_chosen_bids(logs_mpa.all_bids)

In [None]:
# Mean cumulative regret against the clairvoyant across trials.
plot_regret(logs_mpa.cum_all_regrets)

In [None]:
# Run the same simulation with the UCB-like agent (any atype other than 'mpa').
logs_ucblike = bidding_algorithm(c, 'ucblike')

In [None]:
# Histogram of the strictly positive clairvoyant utilities over all trials.
plot_winned_auctions_distribution(logs_ucblike.all_clairvoyant_utilities, "Distribution of utilies - clairvoyant")

In [None]:
# Histogram of the strictly positive utilities the agent earned on won auctions.
plot_winned_auctions_distribution(logs_ucblike.trial_wins_logs, "Distribution of utilies - algorithm")

In [None]:
# Mean cumulative payment across trials against the budget line.
plot_cumulative_payments(logs_ucblike.cum_all_payments, c.B)

In [None]:
# Mean bid per auction across trials.
plot_chosen_bids(logs_ucblike.all_bids)

In [None]:
# Mean cumulative regret against the clairvoyant across trials.
plot_regret(logs_ucblike.cum_all_regrets)

# Pricing + Bidding

In [None]:
def bidding_per_day(auction_type, campaign):
    """Run the combined pricing + bidding simulation over several days.

    Every trial creates a fresh `GPUCBAgent` for pricing. Each day the
    pricing agent picks a selling price, a fresh bidding agent takes part in
    `campaign.n_customers` second-price auctions against
    `n_advertisers - 1` uniform bidders, and the auctions won determine how
    many customers reach the pricing environment that day.

    Args:
        auction_type: `'mpa'` selects `SecondPriceMultiplicativePacingAgent`;
            any other value selects
            `FirstPriceMultiplicativePacingUCBLikeAgent`.
        campaign: settings object providing `n_trials`, `days`, `K`, `cost`,
            `conversion_probability`, `ctrs`, `my_valuation`, `B`,
            `n_customers`, `n_advertisers` and `prices_UCB`.

    Returns:
        A `LogsBiddingTrials` with `trials_prices` / `trials_profits`
        (shape `(n_trials, days)`), per-day bidding matrices stacked over
        all trials and days, and their column-wise mean/std.
    """
    logs = LogsBiddingTrials()
    n_auctions = campaign.n_customers
    eta = 1 / np.sqrt(n_auctions)

    cumulative_regrets = []
    cumulative_payments = []
    bid_histories = []
    win_histories = []
    reward_histories = []
    utility_histories = []
    clairvoyant_utility_histories = []
    trial_win_logs = np.array([])

    profit_over_time = []
    daily_prices = []

    for trial_index in range(campaign.n_trials):
        print(f'+Trial {trial_index}')
        pricing_agent = GPUCBAgent(campaign.days, campaign.K)
        pricing_environment = PricingEnvironment(
            conversion_probability=campaign.conversion_probability,
            cost=campaign.cost
        )

        daily_profits = []
        trial_prices = []

        for day_index in range(campaign.days):
            # Unique seed per (trial, day). Seeding with trial + day would make
            # trial k, day d replay the randomness of trial k + 1, day d - 1.
            np.random.seed(trial_index * campaign.days + day_index)
            sell_price = pricing_agent.pull_arm()
            trial_prices.append(sell_price)
            auction = SecondPriceAuction(campaign.ctrs)

            if auction_type == 'mpa':
                pacing_agent = SecondPriceMultiplicativePacingAgent(
                    valuation=campaign.my_valuation,
                    budget=campaign.B,
                    T=n_auctions,
                    eta=eta
                )
            else:
                pacing_agent = FirstPriceMultiplicativePacingUCBLikeAgent(
                    bids_set=campaign.prices_UCB,
                    budget=campaign.B,
                    T=n_auctions,
                    eta=eta
                )

            competitor_bids = np.random.uniform(0, 1, size=(campaign.n_advertisers - 1, n_auctions))
            max_competitor_bids = competitor_bids.max(axis=0)

            _, clairvoyant_utilities, _ = get_clairvoyant_truthful(
                campaign.B, campaign.my_valuation, max_competitor_bids, n_auctions
            )
            clairvoyant_utility_histories.append(clairvoyant_utilities)

            # Preallocated per-auction logs (avoids quadratic np.append).
            utilities = np.zeros(n_auctions)
            agent_bids = np.zeros(n_auctions)
            agent_payments = np.zeros(n_auctions)
            agent_wins = np.zeros(n_auctions)

            for customer_index in range(n_auctions):
                bid = pacing_agent.bid()
                bids = np.append(bid, competitor_bids[:, customer_index])
                winner, _ = auction.round(bids=bids)
                win_status = int(winner == 0)
                highest_other = max_competitor_bids[customer_index]
                revenue = (campaign.my_valuation - highest_other) * win_status
                cost = highest_other * win_status

                pacing_agent.update(revenue, cost)
                utilities[customer_index] = revenue
                agent_bids[customer_index] = bid
                agent_payments[customer_index] = cost
                agent_wins[customer_index] = win_status

            # Utilities of the auctions actually won today, in auction order.
            trial_win_logs = np.append(trial_win_logs, utilities[agent_wins == 1])
            cumulative_regrets.append(np.cumsum(clairvoyant_utilities - utilities))
            cumulative_payments.append(np.cumsum(agent_payments))
            bid_histories.append(agent_bids)
            win_histories.append(np.cumsum(agent_wins))
            reward_histories.append(np.cumsum(utilities))
            utility_histories.append(utilities)

            # Clicks come from auctions *won*; len(agent_wins) would count
            # every auction and decouple pricing from bidding.
            num_clicks = int(agent_wins.sum() * campaign.ctrs[0])
            items_sold, total_profit = pricing_environment.round(sell_price, num_clicks)
            if num_clicks > 0:
                pricing_agent.update(total_profit / num_clicks)
            # With no clicks there is nothing to learn from, so the pricing
            # agent is left untouched (also avoids dividing by zero).

            daily_profits.append(total_profit)

        daily_prices.append(trial_prices)
        profit_over_time.append(daily_profits)

    daily_prices = np.vstack(daily_prices)
    profit_over_time = np.vstack(profit_over_time)
    bid_histories = np.vstack(bid_histories)
    utility_histories = np.vstack(utility_histories)
    cumulative_payments = np.vstack(cumulative_payments)
    cumulative_regrets = np.vstack(cumulative_regrets)
    clairvoyant_utility_histories = np.vstack(clairvoyant_utility_histories)
    win_histories = np.array(win_histories)
    reward_histories = np.array(reward_histories)

    print(profit_over_time.shape)

    logs.__dict__.update({
        'trials_prices': daily_prices,
        'trials_profits': profit_over_time,
        'avg_regret': cumulative_regrets.mean(axis=0),
        'std_regret': cumulative_regrets.std(axis=0),
        'avg_payments': cumulative_payments.mean(axis=0),
        'std_payments': cumulative_payments.std(axis=0),
        'avg_bids': bid_histories.mean(axis=0),
        'std_bids': bid_histories.std(axis=0),
        'avg_wins': win_histories.mean(axis=0),
        'std_wins': win_histories.std(axis=0),
        'avg_rewards': reward_histories.mean(axis=0),
        'std_rewards': reward_histories.std(axis=0),
        'trials_bids': bid_histories,
        'cum_all_payments': cumulative_payments,
        'cum_all_regrets': cumulative_regrets,
        'all_bids': bid_histories,
        'all_utilities': utility_histories,
        'trial_win_logs': trial_win_logs,
        # Alias matching the key used by bidding_algorithm.
        'trial_wins_logs': trial_win_logs,
        'all_clairvoyant_utilities': clairvoyant_utility_histories,
    })

    return logs


In [None]:
# Pricing + bidding simulation with the UCB-like bidding agent.
logs_UCBLike = bidding_per_day("ucblike", c)

+Trial 0
+Trial 1
+Trial 2
+Trial 3
+Trial 4
+Trial 5
+Trial 6
+Trial 7
+Trial 8
+Trial 9
(10, 10)


In [None]:
# Pricing + bidding simulation with the multiplicative-pacing bidding agent.
logs_MPA = bidding_per_day("mpa", c)

+Trial 0
+Trial 1
+Trial 2
+Trial 3
+Trial 4
+Trial 5
+Trial 6
+Trial 7
+Trial 8
+Trial 9
(10, 10)


#### Profit plots

In [None]:
def plot_profit_trials_interaction(logs1, logs2):
    """Overlay the mean daily profit of two runs, each with a one-std band.

    `logs1` is drawn in blue and labelled 'MPA'; `logs2` is drawn in red and
    labelled 'UCBLIKE'. Both are 2-D arrays averaged over axis 0.
    """
    series = (
        (logs1, 'MPA', 'blue', 'rgba(0, 100, 250, 0.2)'),
        (logs2, 'UCBLIKE', 'red', 'rgba(250, 100, 100, 0.2)'),
    )

    fig = go.Figure()

    for data, label, line_color, band_color in series:
        days = np.arange(data.shape[1])
        mean_curve = data.mean(axis=0)
        spread = data.std(axis=0)

        # Mean curve.
        fig.add_trace(go.Scatter(
            x=days,
            y=mean_curve,
            mode='lines',
            name=label,
            line=dict(color=line_color),
        ))

        # Band: forward along mean - std, back along mean + std.
        fig.add_trace(go.Scatter(
            x=np.concatenate([days, days[::-1]]),
            y=np.concatenate([mean_curve - spread, (mean_curve + spread)[::-1]]),
            fill='toself',
            fillcolor=band_color,
            line=dict(color='rgba(255,255,255,0)'),
            name=f'Uncertainty {label}',
            showlegend=True,
        ))

    # Figure layout.
    fig.update_layout(
        title='Profits',
        xaxis_title='Days',
        yaxis_title='Profit',
        legend=dict(x=0.8, y=0.2),
    )

    fig.show()


In [None]:
# Daily profits of the UCB-like run: one row per trial, one column per day.
print(logs_UCBLike.trials_profits)

[[-375.            0.          194.46428571  199.74489796  210.84183673
   207.39795918  205.86734694  204.71938776  212.37244898  193.23979592]
 [-375.            0.          194.46428571  203.73979592  205.96428571
   205.86734694  204.71938776  212.37244898  193.23979592  214.66836735]
 [-375.            0.          197.55102041  200.18877551  205.86734694
   204.71938776  212.37244898  193.23979592  214.66836735  201.2755102 ]
 [-375.            0.          195.0255102   198.41326531  204.71938776
   212.37244898  193.23979592  214.66836735  201.2755102   209.31122449]
 [-375.            0.          193.90306122  197.5255102   212.37244898
   193.23979592  214.66836735  201.2755102   209.31122449  210.07653061]
 [-375.            0.          193.06122449  205.07142857  191.45408163
   214.66836735  201.2755102   209.31122449  210.07653061  206.25      ]
 [-375.            0.          198.67346939  184.65306122  214.08673469
   201.2755102   209.01530612  210.10204082  206.47959184 

In [None]:
# MPA goes first because the function labels its first argument 'MPA'.
plot_profit_trials_interaction(logs_MPA.trials_profits, logs_UCBLike.trials_profits)

#### Price plots

In [None]:
def plot_prices_trials_interaction(logs1, logs2):
    """Overlay the mean daily price of two runs, each with a one-std band.

    `logs1` is drawn in blue and labelled 'MPA'; `logs2` is drawn in red and
    labelled 'UCBLIKE'. Both are 2-D arrays averaged over axis 0.
    """
    series = (
        (logs1, 'MPA', 'blue', 'rgba(0, 100, 250, 0.2)'),
        (logs2, 'UCBLIKE', 'red', 'rgba(250, 100, 100, 0.2)'),
    )

    fig = go.Figure()

    for data, label, line_color, band_color in series:
        days = np.arange(data.shape[1])
        mean_curve = data.mean(axis=0)
        spread = data.std(axis=0)

        # Mean curve.
        fig.add_trace(go.Scatter(
            x=days,
            y=mean_curve,
            mode='lines',
            name=label,
            line=dict(color=line_color),
        ))

        # Band: forward along mean - std, back along mean + std.
        fig.add_trace(go.Scatter(
            x=np.concatenate([days, days[::-1]]),
            y=np.concatenate([mean_curve - spread, (mean_curve + spread)[::-1]]),
            fill='toself',
            fillcolor=band_color,
            line=dict(color='rgba(255,255,255,0)'),
            name=f'Uncertainty {label}',
            showlegend=True,
        ))

    # Figure layout.
    fig.update_layout(
        title='Prices',
        xaxis_title='Days',
        yaxis_title='Price',
        legend=dict(x=0.8, y=0.2),
    )

    fig.show()


In [None]:
# The function labels its first argument 'MPA' and its second 'UCBLIKE', so
# the MPA prices must be passed first or the legend is swapped.
plot_prices_trials_interaction(logs_MPA.trials_prices, logs_UCBLike.trials_prices)