In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from bnb.problem import OptimizationProblem
from matplotlib import pyplot as plt
import matplotlib
from scipy.optimize import minimize
import pandas as pd
from bnb.fml_solver import FMLSolver
import warnings
import time
from bnb.gradient_descent import GradientDescent
import seaborn as sns

from autograd.scipy.special import logsumexp
from autograd import jacobian
import autograd.numpy as anp

# Seaborn theme first; the font overrides are applied afterwards so they take
# precedence over anything the theme sets.
sns.set()
matplotlib.rcParams['font.size'] = 11
matplotlib.rcParams['font.family'] = 'serif'

In [None]:
# Problem dimensions.
n = 5  # products (one price column per product in the simulated data)
m = 2  # rows of the true intercept matrix a0; the loop hard-codes two weights in w0

# Experiment size.
runs = 150          # number of random scenarios the main loop iterates over
n_choices = 10_000  # simulated observations requested per scenario

In [None]:
class DataSimulator(OptimizationProblem):
    """Demand model that can evaluate revenue and generate synthetic choices.

    The methods read ``self.segments``, ``self.n``, ``self.p_lb`` and
    ``self.p_ub``, none of which are defined here — presumably they are set up
    by ``OptimizationProblem``; confirm against that class.
    """

    def revenue_true(self, p):
        """Return the segment-weighted revenue of price vector ``p``.

        Each segment contributes ``w * sum(p * purchase_probabilities(p))``;
        the contributions of all segments are added up.
        """
        weighted_revenues = []
        for segment in self.segments:
            segment_revenue = np.sum(p * segment.purchase_probabilities(p))
            weighted_revenues.append(segment.w * segment_revenue)
        return np.sum(weighted_revenues)

    def simulate_choices(self, n_observations, seed=None):
        """Draw random prices and one simulated choice per price vector.

        Parameters
        ----------
        n_observations : int
            Number of price vectors (and choices) to generate.
        seed : optional
            Passed straight to ``np.random.seed``, so calling this method
            reseeds NumPy's *global* random generator.

        Returns
        -------
        (observed_prices, observed_choices)
            ``observed_prices`` has shape ``(n_observations, self.n)``;
            ``observed_choices`` holds one integer per observation, where
            ``0`` is the first product and ``self.n`` means no purchase.
        """
        np.random.seed(seed)

        price_shape = (n_observations, self.n)
        observed_prices = np.random.uniform(self.p_lb, self.p_ub, size=price_shape)

        # Mixture purchase probabilities: weight every segment's probabilities
        # by the segment weight and add them up, one row per price vector.
        mixture_rows = []
        for price in observed_prices:
            per_segment = [
                segment.w * segment.purchase_probabilities(price)
                for segment in self.segments
            ]
            mixture_rows.append(np.sum(per_segment, axis=0))
        purchase_probabilities = np.asarray(mixture_rows)

        assert purchase_probabilities.shape == price_shape

        # Inverse-CDF sampling: the choice index is the number of cumulative
        # probabilities lying strictly below a uniform draw. If the draw
        # exceeds all of them the index is self.n, i.e. no purchase.
        cumulative = np.cumsum(purchase_probabilities, axis=1)
        thresholds = np.random.uniform(size=(n_observations, 1))
        observed_choices = np.sum(cumulative < thresholds, axis=1)

        return observed_prices, observed_choices
    
    
def logit_loglikelihood(params, observed_choices, observed_prices):
    """Average negative log-likelihood of a logit choice model.

    Despite the name, the returned value is the *negative* log-likelihood
    divided by the number of observations, i.e. a quantity to minimise.

    The number of products is derived from ``params`` rather than from a
    notebook-level global, so the function works for any problem size.

    Parameters
    ----------
    params : array-like of even length ``2 * k``
        First ``k`` entries are the intercepts ``a``, last ``k`` entries the
        price sensitivities ``b``; product utility is ``a - b * price``.
    observed_choices : integer array of shape ``(n_obs,)``
        Chosen alternative per observation; ``0..k-1`` index the products and
        ``k`` denotes the no-purchase option (utility fixed at zero).
    observed_prices : array of shape ``(n_obs, k)``
        Prices offered in each observation.

    Returns
    -------
    float
        ``-sum(log P(choice)) / n_obs``.
    """
    assert len(params) % 2 == 0, len(params)
    n_products = len(params) // 2
    a, b = params[:n_products], params[n_products:]
    n_obs = len(observed_choices)

    utilities = a - b * observed_prices
    assert utilities.shape == (n_obs, n_products), utilities.shape

    # Append the no-purchase alternative as a final column with zero utility.
    utilities = anp.concatenate((utilities, anp.zeros((n_obs, 1))), axis=1)
    assert utilities.shape == (n_obs, n_products + 1)

    utilities_chosen_products = utilities[anp.arange(n_obs), observed_choices]
    assert utilities_chosen_products.shape == (n_obs, ), utilities_chosen_products.shape

    # log P(choice) = u_chosen - logsumexp(u); shifting by u_chosen before the
    # logsumexp gives the same value in a single step.
    log_probs = - logsumexp(utilities - anp.expand_dims(utilities_chosen_products, -1), axis=-1)
    assert log_probs.shape == (n_obs, ), log_probs.shape

    return - anp.sum(log_probs) / n_obs

In [None]:
from tqdm import tqdm

# The gradient function is the same for every scenario and every restart, so
# build the autograd wrapper once instead of once per optimisation attempt.
jac = jacobian(logit_loglikelihood)

results = []
# To debug individual scenarios, iterate over an explicit list of seeds
# (e.g. [32] or [31, 32, 33, 34]) instead of range(runs).
for run in tqdm(range(runs)):

    np.random.seed(run)

    # True model parameters for this scenario: intercepts a0 per (segment,
    # product), price sensitivities b0 per product, and fixed equal weights.
    # Earlier experiments drew these from small discrete grids and also
    # randomised the weights.
    a0 = np.random.uniform(-7, 7, size=(m, n))
    b0 = np.random.uniform(0.001, 0.01, size=n)
    w0 = np.asarray([0.5, 0.5])

    # The gradient-descent solution is handed to the solver as its initial
    # objective lower bound.
    gd = GradientDescent(a0, b0, w0)
    gd_sol = gd.solve()

    true_demand = DataSimulator(a0, b0, w0)
    solver = FMLSolver(true_demand, objective_lb=gd_sol, epsilon=0.005)
    solver.solve()

    # Simulate data
    # NOTE(review): simulate_choices reseeds the global generator with the
    # same seed used above for a0/b0, so the random stream restarts from the
    # beginning here. Kept as is to preserve earlier results; consider a
    # distinct seed.
    observed_prices, observed_choices = true_demand.simulate_choices(n_choices, seed=run)

    # Estimate logit model
    t0 = time.time()
    # First attempt starts from the segment-averaged true intercepts and the
    # true price sensitivities; later attempts start from random points.
    a_start = np.mean(a0, axis=0)
    b_start = b0
    best_result = None
    any_success = False

    for attempt in range(20):

        parameter_start = np.hstack((a_start, b_start))

        with warnings.catch_warnings():
            warnings.filterwarnings(action="ignore", category=RuntimeWarning)
            min_ll = minimize(
                logit_loglikelihood,
                parameter_start,
                args=(observed_choices, observed_prices),
                jac=jac
            )

        any_success = any_success or min_ll.success

        # Keep the lowest objective seen so far. A non-finite incumbent is
        # always replaced: comparisons against NaN are False, so without this
        # guard a NaN from the first attempt could never be displaced.
        if (
            best_result is None
            or not np.isfinite(best_result.fun)
            or min_ll.fun < best_result.fun
        ):
            best_result = min_ll

        a_start, b_start = np.random.uniform(0, 8, size=n), np.random.uniform(0, 0.05, size=n)

    if not any_success:
        raise ValueError("All optimization attempts failed.")

    # The objective is the per-observation average, hence the division by
    # n_choices. Not used further in this cell; kept for inspection.
    # NOTE(review): hess_inv is whatever the optimiser reports (an
    # approximation for quasi-Newton methods) — treat as indicative.
    standard_errors = np.sqrt(np.diag(best_result.hess_inv) / n_choices)
    a, b = best_result.x[:n], best_result.x[n:]

    print(f"estimated logit model in {time.time() - t0:.0f} seconds.")

    # Optimize revenue under logit model specification (i.e., FML with one segment)
    opt = OptimizationProblem(np.asarray([a]), b, np.ones(1))
    # Prices are 1 / b plus the segment's rev_opt (presumably the optimal
    # revenue of the single-segment model — confirm in bnb.problem).
    opt_logit_prices = 1 / b + opt.segments[0].rev_opt
    # Evaluate the logit-optimal prices under the true mixture demand.
    logit_rev = true_demand.revenue_true(opt_logit_prices)
    opt_rev = solver.objective_lb

    results.append({
        "opt_rev": opt_rev,
        "logit_rev": logit_rev,
        "logit_par": (a, b),
        "real_par": (a0, b0, w0),
        "run": run
    })

    print(f"logit rev {logit_rev}, opt rev {opt_rev}")

In [None]:
_, ax = plt.subplots(figsize=(8, 5))

# One row per run; `gap` is the revenue lost by pricing with the logit model,
# expressed as a percentage of the reported optimal revenue.
df = pd.DataFrame.from_records(results)
df = df.assign(gap=(df["opt_rev"] - df["logit_rev"]) / df["opt_rev"] * 100)

# Keep gaps of at most 100 %; rows whose gap is NaN fail the comparison and
# are dropped as well.
df = df[df["gap"] <= 100]

df["gap"].hist(bins=10, ax=ax)

ax.set(xlabel="Optimality gap due to misspecification (%)")
plt.show()

In [None]:
# Show the runs ordered from smallest to largest gap.
# NOTE(review): the earlier annotation "-4.2%" presumably records the smallest
# gap observed in a previous execution — confirm.
df.sort_values(by="gap")