In [1]:
%load_ext autoreload
%autoreload 2

In [16]:
import numpy as np
from bnb.problem import OptimizationProblem
from matplotlib import pyplot as plt
import matplotlib
from scipy.optimize import minimize
import pandas as pd
from bnb.fml_solver import FMLSolver
import warnings
import time
import seaborn as sns
sns.set()
matplotlib.rcParams.update({'font.size': 11, 'font.family': 'serif'})

In [47]:
class DataSimulator(OptimizationProblem):
    """Optimization problem that can also simulate customer choice data.

    The methods below read ``self.segments`` (each segment exposing a weight ``w``
    and a ``purchase_probabilities(p)`` method), ``self.p_lb``, ``self.p_ub`` and
    ``self.n``; these are expected to be provided by ``OptimizationProblem``.
    """

    def revenue_true(self, p):
        """Return the revenue at prices ``p`` under the true mixture model.

        The revenue is the segment-weight-weighted sum, over all segments, of
        ``sum(p * segment.purchase_probabilities(p))``.
        """
        return np.sum([
            segment.w * np.sum(p * segment.purchase_probabilities(p))
            for segment in self.segments
        ])

    def simulate_choices(self, n_observations, seed=None):
        """Simulate random price vectors and the resulting customer choices.

        Parameters
        ----------
        n_observations : int
            Number of (price vector, choice) pairs to draw.
        seed : int, optional
            If given, the global NumPy RNG is reseeded with it before sampling.
            If ``None`` the current global RNG state is used unchanged, so a seed
            set by the caller keeps governing the simulation.

        Returns
        -------
        observed_prices : ndarray of shape (n_observations, self.n)
            Prices drawn uniformly between ``self.p_lb`` and ``self.p_ub``.
        observed_choices : ndarray of shape (n_observations,)
            Chosen product index per observation; ``self.n`` means "no purchase".
        """
        # Only reseed on request: np.random.seed(None) would re-initialise the
        # global generator from OS entropy and discard the caller's seed.
        if seed is not None:
            np.random.seed(seed)

        observed_prices = np.random.uniform(self.p_lb, self.p_ub, size=(n_observations, self.n))

        # Mixture purchase probabilities: segment-weighted sum for each price vector.
        purchase_probabilities = np.asarray([
            np.sum([segment.w * segment.purchase_probabilities(price) for segment in self.segments], axis=0)
            for price in observed_prices
        ])

        assert purchase_probabilities.shape == (n_observations, self.n)

        # Inverse-CDF sampling: the choice is the number of cumulative probabilities
        # lying strictly below one uniform draw per observation.
        # 0 is the first product, n is no purchase.
        observed_choices = np.sum(
            np.cumsum(purchase_probabilities, axis=1) < np.random.uniform(size=(n_observations, 1)),
            axis=1
        )

        return observed_prices, observed_choices
    
    
def logit_loglikelihood(params, observed_choices, observed_prices):
    """Average negative log-likelihood of a single-segment logit model.

    Parameters
    ----------
    params : array-like of length 2 * n_products
        First half holds the intercepts ``a``, second half the price
        sensitivities ``b``; product utilities are ``a - b * price``.
    observed_choices : integer array of shape (n_choices,)
        Chosen alternative per observation; index ``n_products`` denotes the
        no-purchase option, whose utility is fixed at 0.
    observed_prices : array of shape (n_choices, n_products)
        Prices shown in each observation.

    Returns
    -------
    float
        Negative log-likelihood divided by the number of observations.
    """
    params = np.asarray(params)
    observed_prices = np.asarray(observed_prices)

    # Take the product count from the data instead of a notebook-level global,
    # so the function does not depend on which cells have been executed.
    n_products = observed_prices.shape[-1]
    n_choices = len(observed_choices)

    a = params[:n_products]
    b = params[n_products:]

    utilities = a - b * observed_prices

    # Append the no-purchase alternative (utility 0) as the last column.
    utilities = np.c_[utilities, np.zeros((n_choices, 1))]

    utilities_chosen_products = utilities[np.arange(n_choices), observed_choices]

    # log P(chosen) = -log(sum_j exp(u_j - u_chosen)); shift by the row maximum
    # before exponentiating so large utilities cannot overflow.
    utility_gaps = utilities - np.expand_dims(utilities_chosen_products, -1)
    row_max = np.max(utility_gaps, axis=-1, keepdims=True)
    log_probs = - (row_max[:, 0] + np.log(np.sum(np.exp(utility_gaps - row_max), axis=-1)))

    return - np.sum(log_probs) / n_choices

In [51]:
# Experiment: for each number of segments m, draw random finite-mixture-logit
# problems, solve them with the FML solver, fit a (misspecified) single-segment
# logit to simulated choices and optimise prices under that fitted model.
n = 10            # number of products
n_choices = 1000  # simulated observations per problem instance
# Added to the parameter seed to seed the choice simulation, so the simulated
# data is reproducible without replaying the random stream used for a0/b0/w0.
DATA_SEED_OFFSET = 1_000_000


def logit_revenue(p, opt):
    """Negative revenue of prices ``p`` under a single-segment problem ``opt``.

    Negated so that ``scipy.optimize.minimize`` maximises revenue.
    """
    assert len(opt.segments) == 1
    segment = opt.segments[0]
    return - np.sum(p * segment.purchase_probabilities(p))


results = []
for m in [2, 3, 4]:
    print(f"m: {m}.")
    for run in range(100):
        print(f"run: {run}.")
        t0 = time.time()

        # NOTE(review): this seed is not unique across m
        # (e.g. m=2, run=1 and m=3, run=0 both give 13) - confirm this is intended.
        seed = n + m + run
        np.random.seed(seed)

        # Random ground-truth parameters; segment weights are normalised to sum to 1.
        a0 = np.random.uniform(0, 4, size=(m, n))
        b0 = np.random.uniform(0.001, 0.01, size=n)
        w0 = np.random.uniform(size=m)
        w0 /= np.sum(w0)

        true_demand = DataSimulator(a0, b0, w0)
        solver = FMLSolver(true_demand)
        solver.solve()
        print(f"time elapsed after FML solver: {solver.timer}.")

        # Pass an explicit seed: calling simulate_choices without one reseeds the
        # global RNG from OS entropy, which made the simulated data irreproducible.
        data_seed = seed + DATA_SEED_OFFSET
        observed_prices, observed_choices = true_demand.simulate_choices(n_choices, seed=data_seed)

        # Start the likelihood search at a = 1, b = 0.
        parameter_start = np.concatenate((np.ones(n), np.zeros(n)))

        with warnings.catch_warnings():
            warnings.filterwarnings(action="ignore", category=RuntimeWarning)
            min_ll = minimize(logit_loglikelihood, parameter_start, args=(observed_choices, observed_prices))
            # The objective is the average negative log-likelihood, hence the
            # inverse Hessian estimate is scaled by 1 / n_choices.
            # Assigned here because a later cell displays `standard_errors`.
            standard_errors = np.sqrt(np.diag(min_ll.hess_inv) / n_choices)

        print(f"time elapsed after likelihood estimation: {time.time() - t0}.")
        a, b = min_ll.x[:n], min_ll.x[n:]

        # Single-segment problem built from the fitted logit parameters.
        opt = OptimizationProblem(np.asarray([a]), b, np.ones(1))
        with warnings.catch_warnings():
            warnings.filterwarnings(action="ignore", category=RuntimeWarning)
            opt_rev = minimize(logit_revenue, np.ones(n) + 3.0, args=(opt, ))

        print(f"time elapsed after logit revenue optimization: {time.time() - t0}.")
        results.append({
            "n": n,
            "m": m,
            "seed": seed,
            "data_seed": data_seed,
            # Kept for backward compatibility: this is the full OptimizeResult,
            # not a number. Use the two scalar entries below for analysis.
            "logit_rev": opt_rev,
            # Revenue the fitted logit model predicts at its own optimal prices.
            "logit_rev_model": float(-opt_rev.fun),
            # Revenue those prices earn under the true mixture model.
            "logit_rev_true": float(true_demand.revenue_true(opt_rev.x)),
            # Midpoint of the solver's objective bounds.
            "fml_rev": (solver.objective_ub + solver.objective_lb) / 2,
            "log_ll_msg": min_ll.message,
            "logit_opt_msg": opt_rev.message,
        })

m: 2.
run: 0.
time elapsed after FML solver: 0.813007116317749.
time elapsed after likelihood estimation: 1.7422006130218506.
time elapsed after logit revenue optimization: 1.761819839477539.
run: 1.
time elapsed after FML solver: 0.7915558815002441.
time elapsed after likelihood estimation: 1.6551790237426758.
time elapsed after logit revenue optimization: 1.7017898559570312.
run: 2.
time elapsed after FML solver: 0.9408700466156006.
time elapsed after likelihood estimation: 1.643984079360962.
time elapsed after logit revenue optimization: 1.6939499378204346.
run: 3.
time elapsed after FML solver: 0.7964248657226562.
time elapsed after likelihood estimation: 1.3520293235778809.
time elapsed after logit revenue optimization: 1.4414043426513672.
run: 4.
time elapsed after FML solver: 0.7912290096282959.
time elapsed after likelihood estimation: 1.2879230976104736.
time elapsed after logit revenue optimization: 1.3207619190216064.
run: 5.
time elapsed after FML solver: 0.6287028789520264

time elapsed after FML solver: 0.7723867893218994.
time elapsed after likelihood estimation: 1.6608531475067139.
time elapsed after logit revenue optimization: 1.7003459930419922.
run: 45.
time elapsed after FML solver: 0.6220541000366211.
time elapsed after likelihood estimation: 1.346008062362671.
time elapsed after logit revenue optimization: 1.393895149230957.
run: 46.
time elapsed after FML solver: 0.6191329956054688.
time elapsed after likelihood estimation: 1.5486018657684326.
time elapsed after logit revenue optimization: 1.6119072437286377.
run: 47.
time elapsed after FML solver: 0.9411430358886719.
time elapsed after likelihood estimation: 1.6108520030975342.
time elapsed after logit revenue optimization: 1.7016229629516602.
run: 48.
time elapsed after FML solver: 0.9800851345062256.
time elapsed after likelihood estimation: 1.8423881530761719.
time elapsed after logit revenue optimization: 1.9123649597167969.
run: 49.
time elapsed after FML solver: 0.8326132297515869.
time e

time elapsed after FML solver: 0.8196008205413818.
time elapsed after likelihood estimation: 1.4984157085418701.
time elapsed after logit revenue optimization: 1.5173068046569824.
run: 89.
time elapsed after FML solver: 0.8249280452728271.
time elapsed after likelihood estimation: 1.4215123653411865.
time elapsed after logit revenue optimization: 1.5184619426727295.
run: 90.
time elapsed after FML solver: 0.988152027130127.
time elapsed after likelihood estimation: 1.806570053100586.
time elapsed after logit revenue optimization: 1.8337700366973877.
run: 91.
time elapsed after FML solver: 0.8197617530822754.
time elapsed after likelihood estimation: 1.3715319633483887.
time elapsed after logit revenue optimization: 1.4449689388275146.
run: 92.
time elapsed after FML solver: 0.8097250461578369.
time elapsed after likelihood estimation: 1.37746000289917.
time elapsed after logit revenue optimization: 1.461775779724121.
run: 93.
time elapsed after FML solver: 0.8260831832885742.
time elap

time elapsed after FML solver: 1.5512919425964355.
time elapsed after likelihood estimation: 2.0235230922698975.
time elapsed after logit revenue optimization: 2.065586805343628.
run: 33.
time elapsed after FML solver: 2.6254568099975586.
time elapsed after likelihood estimation: 3.180285930633545.
time elapsed after logit revenue optimization: 3.23923397064209.
run: 34.
time elapsed after FML solver: 1.6201319694519043.
time elapsed after likelihood estimation: 2.522390127182007.
time elapsed after logit revenue optimization: 2.5875682830810547.
run: 35.
time elapsed after FML solver: 1.744145154953003.
time elapsed after likelihood estimation: 2.6712679862976074.
time elapsed after logit revenue optimization: 2.727982997894287.
run: 36.
time elapsed after FML solver: 1.0356333255767822.
time elapsed after likelihood estimation: 1.553574800491333.
time elapsed after logit revenue optimization: 1.5985510349273682.
run: 37.
time elapsed after FML solver: 1.7246999740600586.
time elapsed

time elapsed after FML solver: 2.7440271377563477.
time elapsed after likelihood estimation: 3.343975782394409.
time elapsed after logit revenue optimization: 3.429372787475586.
run: 77.
time elapsed after FML solver: 1.9453861713409424.
time elapsed after likelihood estimation: 2.4711029529571533.
time elapsed after logit revenue optimization: 2.52297306060791.
run: 78.
time elapsed after FML solver: 1.1567270755767822.
time elapsed after likelihood estimation: 1.7213711738586426.
time elapsed after logit revenue optimization: 1.7628710269927979.
run: 79.
time elapsed after FML solver: 1.9304170608520508.
time elapsed after likelihood estimation: 2.6047821044921875.
time elapsed after logit revenue optimization: 2.6245200634002686.
run: 80.
time elapsed after FML solver: 1.932291030883789.
time elapsed after likelihood estimation: 2.5396840572357178.
time elapsed after logit revenue optimization: 2.6074438095092773.
run: 81.
time elapsed after FML solver: 3.1701600551605225.
time elap

time elapsed after FML solver: 38.989373207092285.
time elapsed after likelihood estimation: 39.63798522949219.
time elapsed after logit revenue optimization: 39.70642805099487.
run: 21.
time elapsed after FML solver: 40.39816999435425.
time elapsed after likelihood estimation: 41.015625.
time elapsed after logit revenue optimization: 41.12500262260437.
run: 22.
time elapsed after FML solver: 15.407782793045044.
time elapsed after likelihood estimation: 16.049078941345215.
time elapsed after logit revenue optimization: 16.224927186965942.
run: 23.
time elapsed after FML solver: 17.225008964538574.
time elapsed after likelihood estimation: 18.362056016921997.
time elapsed after logit revenue optimization: 18.38669204711914.
run: 24.
time elapsed after FML solver: 45.52076292037964.
time elapsed after likelihood estimation: 46.31614398956299.
time elapsed after logit revenue optimization: 46.36968994140625.
run: 25.
time elapsed after FML solver: 16.227099895477295.
time elapsed after li

time elapsed after FML solver: 80.26726579666138.
time elapsed after likelihood estimation: 80.97473978996277.
time elapsed after logit revenue optimization: 81.04327178001404.
run: 65.
time elapsed after FML solver: 236.82418394088745.
time elapsed after likelihood estimation: 237.48368096351624.
time elapsed after logit revenue optimization: 237.5357267856598.
run: 66.
time elapsed after FML solver: 14.759151697158813.
time elapsed after likelihood estimation: 15.737514972686768.
time elapsed after logit revenue optimization: 15.808626890182495.
run: 67.
time elapsed after FML solver: 52.162070989608765.
time elapsed after likelihood estimation: 52.826021909713745.
time elapsed after logit revenue optimization: 52.87434387207031.
run: 68.
time elapsed after FML solver: 15.258002996444702.
time elapsed after likelihood estimation: 15.96806001663208.
time elapsed after logit revenue optimization: 16.009126901626587.
run: 69.
time elapsed after FML solver: 68.42012023925781.
time elapse

In [None]:
# Show only the first few simulated choices instead of dumping the whole array
# (it holds one entry per simulated observation).
observed_choices[:10]

In [18]:
# TODO: check whether, when the objective is the *average* log-likelihood, the
# inverse Hessian has to be divided by n or by sqrt(n) to obtain standard errors.
# Reference on standard errors: https://stats.stackexchange.com/questions/68080/basic-question-about-fisher-information-matrix-and-relationship-to-hessian-and-s
# NOTE(review): `standard_errors` is not assigned in this cell; it has to exist in
# the kernel already - confirm which cell defines it before relying on this output.
standard_errors

array([0.08612482, 0.09406891, 0.09521281, 0.07162577, 0.11699961,
       0.12909529, 0.11300329, 0.08158962, 0.06631054, 0.07131954,
       0.00034469, 0.00032515, 0.00023397, 0.00021443, 0.00016033,
       0.00043511, 0.00017233, 0.00028399, 0.00014118, 0.00014423])

In [None]:
# TODO: test the hypothesis that each estimate differs from its true value, i.e. check whether the true value lies inside the confidence interval.