## Multinomial Choice Models and the Independence of Irrelevant Alternatives

In [17]:
import pandas as pd
import pymc as pm
import patsy
import numpy as np
import pytensor.tensor as pt
import arviz as az
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../../../..')

%reload_ext autoreload
from pymc_marketing.customer_choice.mnl_logit import MNLogit


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# One utility equation per alternative. Each has the same shape: the
# alternative's own ic_*/oc_* columns before the '|' and the shared
# covariates (income, rooms, agehed) after it.
utility_formulas = [
    f'{code} ~ ic_{code} + oc_{code} | income + rooms + agehed'
    for code in ('gc', 'gr', 'ec', 'er', 'hp')
]


# Load the choice data (one row per idcase, per-alternative columns) from a
# path relative to this notebook's location.
df = pd.read_csv('../../../../data/choice_wide_heating.csv')
# Display the loaded frame as the cell output.
df

Unnamed: 0,idcase,depvar,ic_gc,ic_gr,ic_ec,ic_er,ic_hp,oc_gc,oc_gr,oc_ec,oc_er,oc_hp,income,agehed,rooms,region
0,1,gc,866.00,962.64,859.90,995.76,1135.50,199.69,151.72,553.34,505.60,237.88,7,25,6,ncostl
1,2,gc,727.93,758.89,796.82,894.69,968.90,168.66,168.66,520.24,486.49,199.19,5,60,5,scostl
2,3,gc,599.48,783.05,719.86,900.11,1048.30,165.58,137.80,439.06,404.74,171.47,4,65,2,ncostl
3,4,er,835.17,793.06,761.25,831.04,1048.70,180.88,147.14,483.00,425.22,222.95,2,50,4,scostl
4,5,er,755.59,846.29,858.86,985.64,883.05,174.91,138.90,404.41,389.52,178.49,2,25,6,valley
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,896,gc,766.39,877.71,751.59,869.78,942.70,142.61,136.21,474.48,420.65,203.00,6,20,4,mountn
896,897,gc,1128.50,1167.80,1047.60,1292.60,1297.10,207.40,213.77,705.36,551.61,243.76,7,45,7,scostl
897,898,gc,787.10,1055.20,842.79,1041.30,1064.80,175.05,141.63,478.86,448.61,254.51,5,60,7,scostl
898,899,gc,860.56,1081.30,799.76,1123.20,1218.20,211.04,151.31,495.20,401.56,246.48,5,50,6,scostl


In [19]:
# Instantiate the packaged MNLogit model with the data frame, the utility
# formulas, the 'depvar' column name and a list of covariate names.
# NOTE(review): this list includes 'income', whereas the hand-rolled version
# further down uses only ['ic', 'oc'] — confirm which is intended.
mnl = MNLogit(df, utility_formulas, 'depvar', covariates=['ic', 'oc', 'income'])
mnl

<pymc_marketing.customer_choice.mnl_logit.MNLogit at 0x16c06fa00>

In [20]:
# Inspect the 'likelihood' entry of the model's configuration.
mnl.model_config["likelihood"]

Prior("Categorical", p=0, dims="obs")

In [21]:
# Show the utility equations stored on the model object.
mnl.utility_equations

['gc ~ ic_gc + oc_gc | income + rooms + agehed',
 'gr ~ ic_gr + oc_gr | income + rooms + agehed',
 'ec ~ ic_ec + oc_ec | income + rooms + agehed',
 'er ~ ic_er + oc_er | income + rooms + agehed',
 'hp ~ ic_hp + oc_hp | income + rooms + agehed']

In [22]:
# NOTE(review): the recorded output of this cell is a PatsyError. The formula
# handed to patsy still contains the '|' separator
# ("0 + ic_gc + oc_gc | income + rooms + agehed"), which patsy evaluates as an
# operator on the column values. This also calls a private method directly.
mnl._prepare_X_matrix(mnl.choice_df, mnl.utility_equations, mnl.depvar)

PatsyError: Error evaluating factor: TypeError: unsupported operand type(s) for |: 'float' and 'int'
    0 + ic_gc + oc_gc | income + rooms + agehed
                ^^^^^^^^^^^^^^

In [None]:
# Summarise the 'alphas' and 'betas' variables held in mnl.idata.
# NOTE(review): no cell visible here populates mnl.idata — presumably a fit
# step is expected to run first; confirm.
az.summary(mnl.idata, var_names=['alphas', 'betas'])

In [None]:
# Render the graph of the packaged model's underlying PyMC model.
pm.model_to_graphviz(mnl.model)

In [None]:
# NOTE(review): this repeats the earlier az.summary(mnl.idata, ...) cell
# verbatim; one of the two is likely a leftover.
az.summary(mnl.idata, var_names=['alphas', 'betas'])

In [None]:

def prepare_X_matrix(df, utility_formulas, depvar):
    """Build the design matrices described by a list of utility formulas.

    Each formula has the form ``"alt ~ cov_1 + cov_2 | fixed_1 + fixed_2"``:

    * the left-hand side (before ``~``) is the label of an alternative and
      must occur as a value of ``df[depvar]``;
    * the right-hand side up to the optional ``|`` is an additive list of
      alternative-specific covariates (columns of ``df``);
    * the optional part after ``|`` is an additive list of covariates that
      are not specific to the alternative ("fixed" covariates).

    Every alternative must list the same number of alternative-specific
    covariates; a column of zeros can stand in for an attribute that an
    alternative lacks.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding every column referenced by the formulas and ``depvar``.
    utility_formulas : list of str
        One formula per alternative, in the order the alternatives should
        be indexed.
    depvar : str
        Name of the column holding the chosen alternative.

    Returns
    -------
    X : numpy.ndarray
        Alternative-specific covariates with shape
        ``(n_obs, n_alts, n_covariates)``.
    F : numpy.ndarray or list
        Design matrix of the fixed covariates (one row per observation), or
        an empty list when no formula contains a ``|`` section.
    alts : list of str
        Alternative labels in formula order.
    fixed_covariates : numpy.ndarray
        Unique, whitespace-stripped fixed-covariate expressions.

    Raises
    ------
    AssertionError
        If ``X`` does not have the expected shape, or an alternative label
        never appears in ``df[depvar]``.
    """
    n_obs = len(df)
    n_alts = len(utility_formulas)
    # Count the '+'-separated terms ahead of the '|' in the first formula.
    n_covariates = len(utility_formulas[0].split('|')[0].split('+'))

    alts = []
    alt_covariates = []
    fixed_covariates = []
    for formula in utility_formulas:
        sides = formula.split('~')
        rhs_sections = sides[1].split('|')
        # '0 +' removes patsy's implicit intercept column; only the part
        # before '|' is valid patsy syntax.
        alt_formula = '0 +' + rhs_sections[0]
        alt_covariates.append(np.asarray(patsy.dmatrix(alt_formula, df)).T)
        alts.append(sides[0].strip())
        if len(rhs_sections) > 1:
            # Strip so formulas differing only in whitespace collapse to a
            # single entry when de-duplicated below.
            fixed_covariates.append(rhs_sections[1].strip())

    unique_fixed = np.unique(fixed_covariates)
    if fixed_covariates:
        fixed_formula = '0 + ' + '+'.join(unique_fixed)
        F = np.asarray(patsy.dmatrix(fixed_formula, df))
    else:
        F = []

    # Each entry is (n_covariates, n_obs); stacking on axis 1 gives
    # (n_covariates, n_alts, n_obs) and the transpose reverses the axes.
    X = np.stack(alt_covariates, axis=1).T
    assert X.shape == (n_obs, n_alts, n_covariates), (
        f'X has shape {X.shape}, expected {(n_obs, n_alts, n_covariates)}'
    )
    observed_choices = df[depvar].values
    for alt in alts:
        assert alt in observed_choices, (
            f'alternative {alt!r} never occurs in column {depvar!r}'
        )

    return X, F, alts, unique_fixed

# Build the alternative-specific matrix X, the fixed-covariate matrix F, the
# alternative labels and the fixed-covariate expressions from the formulas.
X, F, alternatives, f_covariates = prepare_X_matrix(df, utility_formulas, 'depvar')

def prepare_y_outcome(df, alternatives, depvar):
    """Integer-encode the observed choice column for modelling.

    Each label in ``alternatives`` is mapped to its position in that list,
    so the list order should follow the order of the utility formulas.
    The encoding is also written to ``df['mode_encoded']``.

    Returns the encoded values as an array, one entry per row of ``df``.
    """
    position_of = {label: position for position, label in enumerate(alternatives)}
    df['mode_encoded'] = df[depvar].map(position_of)
    return df['mode_encoded'].values

# Labels for the alternative-specific covariate dimension.
covariates = ['ic', 'oc']

# Integer-encode the observed choices by their position in `alternatives`.
y = prepare_y_outcome(df, alternatives, 'depvar')


def prepare_coords(df, alternatives, covariates, f_covariatess):
    """Assemble the coordinate dictionary passed to the PyMC model.

    Parameters
    ----------
    df : pandas.DataFrame
        Choice data; only its length is used, for the ``obs`` coordinate.
    alternatives : list of str
        Alternative labels, in utility-formula order.
    covariates : list of str
        Labels for the alternative-specific covariates.
    f_covariatess : iterable of str
        Fixed-covariate expressions such as ``"income + rooms"``. May be
        empty.

    Returns
    -------
    dict
        Keys ``alts``, ``alts_probs`` (all alternatives but the last),
        ``alt_covariates``, ``fixed_covariates`` and ``obs``.
    """
    # Read the argument rather than a same-named notebook global, and gather
    # names from every expression. dict.fromkeys drops repeats while keeping
    # first-seen order; an empty input yields an empty list.
    fixed_names = list(dict.fromkeys(
        term.strip()
        for expression in f_covariatess
        for term in expression.split('+')
    ))
    coords = {
        "alts": alternatives,
        "alts_probs": alternatives[:-1],
        "alt_covariates": covariates,
        'fixed_covariates': fixed_names,
        "obs": range(len(df)),
    }
    return coords

# Build the coordinate dictionary for the model and display it.
coords = prepare_coords(df, alternatives, covariates, f_covariates)
coords

In [None]:
# Inspect the fixed-covariate design matrix returned by prepare_X_matrix.
F

In [None]:
def make_model(X, y, coords, F=None):
    """Build a multinomial logit choice model in PyMC.

    The utility of each alternative is an alternative-specific intercept
    plus a dot product of the alternative's covariates with shared
    coefficients, plus an optional fixed-covariate term whose coefficients
    vary by alternative. The last alternative acts as the reference: its
    intercept and fixed-covariate coefficients are set to zero.

    Parameters
    ----------
    X : array-like
        Alternative-specific covariates, registered with dims
        ``('obs', 'alts', 'alt_covariates')``.
    y : array-like
        Encoded observed choice, one entry per observation.
    coords : dict
        Model coordinates; the dims used below are ``alts``,
        ``alt_covariates``, ``obs`` and, when ``F`` is supplied,
        ``fixed_covariates``.
    F : array-like, optional
        Fixed-covariate design matrix with one row per observation.
        ``None`` or an empty container means no fixed-covariate term.

    Returns
    -------
    pm.Model
        The assembled, unsampled model.
    """
    n_obs = len(y)
    # An empty list (what the design-matrix helper returns when no formula
    # has a '|' section) must be treated like None.
    has_fixed = F is not None and len(F) > 0

    with pm.Model(coords=coords) as model:
        # Intercept parameters
        alphas_raw = pm.Normal('alphas_', 0, 5, dims='alts')
        # Covariate weight parameters
        betas = pm.Normal('betas', 0, 1, dims=('alt_covariates'))

        if has_fixed:
            betas_fixed_ = pm.Normal(
                'betas_fixed_', 0, 1, dims=('alts', 'fixed_covariates')
            )
            # Zero out the reference alternative's coefficients.
            betas_fixed = pm.Deterministic(
                'betas_fixed',
                pt.set_subtensor(betas_fixed_[-1, :], 0),
                dims=('alts', 'fixed_covariates'),
            )
            F_data = pm.Data('F', F)
            fixed_term = pm.Deterministic(
                'F_interaction', pm.math.dot(F_data, betas_fixed.T)
            )
        else:
            # Shape (n_obs, 1) broadcasts across the alternatives axis;
            # a flat (n_obs,) vector would not align with (n_obs, n_alts).
            fixed_term = pt.zeros((n_obs, 1))

        # Covariate data for each utility function, on the same
        # 'alt_covariates' dim as `betas`.
        X_data = pm.Data('X', X, dims=('obs', 'alts', 'alt_covariates'))
        # Outcome data
        observed = pm.Data('y', y, dims='obs')

        # Use the registered data container, not the raw array, so that
        # replacing 'X' via pm.set_data changes the utilities.
        U = pm.math.dot(X_data, betas)
        # Zero out the reference alternative's intercept.
        alphas = pm.Deterministic(
            'alphas', pt.set_subtensor(alphas_raw[-1], 0), dims='alts'
        )
        U = pm.Deterministic("U", fixed_term + U + alphas, dims=("obs", "alts"))
        # Softmax across alternatives gives the choice probabilities.
        p_ = pm.Deterministic(
            "p", pm.math.softmax(U, axis=1), dims=("obs", "alts")
        )
        # Likelihood
        pm.Categorical("likelihood", p=p_, observed=observed, dims="obs")

    return model

# Assemble the hand-written model from the prepared matrices and render its graph.
model = make_model(X, y, coords, F)
pm.model_to_graphviz(model)


In [None]:
# One seed for every sampling step so that Restart & Run All reproduces the
# prior-predictive, posterior and posterior-predictive draws alike.
RANDOM_SEED = 101

with model:
    # Prior predictive draws create the InferenceData container.
    idata_m1 = pm.sample_prior_predictive(random_seed=RANDOM_SEED)
    # Posterior draws; pointwise log-likelihood is stored alongside them.
    idata_m1.extend(
        pm.sample(
            target_accept=.99,
            tune=2000,
            idata_kwargs={"log_likelihood": True},
            random_seed=RANDOM_SEED,
        )
    )
    # Posterior predictive draws, conditioned on the posterior just added.
    idata_m1.extend(
        pm.sample_posterior_predictive(idata_m1, random_seed=RANDOM_SEED)
    )

In [None]:
# Summarise the shared coefficients, the intercepts and the per-alternative
# fixed-covariate coefficients of the hand-written model.
az.summary(idata_m1, var_names=['betas', 'alphas', 'betas_fixed'])