37902 Foundation of Advanced Quantitative Marketing

Li Liu

2/4/2019

#### Tasks
1)      Compute the nested logit elasticities (if you haven’t already done so)

2)      Try out the IIA tests

3)      Work on the observable heterogeneity model – both a priori and with interactions

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt
pd.options.display.max_colwidth = 1000

### Yogurt100N Sales Data

In [11]:
df=pd.read_excel("Yogurt100N.csv.xlsx")
df.describe()
df.head()

Unnamed: 0,Pan I.D.,Expend $,Income,HH Size,IPT,Quantity,Brand 1,Brand 2,Brand 3,Brand 4,Feature 1,Feature 2,Feature 3,Feature 4,Price 1,Price 2,Price 3,Price 4,PanelistFirstObs
0,1,40.900002,9,2,5,2,0,0,0,1,0,0,0,0,0.108,0.081,0.061,0.079,1
1,1,16.809999,9,2,5,2,0,1,0,0,0,0,0,0,0.108,0.098,0.064,0.075,0
2,1,4.06,9,2,1,2,0,1,0,0,0,0,0,0,0.108,0.098,0.061,0.086,0
3,1,34.459999,9,2,4,2,0,1,0,0,0,0,0,0,0.108,0.098,0.061,0.086,0
4,1,8.39,9,2,7,2,0,1,0,0,0,0,0,0,0.125,0.098,0.049,0.079,0


### Simple Logit on Yogurt Data

In [12]:
def crit(params,df):

    a1,a2,a3,bf,bp=params
    ev1=np.exp(a1+bf*df['Feature 1']+bp*df['Price 1'])
    ev2=np.exp(a2+bf*df['Feature 2']+bp*df['Price 2'])
    ev3=np.exp(a3+bf*df['Feature 3']+bp*df['Price 3'])
    ev4=np.exp(0+bf*df['Feature 4']+bp*df['Price 4'])
    denom=ev1+ev2+ev3+ev4
    global p1,p2,p3,p4
    p1,p2,p3,p4=ev1/denom,ev2/denom,ev3/denom,ev4/denom
    pc=(ev1*df['Brand 1']+ev2*df['Brand  2']+ev3*df['Brand 3']+ev4*df['Brand 4'])/denom
    Inpc=np.log(pc)
    LL=np.sum(Inpc)
    return -LL
a1,a2,a3,bf,bp=1,1,1,1,1 #Initalization
params_init = np.array([a1,a2,a3,bf,bp])
results = opt.minimize(crit, params_init,df)
a1,a2,a3,bf,bp = results.x
a1F,a2F,a3F,bfF,bpF = results.x #For IIA Test

sigmaF=results.hess_inv
print(" a1 (Intrinsic brand preference for Brand 1):",a1,"\n",
      "a2 (Intrinsic brand preference for Brand 2):",a2,"\n",
      "a3 (Intrinsic brand preference for Brand 3):",a3,"\n",
      "bf (Coefficients for feature variable):",bf,"\n",
      "bp (Coefficients for price variable):",bp,"\n",
      "Maximized Log Likelihood:",-results.fun)

 a1 (Intrinsic brand preference for Brand 1): 1.3877493848693059 
 a2 (Intrinsic brand preference for Brand 2): 0.6435046305879636 
 a3 (Intrinsic brand preference for Brand 3): -3.0861119572117355 
 bf (Coefficients for feature variable): 0.4874149107851659 
 bp (Coefficients for price variable): -37.057782766093105 
 Maximized Log Likelihood: -2658.5566975071233


### Elasticity with I.I.A assumption

Own Elasticity:
$\beta*X_{jt}*(1-P_{ijt})$

Cross Elasticity (doesn't depend on j):
$-\beta*P_{ikt}*X_{kt}$

In [13]:
#Own elasticities
e11=np.mean((bf*df['Feature 1']+bp*df['Price 1'])*(1-p1))
e22=np.mean((bf*df['Feature 2']+bp*df['Price 2'])*(1-p2))
e33=np.mean((bf*df['Feature 3']+bp*df['Price 3'])*(1-p3))
e44=np.mean((bf*df['Feature 4']+bp*df['Price 4'])*(1-p4))

In [14]:
#Property of Logit Model: for all j != k, the cross elasticity will be the same.
e21=e31=e41=np.mean(-(bf*df['Feature 1']+bp*df['Price 1'])*p1)
e12=e32=e42=np.mean(-(bf*df['Feature 2']+bp*df['Price 2'])*p2)
e13=e23=e43=np.mean(-(bf*df['Feature 3']+bp*df['Price 3'])*p3)
e14=e24=e34=np.mean(-(bf*df['Feature 4']+bp*df['Price 4'])*p4)

In [15]:
mat=pd.DataFrame({"Brand 1":[e11,e21,e31,e41], "Brand2":[e12,e22,e32,e42],
                  "Brand 3":[e13,e23,e33,e43],'Brand 4':[e14,e24,e34,e44]})
mat.index=["Brand 1","Brand 2", "Brand 3", "Brand 4"]
print("Elasiticy Matrix with Simple Logit Model")
mat

Elasiticy Matrix with Simple Logit Model


Unnamed: 0,Brand 1,Brand2,Brand 3,Brand 4
Brand 1,-2.688666,1.164621,0.052607,0.651862
Brand 2,1.221561,-1.837507,0.052607,0.651862
Brand 3,1.221561,1.164621,-1.916245,0.651862
Brand 4,1.221561,1.164621,0.052607,-2.276253


### Nested Logit Model on Yogurt Data

#### Brands 1~3 in one nest and 4 in another 

In [16]:
def nestedlogit(params):

    a1,a2,a3,bf,bp,theta=params 
    rho=np.exp(theta)/(np.exp(theta)+1)
    ev1=np.exp((a1+bf*df['Feature 1']+bp*df['Price 1'])/rho)
    ev2=np.exp((a2+bf*df['Feature 2']+bp*df['Price 2'])/rho)
    ev3=np.exp((a3+bf*df['Feature 3']+bp*df['Price 3'])/rho)
    ev4=np.exp(0+bf*df['Feature 4']+bp*df['Price 4'])
    denom=ev1+ev2+ev3
    P4=ev4/(np.power(denom,rho)+ev4)
    P1=(1-P4)*ev1/denom
    P2=(1-P4)*ev2/denom
    P3=(1-P4)*ev3/denom
        
    pc=(P1*df['Brand 1']+P2*df['Brand  2']+P3*df['Brand 3']+P4*df['Brand 4'])
    Inpc=np.log(pc)
    LL=np.sum(Inpc)
    return -LL

In [17]:
params_init = np.array([1.3,0.6,-3,0.5,-30,0])
results = opt.minimize(nestedlogit, params_init)
a1,a2,a3,bf,bp,theta = results.x
rho=np.exp(theta)/(np.exp(theta)+1)
print(" a1 (Intrinsic brand preference for Brand 1):",a1,"\n",
      "a2 (Intrinsic brand preference for Brand 2):",a2,"\n",
      "a3 (Intrinsic brand preference for Brand 3):",a3,"\n",
      "bf (Coefficients for feature variable):",bf,"\n",
      "bp (Coefficients for price variable):",bp,"\n",
      "rho (Correlation variable):",rho,"\n",
      "Maximized Log Likelihood:",-results.fun)

 a1 (Intrinsic brand preference for Brand 1): 1.381668246314243 
 a2 (Intrinsic brand preference for Brand 2): 0.8394214107974036 
 a3 (Intrinsic brand preference for Brand 3): -1.6585033289233315 
 bf (Coefficients for feature variable): 0.3744687285023429 
 bp (Coefficients for price variable): -26.58111407114701 
 rho (Correlation variable): 0.6433850797137343 
 Maximized Log Likelihood: -2653.7645999847723


In [18]:
vcv_mle = results.hess_inv
stderr_a1_mle = np.sqrt(vcv_mle[0,0])
stderr_a2_mle = np.sqrt(vcv_mle[1,1])
stderr_a3_mle = np.sqrt(vcv_mle[2,2])
stderr_bf_mle = np.sqrt(vcv_mle[3,3])
stderr_bp_mle = np.sqrt(vcv_mle[4,4])
stderr_theta_mle = np.sqrt(vcv_mle[5,5])

print('Standard error for a1 estimate = ', stderr_a1_mle)
print('Standard error for a2 estimate = ', stderr_a2_mle)
print('Standard error for a3 estimate = ', stderr_a3_mle)
print('Standard error for bf estimate = ', stderr_bf_mle)
print('Standard error for bp estimate = ', stderr_bp_mle)
print('Standard error for theta estimate = ', stderr_theta_mle)

Standard error for a1 estimate =  0.07558685487925244
Standard error for a2 estimate =  0.0643395346921931
Standard error for a3 estimate =  0.25835257393961025
Standard error for bf estimate =  0.10968714265868745
Standard error for bp estimate =  2.2109577731832593
Standard error for theta estimate =  0.2704197005249941


### Elasticity when I.I.A assumption is violated

Own Elasticity:
$\beta*X_{j}*[1/\rho-P_{j}+P_{j|nest}*(1-1/\rho)]$

Cross Elasticity:

Ex: Brands 1~3 in one nest and 4 in another

$e_{41}=-\beta*X_{1}*P_{1}$

$e_{42}=-\beta*X_{2}*P_{2}$

$e_{43}=-\beta*X_{3}*P_{3}$

$e_{14}=e_{24}=e_{34}=-\beta*X_{4}*P_{4}$

$e_{12}=\beta*X_{2}*[-P_{1}+P_{1|nest}*(1-1/\rho)]*P_{2}/P_{1}$

In [19]:
ev1=np.exp((a1+bf*df['Feature 1']+bp*df['Price 1'])/rho)
ev2=np.exp((a2+bf*df['Feature 2']+bp*df['Price 2'])/rho)
ev3=np.exp((a3+bf*df['Feature 3']+bp*df['Price 3'])/rho)
ev4=np.exp(0+bf*df['Feature 4']+bp*df['Price 4'])
denom=ev1+ev2+ev3
P4=ev4/(np.power(denom,rho)+ev4)
P1=(1-P4)*ev1/denom
P2=(1-P4)*ev2/denom
P3=(1-P4)*ev3/denom
#Own Elasticities
e11=np.mean((bf*df['Feature 1']+bp*df['Price 1'])*((1/rho)+(ev1/denom)*(1-1/rho)-P1))
e22=np.mean((bf*df['Feature 2']+bp*df['Price 2'])*((1/rho)+(ev2/denom)*(1-1/rho)-P2))
e33=np.mean((bf*df['Feature 3']+bp*df['Price 3'])*((1/rho)+(ev3/denom)*(1-1/rho)-P3))
e44=np.mean((bf*df['Feature 4']+bp*df['Price 4'])*((1/rho)+P4*(1-1/rho)-P4))

In [20]:
e41=np.mean(-(bf*df['Feature 1']+bp*df['Price 1'])*(P1))
e42=np.mean(-(bf*df['Feature 2']+bp*df['Price 2'])*(P2))
e43=np.mean(-(bf*df['Feature 3']+bp*df['Price 3'])*(P3))
e14=e24=e34=np.mean(-(bf*df['Feature 4']+bp*df['Price 4'])*(P4))

e12=np.mean((bf*df['Feature 2']+bp*df['Price 2'])*((ev1/denom)*(1-1/rho)-P1)*(ev2/ev1))
e13=np.mean((bf*df['Feature 3']+bp*df['Price 3'])*((ev1/denom)*(1-1/rho)-P1)*(ev3/ev1))
e21=np.mean((bf*df['Feature 1']+bp*df['Price 1'])*((ev2/denom)*(1-1/rho)-P2)*(ev1/ev2))
e23=np.mean((bf*df['Feature 3']+bp*df['Price 3'])*((ev2/denom)*(1-1/rho)-P2)*(ev3/ev2))
e32=np.mean((bf*df['Feature 2']+bp*df['Price 2'])*((ev3/denom)*(1-1/rho)-P3)*(ev2/ev3))
e31=np.mean((bf*df['Feature 1']+bp*df['Price 1'])*((ev3/denom)*(1-1/rho)-P3)*(ev1/ev3))

mat=pd.DataFrame({"Brand 1":[e11,e21,e31,e41], "Brand2":[e12,e22,e32,e42],
                  "Brand 3":[e13,e23,e33,e43],'Brand 4':[e14,e24,e34,e44]})
mat.index=["Brand 1","Brand 2", "Brand 3", "Brand 4"]
print("Elasiticy Matrix with Model 1 (Nest 1: 1~3; Nest 2: 4)")
mat

Elasiticy Matrix with Model 1 (Nest 1: 1~3; Nest 2: 4)


Unnamed: 0,Brand 1,Brand2,Brand 3,Brand 4
Brand 1,-2.862328,1.451158,0.064228,0.470184
Brand 2,1.494906,-1.894287,0.064228,0.470184
Brand 3,1.494906,1.451158,-2.129332,0.470184
Brand 4,0.867565,0.839325,0.037066,-2.532213


### IIA Test

In [21]:
#Subset of the data where Brand 1 is not chosen
df2=df[df["Brand 1"] != 1]

1. MTT(McFadden, Train, Tye)

In [22]:
#Drop a1 to fit a restricted logit model

#Use estimated parameters from simple logit
ev2=np.exp(a2F+bfF*df2['Feature 2']+bpF*df2['Price 2'])
ev3=np.exp(a3F+bfF*df2['Feature 3']+bpF*df2['Price 3'])
ev4=np.exp(0+bfF*df2['Feature 4']+bpF*df2['Price 4'])
denom=ev2+ev3+ev4
pc=(ev2*df2['Brand  2']+ev3*df2['Brand 3']+ev4*df2['Brand 4'])/denom
Inpc=np.log(pc)
LLFR=np.sum(Inpc)
LLFR

-1258.8764069818249

In [23]:
#Logit model with J-1 of alternatives on restricted data
def crit2(params):

    a2,a3,bf,bp=params
    ev2=np.exp(a2+bf*df2['Feature 2']+bp*df2['Price 2'])
    ev3=np.exp(a3+bf*df2['Feature 3']+bp*df2['Price 3'])
    ev4=np.exp(0+bf*df2['Feature 4']+bp*df2['Price 4'])
    denom=ev2+ev3+ev4
    pc=(ev2*df2['Brand  2']+ev3*df2['Brand 3']+ev4*df2['Brand 4'])/denom
    Inpc=np.log(pc)
    LL=np.sum(Inpc)
    return -LL
a2r,a3r,bfr,bpr=0.6,-3,0.5,-30 #Initalization
params_init = np.array([a2r,a3r,bfr,bpr])
resultsR = opt.minimize(crit2, params_init)
a2r,a3r,bfr,bpr = resultsR.x
sigmaR=resultsR.hess_inv
LLR=-resultsR.fun
LLR

-1256.909105382134

In [24]:
#MTT Formula
MMT=-2*(LLFR-LLR)
MMT

3.934603199381854

In [25]:
#p-value
from scipy.stats import chi2
1 - chi2.cdf(MMT, 4)

0.4149286869663318

Fail to reject the null hypothesis of IIA.

2. Hausman McFadden

In [26]:
from numpy import matrix
#Calculate V_R-V_FR where V_FR is the subvector of V_F excluding brand 1
#V.I=inverse of (V_R-V_FR)
V=matrix(sigmaR-sigmaF[1:,1:])

#beta_R: parameters vectors of the restricted model
#beta_FR: subvector of beta_F corresponding to beta_R
paraR=np.array([a2r,a3r,bfr,bpr])
paraFR=np.array([a2F,a3F,bfF,bpF])
paradiff=(paraR-paraFR)
paradiff.reshape(1,4)*(V.I)*paradiff.reshape(4,1)

matrix([[30.01624893]])

In [27]:
1 - chi2.cdf(30, 4)

4.894437128033502e-06

It's very unlikely that the parameters in the restricted model is the same as corresonding parameters from the full model.

#### Observable heterogeneity model with a priori 

Motivation: use demographical information to divide customers into segments

For Yogurt Data, we divide the customers into four segments based on income level and household size.

In [28]:
df[['Income','HH Size']].describe()

Unnamed: 0,Income,HH Size
count,2430.0,2430.0
mean,8.720988,2.802058
std,3.800654,1.173291
min,1.0,1.0
25%,6.0,2.0
50%,9.0,3.0
75%,12.0,4.0
max,14.0,6.0


In [29]:
#Low Income, Small HH Size
seg1=df[(df['Income']<9) & (df['HH Size']<3)]
n1=seg1.shape[0]

In [30]:
#High Income, Small HH Size
seg2=df[(df['Income']>=9) & (df['HH Size']<3)]
n2=seg2.shape[0]

In [31]:
#Low Income, Large HH Size
seg3=df[(df['Income']<9) & (df['HH Size']>=3)]
n3=seg3.shape[0]

In [32]:
#High Income, Large HH Size
seg4=df[(df['Income']>=9) & (df['HH Size']>=3)]
n4=seg4.shape[0]

In [33]:
def BIC(data):
    params_init = np.array([a1F,a2F,a3F,bfF,bpF])
    results = opt.minimize(crit, params_init,data)
    n=data.shape[0]
    return np.log(n)*(5)-2*(-results.fun)

In [34]:
tab=pd.DataFrame({'BIC ':[BIC(seg1),BIC(seg2),BIC(seg3),BIC(seg4),
                          (BIC(seg1)+BIC(seg2)+BIC(seg3)+BIC(seg4))/4,BIC(df)/4]},
                 index=["Seg 1","Seg 2","Seg 3","Seg 4","Avg BIC","25% of Full Model"])
tab

Unnamed: 0,BIC
Seg 1,1332.669651
Seg 2,1251.350202
Seg 3,990.876326
Seg 4,1643.086377
Avg BIC,1304.495639
25% of Full Model,1339.022907


The average BIC of four models is close to the 25% of the BIC from full logit model.

There exists heterogeneity among this four groups, as the BIC changes greatly when one condition varies.

Especially for large household size, BIC for high income group is 66% greater than BIC for low income group.

#### Observable heterogeneity model with interactions 

In [35]:
def crit(params,df):

    a11,a1i,a1h,a22,a2i,a2h,a33,a3i,a3h,bf,bp,bfi,bfh,bpi,bph=params
    
    a1=a11+a1i*df["Income"]+a1h*df["HH Size"]
    a2=a22+a2i*df["Income"]+a2h*df["HH Size"]
    a3=a33+a3i*df["Income"]+a3h*df["HH Size"]
    
    bfi=bf+bfi*df["Income"]+bfh*df["HH Size"]
    bpi=bp+bpi*df["Income"]+bph*df["HH Size"]
    
    ev1=np.exp(a1+bfi*df['Feature 1']+bpi*df['Price 1'])
    ev2=np.exp(a2+bfi*df['Feature 2']+bpi*df['Price 2'])
    ev3=np.exp(a3+bfi*df['Feature 3']+bpi*df['Price 3'])
    ev4=np.exp(0+bfi*df['Feature 4']+bpi*df['Price 4'])
    denom=ev1+ev2+ev3+ev4
    
    pc=(ev1*df['Brand 1']+ev2*df['Brand  2']+ev3*df['Brand 3']+ev4*df['Brand 4'])/denom
    Inpc=np.log(pc)
    LL=np.sum(Inpc)
    
    return -LL

a11,a1i,a1h,a22,a2i,a2h,a33,a3i,a3h,bf,bp,bfi,bfh,bpi,bph=1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
params_init = np.array([a11,a1i,a1h,a22,a2i,a2h,a33,a3i,a3h,bf,bp,bfi,bfh,bpi,bph])
results = opt.minimize(crit, params_init,df)
a11,a1i,a1h,a22,a2i,a2h,a33,a3i,a3h,bf,bp,bfi,bfh,bpi,bph= results.x
print("Maximized Log Likelihood:",-results.fun)

Maximized Log Likelihood: -2537.4825261515534


$\alpha_1=0.51-0.097*Inc+0.617*HH$

$\alpha_2=1.19-0.119*Inc+0.196*HH$

$\alpha_3=-1.88-0.293*Inc+0.373*HH$

$\beta_f=0.542-0.089*Inc+0.219*HH$

$\beta_p=-40+1.22*Inc-2.458*HH$


In [36]:
para=[a11,a1i,a1h,a22,a2i,a2h,a33,a3i,a3h,bf,bp,bfi,bfh,bpi,bph]
paraname=["a11","a1i","a1h","a22","a2i","a2h","a33","a3i","a3h","bf","bp","bfi","bfh","bpi","bph"]

vcv_mle = results.hess_inv
CIlow,CIhigh=[],[]
for i in range(len(para)):
    std=np.sqrt(vcv_mle[i,i])
    CIlow.append(para[i]-1.96*std)
    CIhigh.append(para[i]+1.96*std)
paratable=pd.DataFrame({"Mean Value of Parameters":para,"CI Left":CIlow,"CI Right":CIhigh},
                       index=paraname)
paratable

Unnamed: 0,Mean Value of Parameters,CI Left,CI Right
a11,0.51029,0.187368,0.833211
a1i,-0.097419,-0.129433,-0.065405
a1h,0.616646,0.519699,0.713593
a22,1.190373,0.987831,1.392914
a2i,-0.118501,-0.143341,-0.093661
a2h,0.196365,0.099227,0.293502
a33,-1.884975,-2.344343,-1.425606
a3i,-0.292851,-0.368287,-0.217416
a3h,0.372958,0.175329,0.570587
bf,0.54225,0.136067,0.948433


All of the confidence interval doesn't contain 0 at 95% level. So they are all statistically significant. There exists heterogeneity for groups with different income and household size when responding to feature and price.