In [28]:
#####Bivariate Nested Logit model#####
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy.linalg
import itertools
from scipy import sparse
from pandas.tools.plotting import scatter_matrix
from numpy.random import *
from scipy import optimize
import seaborn as sns
import time

#np.random.seed(25978)

In [29]:
####データの発生####
##データの設定
hh = 10000   #プレイヤー数
pt = 90   #観測期間
hhpt = hh*pt   #総レコード数

In [30]:
##IDとインデックスの設定
#IDの設定
user_id = np.repeat(range(hh), pt)
pt_id = np.repeat(range(pt), hh).reshape(pt, hh).T.reshape(-1)

#インデックスの設定
index = np.array(range(hhpt))

user_index = [i for i in range(hh)]
for i in range(hh):
    user_index[i] = index[user_id==i]

In [31]:
##説明変数の生成
#ログイン有無の説明変数を生成
k1 = 4; k2 = 3; k3 = 7; k4 = 6
x1 = np.log(1 + np.abs(np.random.normal(0, 1.0, hhpt*k1))).reshape(hhpt, k1)
x2 = np.zeros((hhpt, k2)); x3 = np.zeros((hhpt, k3))
for i in range(hh):
    prob1 = np.random.beta(5.0, 7.0, k2)
    prob2 = np.random.beta(7.5, 10.0, k3)
    x2[user_index[i], ] = np.random.binomial(1, np.repeat(prob1, pt), pt*k2).reshape(pt, k2)
    x3[user_index[i], ] = np.random.binomial(1, np.repeat(prob2, pt), pt*k3).reshape(pt, k3)
x4 = np.random.multinomial(1, np.random.dirichlet(np.repeat(2.5, k4), 1).reshape(-1), hh)[user_id, ]
x4 = np.delete(x4, np.sum(x4, axis=0).argmin(), axis=1)
dt0 = np.hstack((np.repeat(1, hhpt).reshape(hhpt, 1), x1, x2, x3, x4))

In [32]:
#コンバージョン有無の説明変数を生成
k1 = 5; k2 = 4; k3 = 6; k4 = 7
x1 = np.log(1 + np.abs(np.random.normal(0, 1.0, hhpt*k1))).reshape(hhpt, k1)
x2 = np.zeros((hhpt, k2)); x3 = np.zeros((hhpt, k3))
for i in range(hh):
    prob1 = np.random.beta(5.0, 7.5, k2)
    prob2 = np.random.beta(7.0, 10.0, k3)
    x2[user_index[i], ] = np.random.binomial(1, np.repeat(prob1, pt), pt*k2).reshape(pt, k2)
    x3[user_index[i], ] = np.random.binomial(1, np.repeat(prob2, pt), pt*k3).reshape(pt, k3)
x4 = np.random.multinomial(1, np.random.dirichlet(np.repeat(2.5, k4), 1).reshape(-1), hh)[user_id, ]
x4 = np.delete(x4, np.sum(x4, axis=0).argmin(), axis=1)
dt2 = np.hstack((np.repeat(1, hhpt).reshape(hhpt, 1), x1, x2, x3, x4))

In [33]:
##応答変数の生成
rp = 0
while True:
    rp = rp + 1
    
    #パラメータの生成
    k1 = dt0.shape[1] + 1; k2 = dt2.shape[1]
    beta1 = np.append(np.append(-0.6, np.random.normal(0, 0.75, k1-2)), np.random.uniform(0.3, 0.75, 1))
    beta2 = np.append(-0.75, np.random.normal(0, 0.75, k2-1))
    betat1 = beta1; betat2 = beta2
    betat = np.append(betat1, betat2)

    #コンバージョンのロジットと応答確率を設定
    logit_cv = np.dot(dt2, beta2)
    Prob_cv = np.exp(logit_cv) / (1 + np.exp(logit_cv))

    #ログサム変数を設定
    logsum = np.log(1 + np.exp(logit_cv)).reshape(hhpt, 1)
    dt1 = np.hstack((dt0, logsum))

    #ログイン有無のロジットと応答確率を設定
    logit_login = np.dot(dt1, beta1)
    Prob_login = np.exp(logit_login) / (1 + np.exp(logit_login))

    #ベルヌーイ分布から応答変数を生成
    y1 = np.random.binomial(1, Prob_login, hhpt)
    y2 = np.random.binomial(1, Prob_cv, hhpt)
    y2[y1==0] = 0

    #break条件
    if (np.mean(y1) > 0.2) & (np.mean(y1) < 0.45) & (np.mean(y2[y1==1]) > 0.15) & (np.mean(y2[y1==1]) < 0.4):
        break

In [34]:
####最尤法でBivariate Nested Logit modelを推定####
##Bivariate Nested Logit modelの対数尤度関数
def loglike(beta, y1, y2, y2_nested, dt1, dt2, index_beta1, index_beta2, index_y1):
    
    #パラメータの設定
    beta1 = beta[index_beta1]
    beta2 = beta[index_beta2]
    
    #コンバージョンのロジットと応答確率を設定
    mu_cv = np.exp(np.dot(dt2, beta2))
    Prob_cv = (mu_cv / (1 + mu_cv))[index_y1]

    #ログサム変数を設定
    logsum = np.log(1 + mu_cv)
    dt1[:, k1-1] = logsum

    #ログイン有無のロジットと応答確率を設定
    mu_login = np.exp(np.dot(dt1, beta1))
    Prob_login = mu_login / (1 + mu_login)

    #対数尤度の和
    LL1 = np.sum(y1*(np.log(Prob_login)) + (1-y1)*(np.log(1-Prob_login)))
    LL2 = np.sum((y2_nested*(np.log(Prob_cv)) + (1-y2_nested)*(np.log(1-Prob_cv))))
    LL = -(LL1 + LL2)
    return LL

In [35]:
##Bivariate Nested Logit modelの勾配ベクトル
def dloglike(beta, y1, y2, y2_nested, dt1, dt2, index_beta1, index_beta2, index_y1):
    
    #パラメータの設定
    n = y2_nested.shape[0]
    dt2_part = dt2[index_y1, ]
    beta1 = beta[index_beta1]
    beta2 = beta[index_beta2]

    #コンバージョンのロジットと応答確率を設定
    mu_cv = np.exp(np.dot(dt2, beta2))
    Prob_cv = (mu_cv / (1 + mu_cv)).reshape(hhpt, 1)
    
    #ログサム変数を設定
    logsum = np.log(1 + mu_cv)
    dt1[:, k1-1] = logsum

    #ログイン有無のロジットと応答確率を設定
    mu_login = np.exp(np.dot(dt1, beta1))
    Prob_login = (mu_login / (1 + mu_login)).reshape(hhpt, 1)

    #勾配ベクトルを計算
    dlogit1 = np.sum(y1.reshape(hhpt, 1)*dt1 - Prob_login*dt1, axis=0)
    dlogit21 = np.sum(y1.reshape(hhpt, 1)*Prob_cv*dt2 - Prob_login*Prob_cv*dt2, axis=0)
    dlogit22 = np.sum(y2_nested.reshape(n, 1)*dt2_part - Prob_cv[index_y1, ]*dt2_part, axis=0)
    dlogit = -np.append(dlogit1, dlogit21 + dlogit22)
    return dlogit

In [36]:
#データの設定
index_beta1 = np.array(range(k1))
index_beta2 = np.delete(np.array(range(k1 + k2)), index_beta1)
index_y1 = np.arange(hhpt)[y1==1]
y2_nested = y2[index_y1]

#初期値の設定
beta1 = np.append(-0.5, np.repeat(0, k1-1))
beta2 = np.append(-0.5, np.repeat(0, k2-1))
beta = np.append(beta1, beta2)
k = beta.shape[0]

##準ニュートン法で対数尤度を最大化
res = optimize.minimize(loglike, beta, jac=dloglike, method='BFGS', args=(y1, y2, y2_nested, dt1, dt2, index_beta1, index_beta2, index_y1),
                        options={"gtol": 0.01, "disp":True})

         Current function value: 550928.927016
         Iterations: 56
         Function evaluations: 171
         Gradient evaluations: 159


In [37]:
##推定されたパラメータの確認と適合度
#推定されたパラメータと真のパラメータの比較
beta = res.x   #推定されたパラメータ
hessian = res.hess_inv   #ヘッシアンの逆行列
LL = -res.fun   #最大化された対数尤度
LLst= np.sum(y1*np.log(np.mean(y1)) + (1-y1)*np.log(np.mean(y1))) + \
            np.sum(y2_nested*np.log(np.mean(y2_nested)) + (1-y2_nested)*np.log(np.mean(y2_nested)))
LLbest = -loglike(betat, y1, y2, y2_nested, dt1, dt2, index_beta1, index_beta2, index_y1)   #真値での対数尤度
print(np.round(np.array([LL, LLst, LLbest]), 3))
print(np.round(np.array([betat1, beta[index_beta1]]), 3))
print(np.round(np.array([betat2, beta[index_beta2]]), 3))

[ -550928.927 -1444663.576  -550953.083]
[[-0.6   -0.74   0.521 -0.526  0.521 -0.737  0.592 -0.467 -1.079 -0.773
   0.882  0.812 -0.637  0.262 -0.339 -0.453 -0.407  1.017  0.681 -0.59
   0.306]
 [-0.614 -0.742  0.531 -0.515  0.516 -0.739  0.588 -0.46  -1.083 -0.781
   0.877  0.802 -0.64   0.262 -0.33  -0.463 -0.393  1.024  0.689 -0.582
   0.322]]
[[-0.75   0.637 -0.539 -1.123 -0.496 -0.696 -0.647  0.671 -0.305  0.055
  -0.637  0.893  0.169  0.363  1.129  0.449 -0.068 -0.355 -0.975 -0.565
  -1.404  0.827]
 [-0.734  0.622 -0.537 -1.132 -0.501 -0.67  -0.647  0.67  -0.292  0.057
  -0.615  0.872  0.18   0.355  1.125  0.443 -0.056 -0.347 -0.996 -0.585
  -1.398  0.811]]


In [38]:
#適合度の確認
t_value = beta / np.sqrt(np.diag(hessian))   #t値
AIC = -2*LL + 2*k   #AIC
BIC = -2*LL + np.log(hhpt)*k   #BIC
print(np.round(t_value, 3))
print(np.round(np.array([LL, AIC, BIC]), 3))

[ -44.299  -94.39    67.106  -65.645   65.349 -142.753  114.988  -89.671
 -205.086 -150.195  172.201  156.721 -124.56    51.696  -64.716  -38.893
  -36.646   92.722   65.845  -55.909   42.139  -33.52    53.375  -43.595
  -92.667  -40.056  -53.03   -75.889   82.333  -35.427    6.881  -75.557
  104.246   21.881   42.518  132.302   54.513   -2.632  -16.347  -44.142
  -36.543  -62.381   51.526]
[-550928.927 1101943.854 1102447.39 ]
