In [352]:
#####Model integration with Maximum likelihood estimation#####
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy.linalg
import itertools
from scipy import sparse
from scipy.stats import norm
from pandas.tools.plotting import scatter_matrix
from numpy.random import *
from scipy import optimize

#np.random.seed(98537)

In [353]:
## Function that draws categorical (single-trial multinomial) random numbers
def rmnom(pr, n, k, pattern):
    """Draw one category per row of ``pr`` by inverse-CDF sampling.

    A uniform variate is drawn for every row and the selected category is the
    first column whose cumulative probability reaches that variate.

    Parameters
    ----------
    pr : ndarray, 2-D
        Category probabilities; the cumulative sum is taken along axis 1.
    n : int
        Number of uniform variates drawn. It has to broadcast against the
        rows of ``pr`` (normally it equals the number of rows).
    k : int
        Width of the one-hot encoding built when ``pattern == 1``.
    pattern : int
        ``1`` returns ``(z_id, Z)``; any other value returns ``z_id`` only.

    Returns
    -------
    z_id : ndarray of int
        Index of the sampled category for each draw.
    Z : ndarray of int, shape (len(z_id), k)
        One-hot encoding of ``z_id`` (only when ``pattern == 1``).
    """
    cum_pr = np.cumsum(pr, axis=1)
    u = np.random.uniform(0, 1, n)[:, np.newaxis]
    hit = cum_pr >= u
    z_id = np.argmax(hit, axis=1)

    # If rounding leaves the final cumulative value just below the uniform
    # draw, no column matches and argmax would silently report category 0.
    # Such draws belong to the last category.
    z_id[~hit.any(axis=1)] = hit.shape[1] - 1

    if pattern == 1:
        z_id = z_id.astype("int")
        Z = np.eye(k, dtype="int")[z_id]
        return z_id, Z
    return z_id

In [354]:
# Function that generates truncated-normal random numbers
def rtnorm(mu, sigma, a, b, n):
    """Sample from a normal distribution restricted to the interval [a, b].

    Uses the inverse-CDF method: uniform variates are rescaled to lie between
    the normal CDF values at ``a`` and ``b`` and then pushed through the
    normal quantile function.

    Parameters
    ----------
    mu, sigma : float or array-like
        Location and scale passed to ``scipy.stats.norm``.
    a, b : float or array-like
        Lower and upper truncation points.
    n : int
        Number of uniform variates to draw.

    Returns
    -------
    ndarray
        The truncated-normal draws.
    """
    cdf_lo, cdf_hi = (norm.cdf(bound, mu, sigma) for bound in (a, b))
    u = np.random.uniform(0, 1, n)
    # Map U(0, 1) onto [F(a), F(b)] before inverting the CDF.
    target_prob = u * (cdf_hi - cdf_lo) + cdf_lo
    return norm.ppf(target_prob, mu, sigma)

In [355]:
## Data generation
# Data settings
# NOTE(review): lower/upper are not referenced in the cells visible here;
# presumably bounds used elsewhere -- confirm before removing.
lower, upper = 0.1, 0.9
# models: number of input columns generated per observation
# n: number of observations
models, n = 4, 10000

In [356]:
# Generate the input variables
tau = 0.3   # standard deviation of the per-model values around alpha
# One latent mean per observation.
alpha = np.random.normal(loc=-0.2, scale=0.5, size=n)
# `models` values per observation, each centred on that observation's alpha.
gamma = np.random.normal(loc=np.repeat(alpha, models), scale=tau, size=models*n)
# x1: logistic transform of gamma, arranged as an (n, models) matrix.
exp_gamma = np.exp(gamma)
x1 = (exp_gamma / (1 + exp_gamma)).reshape(n, models)
# x2: logit of x1, i.e. the same inputs on the unbounded scale.
x2 = np.log(x1 / (1-x1))

In [357]:
# Generate the parameters
Sigma = np.array(0.05)   # used below as the scale of the Gaussian noise
beta = np.random.dirichlet(alpha=np.repeat(1.0, models), size=1).ravel()
# Keep copies of the generating values; `beta` is overwritten by the
# estimate in the summary cell.
Sigmat = Sigma.copy()
betat = beta.copy()

# Generate the response variable
mu = np.dot(x2, beta)
noise = np.random.normal(0, Sigma, n)
logit = mu + noise
# Map the noisy logit back to the (0, 1) scale.
exp_logit = np.exp(logit)
y = exp_logit / (1 + exp_logit)

In [358]:
## Estimate the parameters by maximum likelihood
# Function that computes the sum of squared errors
def loglike(theta, logit, x, models):
    """Objective function: sum of squared errors on the logit scale.

    ``theta`` is extended with a fixed last element of 1.0 and passed through
    a softmax, so the resulting weights ``beta`` are positive and sum to one.
    Despite the name, the value returned is the residual sum of squares
    (to be minimised), not a log-likelihood.

    Parameters
    ----------
    theta : ndarray, shape (p,)
        Free (unconstrained) parameters; ``p + 1`` must equal the number of
        columns of ``x``.
    logit : ndarray, shape (n,)
        Target values.
    x : ndarray, shape (n, p + 1)
        Input matrix that is combined with the weights.
    models : int
        Not used by this function; kept so the signature matches
        ``dloglike`` and the ``args`` tuple passed to the optimiser.

    Returns
    -------
    float
        ``sum((logit - x @ beta) ** 2)``.
    """
    new_theta = np.append(theta, 1.0)
    # Subtracting the maximum leaves the softmax unchanged mathematically but
    # prevents exp() from overflowing (inf / inf -> NaN) for large theta.
    shifted = np.exp(new_theta - np.max(new_theta))
    beta = shifted / np.sum(shifted)   # convert to probabilities
    resid = logit - np.dot(x, beta)
    return np.sum(np.power(resid, 2))

In [359]:
# Function that computes the gradient vector
def dloglike(theta, logit, x, models):
    """Gradient of the sum-of-squares objective with respect to ``theta``.

    The weights are ``beta = softmax(append(theta, 1.0))``. The gradient is
    obtained with the chain rule from the gradient with respect to ``beta``
    and the softmax Jacobian.

    Parameters
    ----------
    theta : ndarray, shape (p,)
        Free (unconstrained) parameters; ``p + 1`` must equal the number of
        columns of ``x``.
    logit : ndarray, shape (n,)
        Target values.
    x : ndarray, shape (n, p + 1)
        Input matrix that is combined with the weights.
    models : int
        Number of weights (``p + 1``). The sizes are taken from ``theta``
        itself; the argument is kept for signature compatibility.

    Returns
    -------
    ndarray, shape (p,)
        Partial derivatives of ``sum((logit - x @ beta) ** 2)`` with respect
        to each element of ``theta``.
    """
    # Parameter setup
    new_theta = np.append(theta, 1.0)
    # Max-shifted softmax: same value, but exp() cannot overflow to inf/NaN.
    shifted = np.exp(new_theta - np.max(new_theta))
    beta = shifted / np.sum(shifted)   # convert to probabilities

    # Softmax Jacobian: d beta_i / d theta_j = beta_i * (delta_ij - beta_j)
    jac = np.diag(beta) - np.outer(beta, beta)
    # Gradient of the squared error with respect to beta. Working with the
    # residual avoids forming x.T @ x on every call.
    grad_beta = 2 * np.dot(x.T, np.dot(x, beta) - logit)
    # Chain rule; the last element of new_theta is fixed at 1.0, so its row
    # is dropped.
    return np.dot(jac[:-1], grad_beta)

In [360]:
# Estimate the parameters with a quasi-Newton method (BFGS)
theta = np.ones(models-1)   # starting point: every free parameter at 1.0
res = optimize.minimize(
    fun=loglike,
    x0=theta,
    args=(logit, x2, models),
    method="BFGS",
    jac=dloglike,   # analytic gradient supplied to the optimiser
    options={"disp": True},
)

Optimization terminated successfully.
         Current function value: 25.057675
         Iterations: 15
         Function evaluations: 18
         Gradient evaluations: 18


In [361]:
# Summary of the parameter estimates
# Rebuild the weights from the optimiser result: append the fixed last
# element (1.0) and apply the softmax.
theta = np.append(res.x, 1.0)
exp_theta = np.exp(theta)
beta = exp_theta / np.sum(exp_theta)
# Row 0: estimated weights, row 1: weights used to generate the data.
print(np.round(np.array([beta, betat]), 3))
# Sum of squared errors under the estimated and the generating weights.
sse = [np.sum(np.power(logit - np.dot(x2, w), 2)) for w in (beta, betat)]
print(np.round(np.array(sse), 3))

[[0.609 0.148 0.162 0.082]
 [0.607 0.147 0.163 0.083]]
[25.058 25.063]
