In [261]:
#####Mini batch Stochastic Gradient Descent#####
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy.linalg
import itertools
import math
from scipy import sparse
from pandas.tools.plotting import scatter_matrix
from numpy.random import *
from scipy import optimize
import seaborn as sns
import time

#np.random.seed(98537)

In [262]:
####Data generation####
#Data settings
N = 1000000   #number of observations
k = 15   #placeholder; overwritten below with the real column count of x

#Generate the explanatory variables
k1, k2, k3 = 5, 7, 6   #continuous columns, binary columns, categories
x1 = np.random.random((N, k1))   #uniform(0, 1) covariates
x2 = np.zeros((N, k2))
for j in range(k2):
    #each binary column gets its own success probability in [0.25, 0.55)
    prob = np.random.uniform(0.25, 0.55, 1)
    x2[:, j] = np.random.binomial(1, prob, N)
category_prob = np.random.dirichlet(np.full(k3, 3.0), 1)[0]
x3 = np.random.multinomial(1, category_prob, N)   #one-hot categorical covariate
rarest = np.argmin(x3.sum(axis=0))
x3 = np.delete(x3, rarest, axis=1)   #drop the least frequent dummy column (redundant variable)
intercept = np.ones((N, 1))
x = np.hstack((intercept, x1, x2, x3))
k = x.shape[1]

#Generate the parameters
beta0 = np.array([-0.7])   #intercept
beta1 = np.random.normal(0, 0.75, k-1)   #slopes
beta = np.concatenate((beta0, beta1))
betat = beta.copy()   #keep a copy of the true parameters

#Generate the response variable
logit = x.dot(beta)
Prob = 1 / (1 + np.exp(-logit))
y = np.random.binomial(1, Prob, N)

In [263]:
####Estimate logistic regression by Stochastic Gradient Descent####
##Function that computes the (negated) log-likelihood
def loglike(beta, x, y):
    """Return the negative log-likelihood of a logistic regression.

    Parameters
    ----------
    beta : coefficient vector; `np.dot(x, beta)` must give one logit per row.
    x : design matrix (one row per observation, as built in this notebook).
    y : 0/1 response vector with one entry per row of `x`.

    Returns
    -------
    -sum(y*log(p) + (1-y)*log(1-p)) with p = 1/(1 + exp(-x.beta)).
    The sign is flipped so that the value can be passed to a minimizer.
    """
    logit = np.dot(x, beta)
    # -[y*log(p) + (1-y)*log(1-p)] simplifies to log(1 + exp(logit)) - y*logit.
    # np.logaddexp(0, logit) evaluates log(1 + exp(logit)) without overflowing,
    # whereas exp(logit)/(1 + exp(logit)) turns into inf/inf = nan for large logits.
    LL = np.sum(np.logaddexp(0, logit) - y*logit)
    return LL

In [264]:
##Gradient vector of the (negated) log-likelihood
def dloglike(beta, x, y):
    """Return the gradient of the negative log-likelihood with respect to beta.

    Parameters
    ----------
    beta : coefficient vector; `np.dot(x, beta)` must give one logit per row.
    x : design matrix (one row per observation, as built in this notebook).
    y : 0/1 response vector with one entry per row of `x`.

    Returns
    -------
    Vector with one entry per column of `x`, equal to
    -sum_i (y_i - p_i) * x_i = x.T (p - y), p = 1/(1 + exp(-x.beta)).
    """
    logit = np.dot(x, beta)
    # 1/(1 + exp(-logit)) written as exp(-log(1 + exp(-logit))): this cannot
    # overflow, while exp(logit)/(1 + exp(logit)) yields nan for large logits.
    Prob = np.exp(-np.logaddexp(0, -logit))
    # One matrix-vector product instead of two row-scaled copies of x.
    dlogit = np.dot(x.T, Prob - y)
    return dlogit

In [265]:
##Estimate the parameters with a quasi-Newton method
#Initial values
beta0 = np.array([-0.5])
beta1 = np.random.normal(0, 0.2, k-1)
beta = np.concatenate((beta0, beta1))
k = len(beta)

#Maximize the log-likelihood with BFGS (loglike returns the negated value, so it is minimized)
bfgs_options = {"gtol": 0.01, "disp": True}
res = optimize.minimize(
    loglike, beta, args=(x, y), method='BFGS', jac=dloglike, options=bfgs_options
)
beta_BFGS = res.x
LL_BFGS = -res.fun   #flip the sign back to a log-likelihood
print(np.round(LL_BFGS, 1))
print(np.round(beta_BFGS, 3))

Optimization terminated successfully.
         Current function value: 538077.030056
         Iterations: 27
         Function evaluations: 41
         Gradient evaluations: 41
-538077.0
[-7.150e-01 -1.463e+00 -7.400e-01  6.090e-01 -9.080e-01  1.592e+00
 -3.890e-01 -1.780e-01  7.250e-01 -9.790e-01  1.840e-01  6.040e-01
  1.181e+00  1.000e-03  1.672e+00  7.120e-01 -4.650e-01  6.030e-01]


In [268]:
##Parameter estimation by mini-batch stochastic gradient descent
#Convergence-check settings
iter = 1   #iteration counter (name is shared with the update-loop cell; it shadows the builtin iter)
display = 50   #print the loss every `display` iterations
dl = 100   #initial loss difference; only needs to be >= tol so the loop is entered
tol = 0.1   #stop when the loss changes by less than this between iterations

#Algorithm settings
a = 1000
eta = 1/N * a   #learning rate
m = 100   #number of mini-batches
n = N // m   #samples per mini-batch (plain int instead of a 0-d array)

#Initial values
beta0 = np.array([-0.5])
beta1 = np.random.normal(0, 0.2, k-1)
beta = np.append(beta0, beta1)
LL = loglike(beta, x, y)   #loss at the starting point
LL1 = LL   #previous-iteration loss; the update loop reads it before assigning it

#Sample the mini-batches
u = np.random.uniform(0, 1, N)
# NOTE(review): reshape(n, m) produces n rows of m indices, while the comments
# above describe m batches of n samples and the update loop reads rows 0..m-1.
# As written only m*m of the N observations are ever used and the batches are
# never redrawn. reshape(m, n) would cover all data, but every batch gradient
# would then grow by a factor n/m, so eta has to be retuned together with that
# change -- confirm the intent before altering it.
index = np.array(np.argsort(-u), dtype="int").reshape(n, m)

In [269]:
##Update the parameters
while abs(dl) >= tol:

    #Gradient of each mini-batch at the current beta, one row per batch
    g = np.zeros((m, k))
    for j in range(m):
        rows = index[j]
        x1 = x[rows]
        y1 = y[rows]
        g[j] = dloglike(beta, x1, y1) / m

    #Gradient-descent step with the summed mini-batch gradients
    step = g.sum(axis=0)
    beta = beta - eta*step
    LL = loglike(beta, x, y)   #loss on the full data set

    #Bookkeeping for the convergence check (change in loss between iterations)
    iter += 1
    dl = LL1 - LL
    LL1 = LL
    if iter % display == 0:
        print(LL)

647325.7295672423
614763.6258476354
594435.0931766586
581129.4053735712
572017.263610638
565521.2882788174
560728.0374272192
557086.7878572955
554252.4308286698
552000.8462298055
550181.6640071742
548690.9599312424
547454.949912342
546419.9550326514
545546.052638017
544802.9559432529
544167.2761737366
543620.6630258879
543148.5155093927
542739.0708421919
542382.7487612194
542071.6715322385
541799.3069020871
541560.1984842245
541349.7592924006
541164.1115590506
540999.9609572131
540854.496737417
540725.3116330225
540610.3370255454
540507.7900217148
540416.1299258685
540334.0221948024
540260.308405081
540193.9810914502
540134.1625614916
540080.0869784652
540031.0851472791
539986.5715490172
539946.0332556411
539909.0204242488
539875.1381240581
539844.0392922147
539815.4186491333
539789.007432098
539764.5688287027
539741.8940104728
539720.7986824561
539701.1200774082
539682.7143338474
539665.4542062074
539649.2270627811
539633.9331334835
539619.4839747956
539605.8011237802
539592.814916904

In [279]:
#Check the estimation results
#Log-likelihoods: gradient-descent fit, BFGS fit, true parameters
loglik_summary = np.array([-LL, LL_BFGS, -loglike(betat, x, y)])
print(np.round(loglik_summary, 1))
#Coefficients side by side in the same order: gradient descent, BFGS, truth
pd.DataFrame(np.column_stack((beta, beta_BFGS, betat)))

[-539371.4 -538077.  -538084.3]


Unnamed: 0,0,1,2
0,-0.508796,-0.715323,-0.7
1,-1.408521,-1.463252,-1.469615
2,-0.763106,-0.740487,-0.734961
3,0.557541,0.608715,0.607932
4,-1.024461,-0.907771,-0.904802
5,1.615063,1.591747,1.589689
6,-0.434787,-0.388865,-0.391905
7,-0.173844,-0.177642,-0.171245
8,0.765014,0.725242,0.716868
9,-0.960786,-0.978505,-0.979668
