In [29]:
#####Mini batch Stochastic Gradient Descent#####
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy.linalg
import itertools
import math
from scipy import sparse
from pandas.tools.plotting import scatter_matrix
from numpy.random import *
from scipy import optimize
import seaborn as sns
import time

#np.random.seed(98537)

In [30]:
####データの発生####
#データの設定
N = 1000000
k = 15

#説明変数の生成
k1 = 5; k2 = 7; k3 = 6
x1 = np.array(np.random.random(N*k1)).reshape((N, k1))
x2 = np.zeros((N, k2))
for j in range(k2):
    prob = np.random.uniform(0.25, 0.55, 1)
    x2[:, j] = np.random.binomial(1, prob, N)
x3 = np.random.multinomial(1, np.random.dirichlet(np.repeat(3.0, k3), 1).reshape(k3), N)
x3 = np.delete(x3, np.argmin(np.sum(x3, axis=0)), axis=1)   #冗長な変数の削除
x = np.concatenate((np.repeat(1, N)[:, np.newaxis], x1, x2, x3), axis=1)
k = x.shape[1]

#パラメータの生成
beta0 = np.array([-0.7])
beta1 = np.random.normal(0, 0.75, k-1)
beta = np.append(beta0, beta1)
betat = beta.copy()

#応答変数の生成
logit = np.dot(x, beta)
Prob = 1 / (1 + np.exp(-logit))
y = np.random.binomial(1, Prob, N)

In [31]:
####Stochastic Gradient Descentでロジスティック回帰を推定####
##対数尤度関数を計算する関数
def loglike(beta, x, y):
    mu = np.exp(np.dot(x, beta))   #ロジットの指数
    Prob = mu / (1 + mu)   #確率の計算
    LL = -np.sum(y*np.log(Prob) + (1-y)*np.log(1 - Prob))   #対数尤度関数の和
    return LL 

In [32]:
##対数尤度関数の勾配ベクトル
def dloglike(beta, x, y):
    mu = np.exp(np.dot(x, beta))   #ロジットの指数
    Prob = mu / (1 + mu)   #確率の計算
    dlogit = -np.sum(y[:, np.newaxis]*x - Prob[:, np.newaxis]*x, axis=0)
    return dlogit

In [33]:
##準ニュートン法でパラメータを推定
#初期値の設定
beta0 = np.array([-0.5])
beta1 = np.random.normal(0, 0.2, k-1)
beta = np.append(beta0, beta1)
k = beta.shape[0]

#準ニュートン法で対数尤度を最大化
res = optimize.minimize(loglike, beta, jac=dloglike, method='BFGS', args=(x, y), options={"gtol": 0.01, "disp":True})
LL_BFGS = -res.fun
beta_BFGS = res.x
print(np.round(-res.fun, 1))
print(np.round(res.x, 3))

Optimization terminated successfully.
         Current function value: 471136.797461
         Iterations: 31
         Function evaluations: 53
         Gradient evaluations: 53
-471136.8
[-0.704 -0.798 -0.64   0.543  0.013  0.635  1.031 -0.993 -0.803  0.301
 -1.213  0.757  0.021 -1.325 -0.404 -1.145  0.49   0.736]


In [34]:
##ミニバッチ確率的勾配法によるパラメータの推定
#収束判定の設定
iter = 1
display = 50
LL1 = 1000000000   #対数尤度の初期値
dl = 100   #EMアルゴリズムの対数尤度の差の初期値
tol = 0.1

#アルゴリズムの設定
a = 1000
eta = 1/N * a   #学習率
m = 100   #ミニバッチ数
n = np.array(N/m, dtype="int")   #ミニバッチのサンプル数

#初期値の設定
beta0 = np.array([0.0])
beta1 = np.random.normal(0, 0.2, k-1)
beta = np.append(beta0, beta1)
LL1 = loglike(beta, x, y)

#ミニバッチをサンプリング
u = np.random.uniform(0, 1, N)
index = np.array(np.argsort(-u), dtype="int").reshape(n, m)

In [35]:
##パラメータを更新
while abs(dl) >= tol:
    
    #ミニバッチごとの勾配ベクトルを更新
    g = np.zeros((m, k))
    for j in range(m):
        y1 = y[index[j, ]]; x1 = x[index[j, ], ] 
        g[j, ] = dloglike(beta, x1, y1) / m

    #パラメータを更新
    beta = beta - eta*np.sum(g, axis=0)
    LL = loglike(beta, x, y)
    
    #EMアルゴリズムのパラメータを更新
    iter = iter + 1
    dl = LL1 - LL
    LL1 = LL
    if iter%display==0:
        print(LL)

561308.2286496802
535074.1552067868
518130.918271631
506811.5691732581
498983.9917161641
493396.78974358016
489294.92010496807
486208.3208456959
483835.19845950324
481976.13367191213
480495.8298263854
479300.2609840951
478322.63694903074
477514.5575151443
476840.3026845502
476273.06550993625
475792.4161900938
475382.5631659185
475031.1402187719
474728.3469712573
474466.330773981
474238.7360071788
474040.37115910096
473866.9598719357
473714.95260603237
473581.38258689776
473463.75446669426
473359.9574136423
473268.19662881043
473186.93890017853
473114.8689503324
473050.8541585371
472993.91583641677
472943.20567485935
472897.98630377155
472857.6151477239
472821.53094191675
472789.2424102241
472760.31871184154
472734.3813436247
472711.09724750085
472690.1729209738
472671.3493668594
472654.3977485349
472639.11564093863
472625.32378674665
472612.86328255787
472601.5931324401
472591.38811635296
472582.1369292896
472573.74055384245
472566.1108345554
472559.16922714387
472552.8456995957
472547

In [36]:
#推定結果の確認
print(np.round(np.array([-LL, LL_BFGS, -loglike(betat, x, y)]), 1))
pd.DataFrame(np.hstack((beta[:, np.newaxis], beta_BFGS[:, np.newaxis], betat[:, np.newaxis])))

[-472541.3 -471136.8 -471150.5]


Unnamed: 0,0,1,2
0,-0.780772,-0.703549,-0.7
1,-0.903146,-0.797976,-0.791506
2,-0.769811,-0.640235,-0.644111
3,0.667598,0.542961,0.540471
4,-0.090256,0.012788,0.004614
5,0.648853,0.634986,0.628077
6,1.019493,1.030746,1.016203
7,-1.106236,-0.992649,-0.97767
8,-0.812082,-0.802956,-0.801414
9,0.285616,0.300522,0.292216
