In [1]:
#####Mini batch Stochastic Gradient Descent#####
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy.linalg
import itertools
import math
from scipy import sparse
from numpy.random import *
from scipy import optimize
import seaborn as sns
import time

#np.random.seed(98537)

In [2]:
####データの発生####
#データの設定
N = 1000000
k = 15

#説明変数の生成
k1 = 5; k2 = 7; k3 = 6
x1 = np.array(np.random.random(N*k1)).reshape((N, k1))
x2 = np.zeros((N, k2))
for j in range(k2):
    prob = np.random.uniform(0.25, 0.55, 1)
    x2[:, j] = np.random.binomial(1, prob, N)
x3 = np.random.multinomial(1, np.random.dirichlet(np.repeat(3.0, k3), 1).reshape(k3), N)
x3 = np.delete(x3, np.argmin(np.sum(x3, axis=0)), axis=1)   #冗長な変数の削除
x = np.concatenate((np.repeat(1, N)[:, np.newaxis], x1, x2, x3), axis=1)
k = x.shape[1]

#パラメータの生成
beta0 = np.array([-0.7])
beta1 = np.random.normal(0, 0.75, k-1)
beta = np.append(beta0, beta1)
betat = beta.copy()

#応答変数の生成
logit = np.dot(x, beta)
Prob = 1 / (1 + np.exp(-logit))
y = np.random.binomial(1, Prob, N)

In [3]:
####Stochastic Gradient Descentでロジスティック回帰を推定####
##対数尤度関数を計算する関数
def loglike(beta, x, y):
    mu = np.exp(np.dot(x, beta))   #ロジットの指数
    Prob = mu / (1 + mu)   #確率の計算
    LL = -np.sum(y*np.log(Prob) + (1-y)*np.log(1 - Prob))   #対数尤度関数の和
    return LL 

In [4]:
##対数尤度関数の勾配ベクトル
def dloglike(beta, x, y):
    mu = np.exp(np.dot(x, beta))   #ロジットの指数
    Prob = mu / (1 + mu)   #確率の計算
    dlogit = -np.sum(y[:, np.newaxis]*x - Prob[:, np.newaxis]*x, axis=0)
    return dlogit

In [5]:
##準ニュートン法でパラメータを推定
#初期値の設定
beta0 = np.array([-0.5])
beta1 = np.random.normal(0, 0.2, k-1)
beta = np.append(beta0, beta1)
k = beta.shape[0]

#準ニュートン法で対数尤度を最大化
res = optimize.minimize(loglike, beta, jac=dloglike, method='BFGS', args=(x, y), options={"gtol": 0.01, "disp":True})
LL_BFGS = -res.fun
beta_BFGS = res.x
print(np.round(-res.fun, 1))
print(np.round(res.x, 3))

Optimization terminated successfully.
         Current function value: 520612.741394
         Iterations: 28
         Function evaluations: 49
         Gradient evaluations: 49
-520612.7
[-0.706  0.091  0.082 -0.762 -1.286  1.516 -0.367  0.948 -0.431  0.545
 -1.457 -0.463 -0.228  0.76   0.933  1.076 -0.333  1.552]


In [6]:
##ミニバッチ確率的勾配法によるパラメータの推定
#収束判定の設定
iter = 1
display = 50
LL1 = 1000000000   #対数尤度の初期値
dl = 100   #EMアルゴリズムの対数尤度の差の初期値
tol = 0.1

#アルゴリズムの設定
a = 1000
eta = 1/N * a   #学習率
m = 100   #ミニバッチ数
n = np.array(N/m, dtype="int")   #ミニバッチのサンプル数

#初期値の設定
beta0 = np.array([0.0])
beta1 = np.random.normal(0, 0.2, k-1)
beta = np.append(beta0, beta1)
LL1 = loglike(beta, x, y)

#ミニバッチをサンプリング
u = np.random.uniform(0, 1, N)
index = np.array(np.argsort(-u), dtype="int").reshape(n, m)

In [7]:
##パラメータを更新
while abs(dl) >= tol:
    
    #ミニバッチごとの勾配ベクトルを更新
    g = np.zeros((m, k))
    for j in range(m):
        y1 = y[index[j, ]]; x1 = x[index[j, ], ] 
        g[j, ] = dloglike(beta, x1, y1) / m

    #パラメータを更新
    beta = beta - eta*np.sum(g, axis=0)
    LL = loglike(beta, x, y)
    
    #EMアルゴリズムのパラメータを更新
    iter = iter + 1
    dl = LL1 - LL
    LL1 = LL
    if iter%display==0:
        print(LL)

600243.6108509474
580292.1009192829
566964.0806103527
557641.8618424407
550865.9270838078
545778.9347145004
541856.1709992508
538763.7407409309
536281.3944482298
534259.0041361933
532591.1494065903
531201.7885377306
530034.7313233347
529047.5519597359
528207.5991034481
527489.3174794802
526872.4092689792
526340.545111642
525880.4422865199
525481.1930083358
525133.7662837099
524830.6323580949
524565.4752365326
524332.9695182411
524128.60492975847
523948.54675875657
523789.52368605277
523648.7367995553
523523.78518174315
523412.6046084237
523313.41672428465
523224.6866663718
523145.08755529835
523073.4706104883
523008.83990105893
522950.3309398814
522897.19248031196
522848.7709940971
522804.4974030778
522763.8757123701
522726.47325302195
522691.91229098506
522659.86279907386
522630.0362211983
522602.1800850703
522576.0733418751
522551.52232992963
522528.35727484705
522506.4292517095
522485.60754566314
522465.7773565714
522446.83780115366
522428.7001726369
522411.2864235725
522394.5278422

In [36]:
#推定結果の確認
print(np.round(np.array([-LL, LL_BFGS, -loglike(betat, x, y)]), 1))
pd.DataFrame(np.hstack((beta[:, np.newaxis], beta_BFGS[:, np.newaxis], betat[:, np.newaxis])))

[-472541.3 -471136.8 -471150.5]


Unnamed: 0,0,1,2
0,-0.780772,-0.703549,-0.7
1,-0.903146,-0.797976,-0.791506
2,-0.769811,-0.640235,-0.644111
3,0.667598,0.542961,0.540471
4,-0.090256,0.012788,0.004614
5,0.648853,0.634986,0.628077
6,1.019493,1.030746,1.016203
7,-1.106236,-0.992649,-0.97767
8,-0.812082,-0.802956,-0.801414
9,0.285616,0.300522,0.292216
