In [1]:
#-*- coding:utf-8 -*-
import numpy as np
import numpy.random as rd
import scipy as sp
from scipy import stats as st

import matplotlib.pyplot as plt

In [2]:
seed = 0
n = [200, 150, 150] #各データ数
K = 3 #潜在変数の数
D = 2 #次元

#mu:D次元
mu_true = np.array(
    [[0.2, 0.5],
     [1.2, 0.5],
     [2.0, 0.5]])

#sigma: D×D次元
sigma_true = np.array(
    [[[0.1,  0.085], [0.085, 0.1]],
     [[0.1, -0.085], [-0.085, 0.1]],
     [[0.1,  0.085], [0.085, 0.1]]
    ])

rd.seed(seed)
org_data = None
for i in range(K):
    #k_0 に属するデータを生成
    if org_data is None:
        org_data = np.c_[st.multivariate_normal.rvs(mean=mu_true[i], cov=sigma_true[i], size=n[i]), np.ones(n[i])*i]
        
    #k_1, k_2に属するデータを生成し、結合する
    else:
        tmp_data = np.c_[st.multivariate_normal.rvs(mean=mu_true[i], cov=sigma_true[i], size=n[i]), np.ones(n[i])*i]
        org_data = np.r_[org_data, tmp_data]

print(org_data)

[[-3.71170206e-01 -1.86094478e-03  0.00000000e+00]
 [-2.91738581e-01  3.96395507e-01  0.00000000e+00]
 [-2.83360841e-01 -1.52630335e-01  0.00000000e+00]
 ...
 [ 1.87631177e+00  4.47823093e-01  2.00000000e+00]
 [ 2.05218338e+00  5.68497961e-01  2.00000000e+00]
 [ 2.38004583e+00  8.18018654e-01  2.00000000e+00]]


In [3]:
data = org_data[:,:2]

In [4]:
#Given
T = len(data)
alpha = 1.0
r = 1 / T
k = K #潜在変数の数
D = 2 #次元

In [5]:
#初期化
rd.seed(seed)

#空のパラメータを用意
pi = np.zeros((T, k))
mu = np.zeros((T, k, D))
mu_ = mu.copy()
sigma = np.zeros((T, k, D, D))
sigma_ = sigma.copy()

#各パラメータの初期化
for i in range(k):
    pi[0, i] = 1 / k #piの初期化
    mu[0, i] = rd.uniform(low=0, high=1, size=D) #muの初期化(一様分布)
    mu_[0, i] = mu[0, i] * pi[0, i, np.newaxis] #mu_の初期化(mu*piで計算)
    sigma[0, i] = np.eye(D) #sigmaの初期化(単位行列)
    sigma_[0, i] = np.dot(mu[0, i][:,np.newaxis], mu[0, i][:,np.newaxis].T) * pi[0, i] #sigma_初期化

print(pi[0])
print(mu[0])
print(mu_[0])
print(sigma[0])
print(sigma_[0])

t = 1

[0.33333333 0.33333333 0.33333333]
[[0.5488135  0.71518937]
 [0.60276338 0.54488318]
 [0.4236548  0.64589411]]
[[0.18293783 0.23839646]
 [0.20092113 0.18162773]
 [0.14121827 0.21529804]]
[[[1. 0.]
  [0. 1.]]

 [[1. 0.]
  [0. 1.]]

 [[1. 0.]
  [0. 1.]]]
[[[0.10039875 0.13083519]
  [0.13083519 0.17049861]]

 [[0.1211079  0.10947854]
  [0.10947854 0.09896589]]

 [[0.0598278  0.09121205]
  [0.09121205 0.13905974]]]


In [39]:
#E-Step
def E_step(d, t, mu, sigma, pi, alpha, r, k):
    '''Eステップ(負担率gammaから各パラメータ_を求める)'''
    
    #pi*p(y|mu,sigma)を計算する
    pi_prob = np.array([pi[t-1, i]*st.multivariate_normal.pdf(d, mu[t-1,i], sigma[t-1, i]) for i in range(k)])
    print(pi_prob)
    
    gamma = (1-alpha*r) * pi_prob / np.sum(pi_prob) + (alpha*r / k)
    
    #piを計算する
    pi[t] = (1-r)*pi[t-1] + r*gamma
    print(pi[t]) #ok
    
    #mu_を計算する
    mu_[t] = (1-r)*mu_[t-1] + r*gamma[:,np.newaxis]*d
    print(mu_[t]) #ok
    
    #sigma_を計算する
    for i in range(k):
        sigma_[t,i] = (1-r)*sigma_[t-1,i] + r*gamma[i]*np.dot(d[:,np.newaxis], d[:,np.newaxis].T)         
    

In [40]:
E_step(data[0], 1, mu, sigma, pi, alpha, r, k)

[0.02686973 0.02843236 0.03136207]
[0.33328685 0.33332284 0.33339031]
[[0.18234177 0.23791851]
 [0.20027573 0.18126325]
 [0.14066723 0.21486609]]


In [48]:
#M-Step
def M_step(d, t, r, k):
    '''Mステップ(gammaを使って、各パラメータを更新する)'''
    
    #muを計算する
    mu[t] = mu_[t] / pi[t][:,np.newaxis]
    print(mu[t]) #ok
    
    #sigmaを計算する
    for i in range(k):
        sigma[t,i] = sigma_[t,i]/pi[t,i] - np.dot(mu[t,i][:,np.newaxis],mu[t,i][:,np.newaxis].T)
    
    print(sigma[t]) #ok
    
    

In [49]:
M_step(data[0], 1, r, k)

[[0.54710159 0.71385508]
 [0.60084611 0.54380688]
 [0.42192958 0.64448812]]
[[[0.001572   0.00122524]
  [0.00122524 0.00095497]]

 [[0.00186361 0.00104619]
  [0.00104619 0.00058731]]

 [[0.00136827 0.0011151 ]
  [0.0011151  0.00090877]]]


In [50]:
mu_

array([[[0.18293783, 0.23839646],
        [0.20092113, 0.18162773],
        [0.14121827, 0.21529804]],

       [[0.18234177, 0.23791851],
        [0.20027573, 0.18126325],
        [0.14066723, 0.21486609]],

       [[0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       ...,

       [[0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       [[0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       [[0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]]])

In [51]:
#パラメータ更新:
while t <= T:
    y_t = data[t-1]
    print(y_t)
    
    #E-step
    gamma = E_step(y_t, t, mu, sigma, pi, alpha, r, k)
    print(gamma)
    
    #M-Step
    M_step(y_t, t, mu, sigma, pi, r, k, gamma)
    
    t += 1

[-0.29173858  0.39639551]


LinAlgError: singular matrix