In [1]:
import pandas as pd
import numpy as np
import scipy as sp

In [2]:
import seaborn as sns
from matplotlib import pyplot as plt

In [114]:
class UGMM(object):
    
    def __init__(self, X, K=1):
        self.X = X
        self.K = K
        self.N = self.X.shape[0]

    def _init(self):
#         self.alpha=np.random.rand(self.K,1)
        self.m_prior= np.zeros((self.K,1))
        self.m=self.m_prior.copy()
        self.beta_prior=np.random.rand(self.K,1)
        self.beta=self.beta_prior.copy()
        self.nu_prior=np.ones((self.K,1))*np.random.randint(1,10)
        self.nu= self.nu_prior.copy()
        self.W_prior=np.ones((self.K,1))*np.random.randint(1,10)
        self.W=self.W_prior.copy()
        self.alpha_prior=np.ones((self.K,1))*1
        self.alpha=self.alpha_prior.copy()
        self.r=np.zeros((self.N,self.K))
        self.x_nebla=np.zeros((self.K,1))
        self.Nk=np.zeros((self.K,1))
        self.S=np.zeros((self.K,1))
        self.pi_nebla=np.zeros((self.K,1))
        self.lamda_nebla=np.zeros((self.K,1))
        self.E_mu_lamda=np.zeros((self.N,self.K))
        
        

        print('Init mean')
        print(self.m)
        

    def E_Step(self):
        for n in range(self.N):
            for k in range(self.K):
                self.pi_nebla[k]=np.exp(sp.special.digamma(self.alpha[k])-sp.special.digamma(self.alpha.sum()))
                self.lamda_nebla[k]=np.exp(sp.special.digamma(self.nu[k]/2, out=None)+np.log(2)+np.log(self.W[k]))
                self.E_mu_lamda[n][k]=(self.beta[k]**(-1))+self.nu[k]*self.W[k]*(self.X[n]-self.m[k])**2
                self.r[n][k]=self.pi_nebla[k]*(self.lamda_nebla[k]**0.5)*(np.exp(-0.5*self.E_mu_lamda[n][k]))
        row_sums = self.r.sum(axis=1)
        self.r= self.r / row_sums[:, np.newaxis]
        
        
    def fit(self, max_iter=10000, tol=1e-12):
        self._init()
#         self.r_values = [self.r_elbo()]
        self.m_history = [self.m.copy()]
        self.E_Step()
#         self.s2_history = [self.s2]
        for iter_ in range(1, max_iter+1):
#             print('before')
#             print(self.m_history)
            self.M_Step()
#             print('after')
#             print(self.m_history)
            self.m_history.append(self.m.copy())
            if iter_ % 25 == 0:
                print(iter_, self.m_history[iter_])
                
            if np.linalg.norm(self.m_history[-1]-self.m_history[-2],2) <= tol:
                print('VI converged with ll %.3f at iteration %d'%(self.m_history[-1][0],iter_))
                break
            self.E_Step()
        if iter_ == max_iter:
            print('VI ended with ll %.3f'%(self.m_history[-1][0]))


    def M_Step(self):
        self._update_Nk()
        self._update_x_nebla()
        self._update_S()
        self._update_alpha()
        self._update_W()
        self._update_beta()
        self._update_nu()
        self._update_m()

    def _update_Nk(self):
        for k in range(self.K):
            self.Nk[k]=self.r[:,k].sum()
            
    def _update_x_nebla(self):
        for k in range(self.K):
            self.x_nebla[k]=0
            for n in range(self.N):
#                 print(self.Nk)
                self.x_nebla[k]+=self.r[n][k]*self.X[n]/self.Nk[k]
    
    def _update_S(self):
        for k in range(self.K):
            self.S[k]=0
            for n in range(self.N):
                self.S[k]+=(self.r[n][k]*(self.X[n]-self.x_nebla[k])**(2))/self.Nk[k]
                
    def _update_alpha(self):
        for k in range(self.K):
            self.alpha[k]=self.alpha_prior[k]+self.Nk[k]
            
    def _update_beta(self):
        for k in range(self.K):
            self.beta[k]=self.beta_prior[k]+self.Nk[k]
        
    def _update_m(self):
        for k in range(self.K):
            self.m[k]=(self.beta_prior[k]*self.m_prior[k]+self.Nk[k]*self.x_nebla[k])/self.beta[k]
#         print('achha')
#         print(self.m)
    
    
    def _update_W(self):
        for k in range(self.K):
            self.W[k]=((1/self.W_prior[k])+self.Nk[k]*self.S[k]+(self.beta_prior[k]*self.Nk[k]/(self.beta_prior[k]+self.Nk[k]))*(self.x_nebla[k]-self.m_prior[k])**2)**(-1)
        
    
    def _update_nu(self):
        for k in range(self.K):
            self.nu[k]=self.nu_prior[k]+self.Nk[k]
    
#     def _calculate_rnk
        

In [115]:
data=pd.read_csv('data2.txt',header=None)

In [116]:
X =np.array(data.iloc[0,0])
for i in range(1,1000):
    X= np.append(X, data.iloc[i,0])
    

In [118]:
ugmm = UGMM(X, 3)
ugmm.fit()

Init mean
[[0.]
 [0.]
 [0.]]
25 [[0.45063513]
 [0.45170145]
 [0.45076113]]
50 [[0.45033401]
 [0.45227966]
 [0.45058528]]
75 [[0.44999175]
 [0.45292814]
 [0.45039583]]
100 [[0.44959268]
 [0.453668  ]
 [0.45019042]]
125 [[0.44911583]
 [0.45452488]
 [0.4499677 ]]
150 [[0.44853275]
 [0.45552909]
 [0.44972893]]
175 [[0.44780399]
 [0.45671537]
 [0.44948097]]
200 [[0.44687282]
 [0.45812251]
 [0.44924148]]
225 [[0.44565352]
 [0.45979341]
 [0.44904849]]
250 [[0.44400765]
 [0.46177774]
 [0.448978  ]]
275 [[0.44169271]
 [0.46414303]
 [0.44917828]]
300 [[0.43823915]
 [0.46701178]
 [0.44994227]]
325 [[0.43260646]
 [0.4706882 ]
 [0.45188329]]
350 [[0.42191833]
 [0.47618341]
 [0.45649035]]
375 [[0.39332759]
 [0.48873452]
 [0.46924612]]
400 [[0.2920144 ]
 [0.53961975]
 [0.51222009]]
425 [[0.29326582]
 [0.55567982]
 [0.50249401]]
450 [[0.29681347]
 [0.56279803]
 [0.48487078]]
475 [[0.29913909]
 [0.56419595]
 [0.47063779]]
500 [[0.30048538]
 [0.56323822]
 [0.45910038]]
525 [[0.30133594]
 [0.56168158]
 [

In [110]:
mean=ugmm.beta

In [111]:
mean

array([[586.22427203],
       [  0.67150057],
       [414.22523161]])