# Example: Dirichlet Mixture Model

In [1]:
# Move the working directory two levels up before the fmvmm imports below.
# NOTE(review): presumably this makes the repository root the import root for
# the `fmvmm` package — confirm. The path is relative, so re-running this cell
# in the same kernel climbs two more directories each time; run it only once.
import os
os.chdir("../..")

In [2]:
# import necessary modules

from fmvmm.mixtures.DMM_Soft import DMM_Soft
from fmvmm.mixtures.DMM_Hard import DMM_Hard
from fmvmm.utils.utils_mixture import sample_mixture_distribution
from fmvmm.distributions import dirichlet
import numpy as np

In [3]:
# Simulate a labelled sample from a three-component Dirichlet mixture model.
np.random.seed(5)  # fix the global NumPy seed before any sampling happens

# Mixing proportions, one per component.
pis = [0.3, 0.5, 0.2]

# Dirichlet parameter vectors of the three components.
a1 = [5, 5, 5]
a2 = [7, 15, 225]
a3 = [50, 10, 4]

# Each component's parameter vector is wrapped in its own one-element list.
alphas = [[a] for a in (a1, a2, a3)]

data, label = sample_mixture_distribution(1000, dirichlet.rvs, pis, alphas)

In [4]:
# Fit the Soft DMM (DMM_Soft) with three clusters to the simulated data.
# fit() prints a confirmation message once fitting has finished.

model1 = DMM_Soft(n_clusters= 3)
model1.fit(data)

Soft DMM Fitting Done Successfully


In [5]:
# Retrieve the fitted parameter estimates (the MLEs) of the Soft DMM.
# get_params() is unpacked as a pair: the mixing proportions first, then the
# per-component alpha vectors.
# NOTE(review): the print labels spell "vaues" (typo for "values"); they are
# left untouched here so the recorded output keeps matching the code.

pi_soft, alpha_soft = model1.get_params()

print("pi vaues: ", pi_soft)
print("alpha vaues: ", alpha_soft)

pi vaues:  [0.30138564 0.49999095 0.19862341]
alpha vaues:  [[5.062837461576432, 4.983848038140964, 5.019564652120155], [7.174278834982839, 15.245684383456593, 227.31932854182696], [46.507525409682835, 9.330051120538414, 3.584104722023921]]


In [6]:
# Standard errors of the Soft DMM estimates, requested with method="louis".
# get_info_mat() is unpacked as the information matrix (im) and the standard
# errors (se).
# NOTE(review): the 12 printed values look like the 3 mixing proportions
# followed by the 3x3 alpha entries — confirm the ordering in the fmvmm docs.

im, se = model1.get_info_mat(method="louis")

print("standard errors: ", se)

standard errors:  [ 0.01745009  0.02236069  0.0142222   0.30881423  0.30519248  0.29719684
  0.35474074  0.72245626 11.08355662  3.71008826  0.73837566  0.26803755]


In [7]:
# Standard errors of the Soft DMM estimates again, this time requested with
# method="score" instead of method="louis".
# Note: this rebinds `im` and `se`, replacing the values from the "louis" call.

im, se = model1.get_info_mat(method="score")

print("standard errors: ", se)

standard errors:  [ 0.01742648  0.02236069  0.01418818  0.30365108  0.30596142  0.31160834
  0.32325377  0.68776915 10.26278073  3.62337514  0.71029754  0.26550514]


In [8]:
# Score the Soft DMM cluster assignments against the true component labels.
# Per the original note, the comparison is made with label switching in mind.

from fmvmm.utils.utils_mixture import clustering_metrics

predicted_soft = model1.predict()
clustering_metrics(label, predicted_soft)

{'accuracy': 0.998, 'precision': 0.998, 'recall': 0.998, 'f_score': 0.998}

In [9]:
# Report the information criteria (AIC, BIC, ICL) of the fitted Soft DMM,
# one per line, in that order.

for criterion_name, criterion in (
    ("AIC", model1.aic),
    ("BIC", model1.bic),
    ("ICL", model1.icl),
):
    print(criterion_name, criterion())

AIC -6414.134290020937
BIC -6360.148981952134
ICL -6369.235267785873


In [10]:
# Fit the Hard DMM (DMM_Hard) with three clusters to the same simulated data.
# fit() prints a confirmation message once fitting has finished.

model2 = DMM_Hard(n_clusters= 3)
model2.fit(data)

Hard DMM Fitting Done Successfully


In [11]:
# Retrieve the fitted parameter estimates (the MLEs) of the Hard DMM.
# get_params() is unpacked as a pair: the mixing proportions first, then the
# per-component alpha vectors. They are stored under Hard-DMM-specific names
# so the Soft DMM estimates (pi_soft, alpha_soft) are not overwritten and the
# two fits can still be compared afterwards.

pi_hard, alpha_hard = model2.get_params()

# Label strings are kept exactly as before so the printed output is unchanged.
print("pi vaues: ", pi_hard)
print("alpha vaues: ", alpha_hard)

pi vaues:  [0.3 0.5 0.2]
alpha vaues:  [array([5.13307   , 5.07683489, 5.13265107]), array([  7.17364518,  15.24409011, 227.29315746]), array([46.16704257,  9.29445097,  3.57438085])]


In [12]:
# Standard errors of the Hard DMM estimates, requested with method="louis".
# get_info_mat() is unpacked as the information matrix (im) and the standard
# errors (se); this rebinds the `im` and `se` obtained for the Soft DMM.

im, se = model2.get_info_mat(method="louis")

print("standard errors: ", se)

standard errors:  [ 0.01732051  0.02236068  0.01414214  0.30833331  0.30256038  0.29484352
  0.3545411   0.72193125 11.07409056  3.42785225  0.70191528  0.25899917]


In [13]:
# Score the Hard DMM cluster assignments against the true component labels.
# Per the original note, the comparison is made with label switching in mind.

predicted_hard = model2.predict()
clustering_metrics(label, predicted_hard)

{'accuracy': 0.998, 'precision': 0.998, 'recall': 0.998, 'f_score': 0.998}

In [14]:
# Report the information criteria (AIC, BIC, ICL) of the fitted Hard DMM,
# one per line, in that order.

for criterion_name, criterion in (
    ("AIC", model2.aic),
    ("BIC", model2.bic),
    ("ICL", model2.icl),
):
    print(criterion_name, criterion())

AIC -6413.908448703943
BIC -6359.923140635139
ICL -6368.641537267545


## High Dimensional Case

The high-dimensional method is currently implemented only for Soft DMM (`DMM_Soft`).

In [24]:
import numpy as np

# Re-seed here so this section produces the same data every time the cell is
# run, regardless of how much of the global random stream earlier cells (or
# earlier runs of this cell) have already consumed.
np.random.seed(5)

# Length of each component's Dirichlet parameter vector.
n_features = 1000

# Mixing proportions of the three components.
pis = [0.4762, 0.2857, 0.2381]

# Component parameter vectors, drawn uniformly from a different range each.
a1 = np.random.uniform(10, 20, n_features)
a2 = np.random.uniform(20, 200, n_features)
a3 = np.random.uniform(10, 100, n_features)

# Each component's parameter vector is wrapped in its own one-element list.
alphas = [[a1], [a2], [a3]]

# Note: this rebinds `data` and `label`, replacing the low-dimensional sample.
data, label = sample_mixture_distribution(1000, dirichlet.rvs, pis, alphas)

In [25]:
# Fit a three-cluster Soft DMM to the high-dimensional data, selecting the
# estimation routine with method="highdimensional".

model3 = DMM_Soft(n_clusters= 3, method="highdimensional")
model3.fit(data)

Soft DMM Fitting Done Successfully


In [26]:
# Score the high-dimensional Soft DMM cluster assignments against the true
# component labels.

predicted_highdim = model3.predict()
clustering_metrics(label, predicted_highdim)

{'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f_score': 1.0}

In [27]:
# Execution time stored on the fitted model, in seconds; the value is shown
# as the cell output because it is the last expression.

model3.execution_time

5.802024841308594