# Example: FMVMM (Mixtures of Identical and Non-Identical Distributions)

In [1]:
import os

# Remember the directory the kernel started in the first time this cell runs.
# A bare os.chdir("../..") is resolved against the *current* directory, so
# re-running the cell would climb two more levels on every execution.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

# Always resolve "../.." against the recorded start directory, which makes the
# cell idempotent. NOTE(review): presumably this moves to the repository root
# so that the `fmvmm` package imported in the next cell resolves - confirm.
os.chdir(os.path.join(NOTEBOOK_START_DIR, "../.."))

In [2]:
# Let us first import necessary modules

from fmvmm.mixtures.FMVMM import fmvmm
from sklearn import datasets
from fmvmm.utils.utils_mixture import clustering_metrics 

In [3]:
# Let us first get the iris data

iris = datasets.load_iris()

# `df` is the data that is passed to model.fit() further below
df = iris.data

In [4]:
# We could fit mixtures of every possible combination of identical and
# non-identical distributions. However, that might take quite a long time.

# Instead, let us try all possible combinations of the following distributions:
# Multivariate Generalized Hyperbolic,
# Skew Normal
# Multivariate Normal Inverse Gaussian
# Multivariate T
# Multivariate Skew T

dist = [
    "mghp",
    "mvsn",
    "mnig",
    "mvt",
    "mvst",
]

# To use the full list of distributions, simply omit the list_of_dist
# argument: by default every available distribution is used.

model = fmvmm(n_clusters=3, list_of_dist=dist)

model.fit(df)

distribution fitted ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic']
distribution fitted ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_skewnorm']
distribution fitted ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_norminvgauss']
distribution fitted ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_t']
distribution fitted ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_skew_t_smsn']
distribution fitted ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_skewnorm', 'fmvmm.distributions.multivariate_skewnorm']
dist

In [5]:
# Find the best model based on BIC.
# In the recorded output this is a list with one distribution name per cluster.

model.best_mixture()

['fmvmm.distributions.multivariate_t',
 'fmvmm.distributions.multivariate_t',
 'fmvmm.distributions.multivariate_t']

In [6]:
# Get the BIC of all the fitted models
# (the recorded output is a flat list of floats, presumably one per fitted combination)

model.bic()

[711.7076492774013,
 697.0784436918127,
 706.6009114771585,
 681.5341597917555,
 719.3091660401305,
 669.4740924392906,
 678.9965601353692,
 653.9298084500243,
 691.7048089330975,
 701.4893655163868,
 676.4226138309855,
 714.1976216909445,
 649.5479798218937,
 687.3229924763755,
 829.0889895356158,
 630.145472719892,
 649.9390287491451,
 624.7352466964525,
 710.1861816987816,
 672.4318516614228,
 647.2280696086405,
 732.6789905844719,
 620.3534457651027,
 705.804359387904,
 847.5705067411286,
 695.6094167895112,
 670.5368777398432,
 708.4485915396067,
 643.6622435754593,
 681.5739596738879,
 823.3401379214614,
 619.0275715022165,
 656.9914241550224,
 798.7574219483016,
 982.4183693203009]

In [7]:
# Get the information matrices and standard errors for all the fitted models

ims, ses = model.get_info_mat()

In [8]:
# We can see which of the fitted models were successful

model.worked()

Distribution Combinations That Could Be Fitted:
0 ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic']
1 ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_skewnorm']
2 ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_norminvgauss']
3 ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_t']
4 ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_skew_t_smsn']
5 ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.distributions.multivariate_skewnorm', 'fmvmm.distributions.multivariate_skewnorm']
6 ['fmvmm.distributions.multivariate_genhyperbolic', 'fmvmm.dist

In [9]:
# We can see which of the fitted models were not successful

model.not_worked()

Distribution Combinations That Could Not Be Fitted:


In [10]:
# Luckily, all models were successful.

# We can get the MLEs for all the models

pi_list, alpha_list = model.get_params()

In [11]:
# Let us now get the MLEs of the best model

pi_best, alpha_best = model.best_params()

# NOTE(review): in the recorded output, `pi` holds three values that sum to 1
# (presumably the mixing proportions) and `alpha` is a list of tuples of
# arrays - confirm the exact meaning against the fmvmm docs.
print("pi: ", pi_best)
print("alpha: ", alpha_best)

pi:  [0.42666667 0.33333333 0.24      ]
alpha:  [(array([5.96086899, 2.8376423 , 4.05928003, 1.1618888 ]), array([[0.23305969, 0.07155415, 0.13199134, 0.02847889],
       [0.07155415, 0.09287426, 0.02926368, 0.00686498],
       [0.13199134, 0.02926368, 0.3799496 , 0.21610546],
       [0.02847889, 0.00686498, 0.21610546, 0.16233063]]), array([-1.2735831 , -2.58377104,  4.21371348,  5.05288398])), (array([4.92949575, 3.31313483, 1.39171769, 0.13216361]), array([[0.1276169 , 0.10601967, 0.0214049 , 0.01883297],
       [0.10601967, 0.15401001, 0.01953699, 0.02218784],
       [0.0214049 , 0.01953699, 0.0344956 , 0.01394868],
       [0.01883297, 0.02218784, 0.01394868, 0.02384272]]), array([0.32199196, 1.23594201, 1.19854539, 3.84975236])), (array([6.37578569, 2.93361081, 5.40024292, 2.18999692]), array([[ 0.47779152,  0.09320541,  0.36423354, -0.03938397],
       [ 0.09320541,  0.10631035,  0.06615481,  0.02181322],
       [ 0.36423354,  0.06615481,  0.35731247, -0.0319485 ],
       [-0.039

In [12]:
# Check the clustering performance

# `clustering_metrics` is already imported in the imports cell at the top of
# the notebook, so it is not imported a second time here.

# To get the predictions for all the models

cluster_list = model.predict()

# To get the predictions for the best model

best_cluster = model.best_predict()

# Compare the best model's predictions with the iris target labels; the
# resulting dict of scores is displayed as the cell output.
clustering_metrics(iris.target, best_cluster)

{'accuracy': 0.9066666666666666,
 'precision': 0.9270833333333334,
 'recall': 0.9066666666666666,
 'f_score': 0.9048007615939072}

In [15]:
# Get the top-performing models together with their BIC values.
# The recorded output is a list of (BIC, distribution names) tuples, shown in increasing BIC order.

model.get_top_mixtures()

[(619.0275715022165,
  ['fmvmm.distributions.multivariate_t',
   'fmvmm.distributions.multivariate_t',
   'fmvmm.distributions.multivariate_t']),
 (620.3534457651027,
  ['fmvmm.distributions.multivariate_skewnorm',
   'fmvmm.distributions.multivariate_t',
   'fmvmm.distributions.multivariate_t']),
 (624.7352466964525,
  ['fmvmm.distributions.multivariate_skewnorm',
   'fmvmm.distributions.multivariate_skewnorm',
   'fmvmm.distributions.multivariate_t']),
 (630.145472719892,
  ['fmvmm.distributions.multivariate_skewnorm',
   'fmvmm.distributions.multivariate_skewnorm',
   'fmvmm.distributions.multivariate_skewnorm']),
 (643.6622435754593,
  ['fmvmm.distributions.multivariate_norminvgauss',
   'fmvmm.distributions.multivariate_t',
   'fmvmm.distributions.multivariate_t']),
 (647.2280696086405,
  ['fmvmm.distributions.multivariate_skewnorm',
   'fmvmm.distributions.multivariate_norminvgauss',
   'fmvmm.distributions.multivariate_t']),
 (649.5479798218937,
  ['fmvmm.distributions.multivari