# Gaussian Mixtures

This section implements a Gaussian mixture model with diagonal covariance matrices.

### Import

In [None]:
import pandas as pd
from numpy import ndarray, array
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.mixture._base import BaseMixture
from src.rand_index_cmpt import randIndexComputing

### K-clusters tuning

In [None]:
# Tune the number of components (clusters) of the Gaussian mixture model
def tuning(
    X: pd.DataFrame,
    y: ndarray,
    returnBestModel: bool = False,
    candidateNClusters: "list[int] | None" = None,
) -> tuple:
    """Select the number of Gaussian-mixture components that best matches ``y``.

    One diagonal-covariance ``GaussianMixture`` is fitted on ``X`` for every
    candidate number of components. Each fitted model then predicts a
    component for every row of ``X`` (the same data it was fitted on), and the
    prediction is scored against ``y`` with ``randIndexComputing``. The
    candidate with the highest score wins; ``np.argmax`` keeps the first one
    when several candidates tie.

    Args:
        X: Data the mixtures are fitted on and evaluated against.
        y: Reference labels handed to ``randIndexComputing`` together with
            each model's prediction.
        returnBestModel: When true, the fitted model of the winning candidate
            is appended to the returned tuple.
        candidateNClusters: Numbers of components to try. ``None`` (the
            default) uses the grid ``[2, 5, 9, 20, 35, 55, 75, 100, 150]``.

    Returns:
        ``(bestNClusters, randomIndexes)`` or, when ``returnBestModel`` is
        true, ``(bestNClusters, randomIndexes, bestModel)``. ``randomIndexes``
        holds one score per candidate, in candidate order.

    Raises:
        ValueError: If ``candidateNClusters`` is empty.
    """
    # initialize the possible candidates (number of clusters)
    if candidateNClusters is None:
        nClusters: list[int] = [2, 5, 9, 20, 35, 55, 75, 100, 150]
    else:
        nClusters = list(candidateNClusters)

    # fail early with a clear message instead of the argmax error on an empty array
    if not nClusters:
        raise ValueError("candidateNClusters must contain at least one value")

    # one mixture per candidate; the fixed seed keeps the runs reproducible
    gaussianMixtures: list[BaseMixture] = [
        GaussianMixture(
            n_components=nComponents, covariance_type="diag", random_state=1629
        )
        for nComponents in nClusters
    ]

    # collect the scores in a list and convert once at the end:
    # np.append would copy the whole array on every iteration
    scores: list = []
    for model in gaussianMixtures:
        # train the model
        model.fit(X)

        # see how the model does on the training data
        prediction = model.predict(X)

        # compare the model prediction with the reference labels
        # NOTE(review): assumes randIndexComputing returns a scalar score - confirm
        scores.append(randIndexComputing(y, prediction))

    randomIndexes: ndarray = array(scores, dtype=float)

    # position of the highest score (first one in case of ties)
    bestIdx = int(np.argmax(randomIndexes))

    # number of clusters and fitted model associated with that score
    bestNClusters: int = nClusters[bestIdx]
    bestModel: BaseMixture = gaussianMixtures[bestIdx]

    # the best model is only part of the result when explicitly requested
    if returnBestModel:
        return bestNClusters, randomIndexes, bestModel
    return bestNClusters, randomIndexes