# Sesión 14 B

## Modelos de Mezcla Gaussiana (GMM) 

In [None]:
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

In [None]:
import os
# Path to the customer CSV, resolved relative to the current working directory
ruta = os.path.join('..', 'data', 'bank_customer_data.csv')
# Load the dataset and preview its first rows
df = pd.read_csv(ruta)
df.head()

In [None]:
# Size of the dataset as (rows, columns)
df.shape

In [None]:
# How many rows carry each distinct value of the `labels` column
df.labels.value_counts()

#### 1. Gaussian Mixture Model (GMM) básico

In [None]:
# IPython help lookup (trailing `?`): shows the documentation for GaussianMixture
GaussianMixture?

* **También puedes checar la documentación oficial de sklearn sobre GMM [aquí](https://scikit-learn.org/stable/modules/mixture.html#gmm).**

In [None]:
# Gaussian mixture model with four components (not fitted yet)
gmm = GaussianMixture(n_components=4)

In [None]:
# Fit the mixture on the income and debt columns
gmm.fit(df[['income', 'debt']])

In [None]:
# Fitted parameters: mean vector of each mixture component
gmm.means_

In [None]:
# Fitted parameters: covariance of each mixture component
gmm.covariances_

In [None]:
# Fitted parameters: mixing weight of each component
gmm.weights_

In [None]:
from scipy.stats import multivariate_normal

# Frozen Gaussian distributions built from the fitted mean and covariance
# of the first four mixture components (indices 0..3).
X1, X2, X3, X4 = (
    multivariate_normal(mean=media, cov=covarianza)
    for media, covarianza in zip(gmm.means_[:4], gmm.covariances_[:4])
)

In [None]:
# Probability of each row belonging to each mixture component,
# computed on the same income/debt columns used for fitting
probas = gmm.predict_proba(df[['income', 'debt']])
probas

In [None]:
# Wrap the membership probabilities in a DataFrame, one column per component.
# The number of columns is read from `probas` itself instead of being
# hard-coded, so the labels stay valid if the component count changes.
df_probas = pd.DataFrame(
    probas,
    columns=[f"componente_{i}" for i in range(probas.shape[1])]
)

In [None]:
# Display the per-component probability table
df_probas

In [None]:
# Scatter of the data, coloured by the most likely mixture component.
# predict() returns one integer label per row, which `cmap` maps to a colour.
# Passing the predict_proba() matrix instead would make matplotlib read each
# (n, 4) row as an RGBA colour and ignore `cmap`.
plt.scatter(
    x=df['income'],
    y=df['debt'],
    c=gmm.predict(df[['income', 'debt']]),
    cmap='Accent',
    alpha=0.5
)
# Axis labels
plt.xlabel('Ingresos mensuales (x100k MXN)')
plt.ylabel('Deuda (x100k MXN)')

# Evaluation grid over [0, 8] x [0, 8], built once and shared by all components
x = np.linspace(0, 8, 100)
y = np.linspace(0, 8, 100)
X, Y = np.meshgrid(x, y)
grid = np.dstack((X, Y))

# Density contours of each fitted Gaussian
for gaussiana in (X1, X2, X3, X4):
    z = gaussiana.pdf(grid)
    plt.contour(X, Y, z, levels=10, cmap='rainbow')

plt.show()

---

#### 2. Probar varios números de componentes y comparar BIC y AIC

In [None]:
import warnings
# Silence every warning from this point on (no category or module filter is given).
# NOTE(review): presumably meant to hide convergence warnings from the fits below — confirm.
warnings.filterwarnings('ignore')

In [None]:
# BIC for mixtures with 1 to 9 components.
# Each candidate model is bound to `gmm_k`, so the 4-component `gmm` fitted
# earlier in the notebook is not overwritten. The fixed seed makes the
# scores reproducible across runs.
ks = range(1, 10)
features = df[['income', 'debt']]
bics = []

for k in ks:
    gmm_k = GaussianMixture(n_components=k, random_state=0).fit(features)
    bics.append(gmm_k.bic(features))

# (k, BIC) pairs; lower BIC is better
list1 = list(zip(ks, bics))
list1

In [None]:
# (k, BIC) pair with the smallest BIC value
min(list1, key=lambda x: x[1])

In [None]:
# AIC for the same range of component counts (`ks`).
# Each candidate model is bound to `gmm_k`, so the 4-component `gmm` fitted
# earlier in the notebook is not overwritten. The fixed seed makes the
# scores reproducible across runs.
features = df[['income', 'debt']]
aics = []

for k in ks:
    gmm_k = GaussianMixture(n_components=k, random_state=0).fit(features)
    aics.append(gmm_k.aic(features))

# (k, AIC) pairs; lower AIC is better
list2 = list(zip(ks, aics))
list2

In [None]:
# (k, AIC) pair with the smallest AIC value
min(list2, key=lambda x: x[1])

In [None]:
# Plot both information criteria against the number of components
for valores, etiqueta, tono in ((bics, "BIC", 'purple'), (aics, "AIC", 'green')):
    plt.plot(ks, valores, marker='o', label=etiqueta, color=tono)

plt.xlabel("Número de componentes (k)")
plt.ylabel("Valor del criterio")
plt.legend()
plt.show()