## Gaussian Mixture Models

In [32]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
from sklearn.decomposition import PCA

In [33]:
from sklearn.datasets import load_iris
dataset = load_iris()
X = dataset.data
y = dataset.target

In [36]:
X[0:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [41]:
# Correlated dataset can make covariance matrix ill-conditioned
pd.DataFrame(X).corr()

Unnamed: 0,0,1,2,3
0,1.0,-0.11757,0.871754,0.817941
1,-0.11757,1.0,-0.42844,-0.366126
2,0.871754,-0.42844,1.0,0.962865
3,0.817941,-0.366126,0.962865,1.0


In [43]:
# Let's perform PCA to remove linear dependence.
dr = PCA(3)
dr.fit(X)
X_new = dr.transform(X)

In [49]:
print(dr.explained_variance_ratio_)
pd.DataFrame(X_new).corr()


[0.92461872 0.05306648 0.01710261]


Unnamed: 0,0,1,2
0,1.0,1.366725e-15,-8.603643e-16
1,1.366725e-15,1.0,-1.507649e-16
2,-8.603643e-16,-1.507649e-16,1.0


In [63]:
x = np.array([[2, 3],
              [1, 2]])

In [66]:
class GaussianMixtureModel:
    def __init__(self,
                 num_clusters: int = 1,
                 tolerance: float = 1e-5,
                 num_iters: int = 10,
                 ):
        self.num_clusters = num_clusters
        self.tolerance = tolerance
        self.num_iters = num_iters
        self.X = None
    
    def gaussian(self, mu, cov):
        n, d = self.X.shape
        diff = (self.X - mu).T
        gauss = np.diagonal(1 / ((2 * np.pi) ** (n / 2) * np.linalg.det(cov) ** 0.5) * np.exp(-0.5 * np.dot(np.dot(diff.T, np.linalg.inv(cov)), diff))).reshape(-1, 1)
        return gauss.squeeze()

    def initial_means_covs(self):
        n, d = self.X.shape
        # Mean
        random_idx = np.random.permutation(n)
        selected_index = random_idx[0:self.num_clusters]
        Xmean_init = self.X[selected_index]
        # Covs
        covs = np.zeros((self.num_clusters, d, d))
        for cluster in range(self.num_clusters):
            covs[cluster] = self.Xcov
        return Xmean_init, self.clip_cov(covs)
    
    def clip_cov(self, x: np.ndarray):
        d = x.shape[1]
        return x + np.eye(d, d)
    
    def compute_Xmean_Xcov(self):
        n = self.X.shape[0]
        Xmean = np.mean(self.X, axis=0)
        Xcentred = self.X - Xmean
        Xcov = (1/n) * (Xcentred.T@Xcentred)
        print(Xcov)
        return Xmean, self.clip_cov(Xcov)
    
    def initialize_phis(self, num_clusters):
        """ Set all component distribution prior estimates 
        to the uniform distribution
        """
        return np.array([1/num_clusters for _ in range(num_clusters)])
    
    def initialize_cluster(self):
        n = X.shape[0]
        cluster_w = np.ones((n, self.num_clusters)) * 1/self.num_clusters
        return cluster_w
    
    def multivariate_pdf(self, cluster: int):
        var = multivariate_normal(mean=self.means[cluster],
                                  cov=self.covs[cluster],
                                  seed=42,
                                 )
        return var.pdf(self.X)
    
    def compute_pXi_Clust_i(self):
        for cluster in range(self.num_clusters):
            self.pXi_Clust_i[:, cluster] = self.gaussian(self.means[cluster],
                                                         self.covs[cluster])
            # self.multivariate_pdf(cluster)
        return None
            
    def compute_SigmaPXi_Zi_n_phi(self):
        return np.sum(self.pXi_Clust_i * self.phis, axis=1)
    
    def compute_phis(self) -> None:
        self.phis = np.mean(self.cluster_weights, axis=0)
        return None
        
    def compute_means(self) -> None:
        for cluster in range(self.num_clusters): 
            self.means[cluster] = np.sum(self.X * self.cluster_weights[:, cluster].reshape(-1, 1)) \
                                / np.sum(self.cluster_weights[:, cluster])
        return None
#     def comp_conv(self):
#         for j in range(self.X.shape[0]):
#             diff = (self.X[j] - mu_k).reshape(-1, 1)
#             cov_k += gamma_nk[j] * np.dot(diff, diff.T)
            
#         cov_k /= N_k
    def compute_covs(self) -> None:
        d = self.X.shape[1]
        for cluster in range(self.num_clusters):
            mean_ = self.means[cluster]
            Xcentred = self.X - mean_
            Xcov = (Xcentred.T@(self.cluster_weights[:, cluster].reshape(-1, 1)*Xcentred))
            self.covs[cluster] = Xcov + np.eye(d, d) # self.tolerance * np.eye(d, d)
            print(self.covs[cluster])
        return None
    
    def compute_likelihood(self) -> float:
        l = self.likelihood + 0.1
        return l
    
    def fit(self, X: pd.DataFrame):
        n, d = X.shape
        self.X = X.values.copy()
        self.Xmean, self.Xcov = self.compute_Xmean_Xcov()
        
        # Initialize mean and covariance randomly
        self.means, self.covs = self.initial_means_covs()        
        
        # Initialize clusters
        self.cluster_weights = self.initialize_cluster()
        self.phis = self.initialize_phis(self.num_clusters)
        
        # Probability distributions
        self.pXi_Clust_i = np.zeros((n, self.num_clusters))
        self.Sigma_PXi_Zi_n_phi = np.zeros(n)
        
        iter = 0
        mean_norm = 1
        self.prev_likelihood = 0
        self.likelihood = 0
        
#         while (iter < self.num_iters) or \
#               (np.linalg.norm(self.prev_likelihood - self.likelihood) <= self.tolerance):
        for i in range(2):
            # Perform Expectation(E) step
            # Compute probability of data given the cluster
            self.compute_pXi_Clust_i()
            self.compute_SigmaPXi_Zi_n_phi()
            # Compute probability of cluster           
            for cluster in range(self.num_clusters):
                #print(self.pXi_Clust_i)
                #print(self.Sigma_PXi_Zi_n_phi.shape)
                #print('phis:', self.phis[cluster])
                self.cluster_weights[:, cluster] = self.pXi_Clust_i[:, cluster] \
                                                 * self.phis[cluster] \
                                                 / self.Sigma_PXi_Zi_n_phi
            #print(self.cluster_weights)
            
            # Perform Maximization(M) Step
            self.compute_phis()
            self.compute_means()
            
            self.compute_covs()
            
            # TODO: compute log-likelihood
            self.likelihood = self.compute_likelihood()
            print(f'Log-likelihood for this {iter} iteration is {self.likelihood}')
            self.prev_likelihood = self.likelihood
                                    
            iter += 1

In [67]:
gmm = GaussianMixtureModel(num_clusters=3)
gmm.fit(pd.DataFrame(X_new))

[[ 4.20005343e+00  1.37519625e-15 -4.91458726e-16]
 [ 1.37519625e-15  2.41052943e-01 -2.06316445e-17]
 [-4.91458726e-16 -2.06316445e-17  7.76881034e-02]]
[[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
[[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
[[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
Log-likelihood for this 0 iteration is 0.1
[[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
[[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
[[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
Log-likelihood for this 1 iteration is 0.2




In [None]:
from scipy.stats import multivariate_normal
var = multivariate_normal(mean=[0,0], cov=[[1,0],[0,1]], seed=42)
var.pdf([[0.0,0.0],
         [0.0, 0.0]])