In [1]:
import numpy as np
import math
from scipy.stats import multivariate_normal
class igmm:
    def __init__(self, X, dim, sigma_ini, tau):
        self.X = X
        self.dim = dim
        self.pi = list()
        self.mu = list()
        self.C = list()
        self.sp = list()
        self.sigma_ini = sigma_ini
        self.tau = tau
        
    def posterior_prob(self, x, component):
        mvn = multivariate_normal(mean = np.array(self.mu[component][0]), cov = np.array(self.C[component]))
        pdf_value = mvn.pdf(x)
        return self.pi[component]*pdf_value
    
    def createnewornot(self, x):
        createnew = True
        for i in range(len(self.pi)):
            novelty_criterion = self.tau/(((2*math.pi)**(dim/2))*math.sqrt(np.linalg.det(np.array(self.C[i]))))
            #print(f"Is this not a vector of length 2: {self.mu[i]}\n Dimensions: {np.shape(self.mu[i])}")
            mvn = multivariate_normal(mean = self.mu[i][0], cov = np.array(self.C[i]))
            pdf_value = mvn.pdf(x)
            if(pdf_value >= novelty_criterion):
                createnew = False
                return createnew
        return createnew
    
    def update(self, x):
        createnew = self.createnewornot(x)
        if(createnew == True):
            self.mu.append(np.array(x))
            self.C.append((self.sigma_ini**2)*np.eye(dim))
            self.sp.append(1)
            
            total_sum = np.sum(self.sp)
            for j in range(len(self.sp)-1):
                self.pi[j] = self.sp[j]/total_sum
            self.pi.append(self.sp[len(self.sp)-1]/total_sum)
        else:
            for j in range(len(self.sp)):
                posterior_value = self.posterior_prob(x,j)
                self.sp[j] += posterior_value
                prev_mu = self.mu[j]
                self.mu[j] = self.mu[j] + (posterior_value/self.sp[j])*(np.array(x)-self.mu[j])
                self.C[j] = self.C[j] - np.matmul(self.mu[j]-prev_mu,(self.mu[j]-prev_mu).T) + (posterior_value/self.sp[j])*(np.matmul((np.array(x)-self.mu[j]),(np.array(x)-self.mu[j]).T)-self.C[j])
            total_sum = np.sum(self.sp)
            for j in range(len(self.pi)):
                self.pi[j] = self.sp[j]/total_sum
        return
    
    def fit(self):
        iter = 1
        for x in self.X:
            self.update(x)
            print(f"\n\n\033[1mFor next sample {iter}:\033[0m")
            print_pi = np.array(self.pi)
            print_mean = np.array(self.mu)
            print_cov = np.array(self.C)
            print_sp = np.array(self.sp)
            for j in range(len(self.mu)):
                print(f"\033[1mComponent #{j+1}:\033[0m\nPrior Probability: {print_pi[j]}\nMean: {print_mean[j][0]}\nContribution of dataset: {print_sp[j]}\nCovariance:\n {print_cov[j]}")
            iter+=1
        return

In [2]:
class gen_samples:
    def __init__(self, pi, mu, C):
        self.X = list()
        self.pi = pi
        self.mu = mu
        self.C = C
        return
    
    def generate_samples(self, n_samples):
        iter = 1
        while(iter <= n_samples):
            z_i = np.argmax(np.random.multinomial(1, self.pi))
            sample = np.random.multivariate_normal(self.mu[z_i], self.C[z_i], 1)
            self.X.append(np.array(sample))
            iter += 1
        return

In [3]:
pi_value = [1/4, 3/4]
pi_value = np.array(pi_value)
mu_value = [[-5,0,0], [10,0,5]]
mu_value = np.array(mu_value)
C_value = [[[1,0,0],[0,1,0],[0,0,1]],[[1,0,0],[0,1,0],[0,0,1]]]
C_value = np.array(C_value)
sample_generator = gen_samples(pi_value, mu_value, C_value)
sample_generator.generate_samples(1000)
X = sample_generator.X
X = np.array(X)

In [4]:
sigma_threshold = (np.max(X)-np.min(X))/10
tau = 0.01
dim = 3
incremental_model = igmm(X,dim,sigma_threshold,tau)
incremental_model.fit()



[1mFor next sample 1:[0m
[1mComponent #1:[0m
Prior Probability: 1.0
Mean: [-5.97885277 -0.53906233  0.43345238]
Contribution of dataset: 1
Covariance:
 [[4.27839758 0.         0.        ]
 [0.         4.27839758 0.        ]
 [0.         0.         4.27839758]]


[1mFor next sample 2:[0m
[1mComponent #1:[0m
Prior Probability: 1.0
Mean: [-5.97718405 -0.53517258  0.42558796]
Contribution of dataset: 1.0045967179512643
Covariance:
 [[4.2760142  0.01719322 0.01719322]
 [0.01719322 4.2760142  0.01719322]
 [0.01719322 0.01719322 4.2760142 ]]


[1mFor next sample 3:[0m
[1mComponent #1:[0m
Prior Probability: 1.0
Mean: [-5.97076433 -0.53837323  0.42885015]
Contribution of dataset: 1.010323239876988
Covariance:
 [[4.2625478  0.02786585 0.02786585]
 [0.02786585 4.2625478  0.02786585]
 [0.02786585 0.02786585 4.2625478 ]]


[1mFor next sample 4:[0m
[1mComponent #1:[0m
Prior Probability: 0.5025675572147341
Mean: [-5.97076433 -0.53837323  0.42885015]
Contribution of dataset: 1.010323

[1mComponent #1:[0m
Prior Probability: 0.4228464825541377
Mean: [-5.91700594 -0.49944014  0.41395081]
Contribution of dataset: 1.0997365993162918
Covariance:
 [[4.17015845 0.27977453 0.27977453]
 [0.27977453 4.17015845 0.27977453]
 [0.27977453 0.27977453 4.17015845]]
[1mComponent #2:[0m
Prior Probability: 0.5771535174458622
Mean: [10.04375469 -0.39527894  5.35530486]
Contribution of dataset: 1.5010574114875952
Covariance:
 [[3.68217112 0.83191533 0.83191533]
 [0.83191533 3.68217112 0.83191533]
 [0.83191533 0.83191533 3.68217112]]


[1mFor next sample 210:[0m
[1mComponent #1:[0m
Prior Probability: 0.4219860043769857
Mean: [-5.91700594 -0.49944014  0.41395081]
Contribution of dataset: 1.0997365993162918
Covariance:
 [[4.17015845 0.27977453 0.27977453]
 [0.27977453 4.17015845 0.27977453]
 [0.27977453 0.27977453 4.17015845]]
[1mComponent #2:[0m
Prior Probability: 0.5780139956230144
Mean: [10.04317488 -0.3957151   5.35700607]
Contribution of dataset: 1.50636073071229
Covariance:
 



[1mFor next sample 462:[0m
[1mComponent #1:[0m
Prior Probability: 0.3476291379702197
Mean: [-5.83965305 -0.4482665   0.38486405]
Contribution of dataset: 1.242275454941933
Covariance:
 [[4.0094972  0.56549647 0.56549647]
 [0.56549647 4.0094972  0.56549647]
 [0.56549647 0.56549647 4.0094972 ]]
[1mComponent #2:[0m
Prior Probability: 0.6523708620297803
Mean: [10.06170681 -0.29555578  5.2384429 ]
Contribution of dataset: 2.3312899319973943
Covariance:
 [[3.17698816 1.34178203 1.34178203]
 [1.34178203 3.17698816 1.34178203]
 [1.34178203 1.34178203 3.17698816]]


[1mFor next sample 463:[0m
[1mComponent #1:[0m
Prior Probability: 0.3473352334091723
Mean: [-5.83965305 -0.4482665   0.38486405]
Contribution of dataset: 1.2422754549419497
Covariance:
 [[4.0094972  0.56549647 0.56549647]
 [0.56549647 4.0094972  0.56549647]
 [0.56549647 0.56549647 4.0094972 ]]
[1mComponent #2:[0m
Prior Probability: 0.6526647665908277
Mean: [10.0591331  -0.29484714  5.23893394]
Contribution of dataset: 



[1mFor next sample 681:[0m
[1mComponent #1:[0m
Prior Probability: 0.29255208868539856
Mean: [-5.79723947 -0.3999669   0.37507369]
Contribution of dataset: 1.348946737994543
Covariance:
 [[3.94518761 0.77352971 0.77352971]
 [0.77352971 3.94518761 0.77352971]
 [0.77352971 0.77352971 3.94518761]]
[1mComponent #2:[0m
Prior Probability: 0.7074479113146015
Mean: [10.0580945  -0.20296237  5.22368465]
Contribution of dataset: 3.2620158569270012
Covariance:
 [[2.91940276 1.6078219  1.6078219 ]
 [1.6078219  2.91940276 1.6078219 ]
 [1.6078219  1.6078219  2.91940276]]


[1mFor next sample 682:[0m
[1mComponent #1:[0m
Prior Probability: 0.292165689736774
Mean: [-5.79723947 -0.3999669   0.37507369]
Contribution of dataset: 1.348946737994543
Covariance:
 [[3.94518761 0.77352971 0.77352971]
 [0.77352971 3.94518761 0.77352971]
 [0.77352971 0.77352971 3.94518761]]
[1mComponent #2:[0m
Prior Probability: 0.707834310263226
Mean: [10.05839389 -0.20018774  5.2227452 ]
Contribution of dataset: 3.

[1mComponent #1:[0m
Prior Probability: 0.20654411545172122
Mean: [-5.78168412 -0.38323973  0.36054106]
Contribution of dataset: 1.400299377026746
Covariance:
 [[3.90912075 0.85377584 0.85377584]
 [0.85377584 3.90912075 0.85377584]
 [0.85377584 0.85377584 3.90912075]]
[1mComponent #2:[0m
Prior Probability: 0.6459559150129954
Mean: [10.03024193 -0.16314646  5.19152411]
Contribution of dataset: 4.379363040196948
Covariance:
 [[2.62110289 1.64415771 1.64415771]
 [1.64415771 2.62110289 1.64415771]
 [1.64415771 1.64415771 2.62110289]]
[1mComponent #3:[0m
Prior Probability: 0.1474999695352833
Mean: [9.37504802 1.83723061 1.74393304]
Contribution of dataset: 1.0
Covariance:
 [[4.27839758 0.         0.        ]
 [0.         4.27839758 0.        ]
 [0.         0.         4.27839758]]


[1mFor next sample 845:[0m
[1mComponent #1:[0m
Prior Probability: 0.20612222323071022
Mean: [-5.78168412 -0.38323973  0.36054106]
Contribution of dataset: 1.400299377026746
Covariance:
 [[3.90912075 0.85



[1mFor next sample 974:[0m
[1mComponent #1:[0m
Prior Probability: 0.16653523369788675
Mean: [-5.76807896 -0.37486864  0.3596218 ]
Contribution of dataset: 1.4285518362931484
Covariance:
 [[3.89331491 0.89839553 0.89839553]
 [0.89839553 3.89331491 0.89839553]
 [0.89839553 0.89839553 3.89331491]]
[1mComponent #2:[0m
Prior Probability: 0.5974386792895694
Mean: [10.04079792 -0.15163864  5.1981128 ]
Contribution of dataset: 5.1248741988134485
Covariance:
 [[2.50023681 1.66540703 1.66540703]
 [1.66540703 2.50023681 1.66540703]
 [1.66540703 1.66540703 2.50023681]]
[1mComponent #3:[0m
Prior Probability: 0.11854889481494826
Mean: [9.38267582 1.81409997 1.78807195]
Contribution of dataset: 1.0169213902545275
Covariance:
 [[4.40996459 0.20275878 0.20275878]
 [0.20275878 4.40996459 0.20275878]
 [0.20275878 0.20275878 4.40996459]]
[1mComponent #4:[0m
Prior Probability: 0.11747719219759556
Mean: [ 8.82628983 -0.10079922  7.99481664]
Contribution of dataset: 1.0077282441077082
Covariance:

In [5]:
import numpy as np
import math
from scipy.stats import chi2

class deletespurious:
    def __init__(self, params, dim, confidence):
        self.params = params
        self.dim = dim
        self.confidence = confidence
    
    def delete_trivial(self):
        continue_update = True
        while(continue_update):
            continue_update = False
            num_components = len(self.params)
            delete_threshold = (math.pi/10)*(math.e ** (-1*(math.pi/10)*num_components))
            for k in range(num_components):
                if(self.params[k][0] <= delete_threshold):
                    del self.params[k]
                    continue_update = True
                    break
        return
    
    def inconfidenceornot(self, i, j):
        mahalanobis_distance = np.matmul(np.matmul(self.params[i][1]-self.params[j][1],np.linalg.inv(self.params[i][2])),(self.params[i][1]-self.params[j][1]).T)
        lower_critical_value = chi2.ppf((1-self.confidence)/2, dim)
        upper_critical_value = chi2.ppf(1-(1-self.confidence)/2, dim)
        if(lower_critical_value <= mahalanobis_distance <= upper_critical_value):
            return 1
        return 0
    
    def createLM(self):
        LM = []
        num_components = len(self.params)
        for i in range(num_components):
            ith_entries = list()
            for j in range(num_components):
                ith_entries.append(self.inconfidenceornot(i,j))
            LM.append(ith_entries)
        LM = np.array(LM)
        return LM
    
    def updateLM(self, LM, sum_LP, index_to_remove):
        updated_LM = np.delete(np.delete(LM, index_to_remove, axis=0), index_to_remove, axis=1)
        updated_sum_LM = np.delete(sum_LM, index_to_remove)
        del self.params[index_to_remove]
        return updated_LM, updated_sum_LM
    
    def deleteLMspurious(self):
        sorted_params = sorted(self.params, key=lambda x: x[0], reverse=True)
        self.params = sorted_params
        self.delete_trivial()
        LM = self.createLM()
        sum_LM = np.sum(LM, axis=1)
        continue_update = True
        while(continue_update):
            continue_update = False
            num_components = len(sum_LM)
            for i in range(num_components):
                for j in range(num_components):
                    if(LM[i][j]==1):
                        if(sum_LM[j]>=2):
                            continue_update = True
                            updated_LM, updated_sumLM = self.updateLM(LM, sum_LM, i)
                            LM = updated_LM
                            sum_LM = updated_sumLM
                        elif(sum_LM[j]==1 and self.params[i][0] <= self.params[j][0]):
                            continue_update = True
                            updated_LM, updated_sumLM = self.updateLM(LM, sum_LM, i)
                            LM = updated_LM
                            sum_LM = updated_sumLM
                        num_components = len(sum_LM)
                        i-=1
                        if(j>=i):
                            j-=1
        total_spsum = sum(x[3] for x in self.params)
        for i in range(len(self.params)):
            self.params[i][0] = self.params[i][3]/total_spsum
        return

In [6]:
M = len(incremental_model.pi)
params = []
for k in range(M):
    current_components = list()
    current_components.append(incremental_model.pi[k])
    current_components.append(incremental_model.mu[k])
    current_components.append(incremental_model.C[k])
    current_components.append(incremental_model.sp[k])
    params.append(current_components)
dim = 3
confidence = 0.95
remove_spurious = deletespurious(params, dim, confidence)
remove_spurious.deleteLMspurious()
for k in range(len(remove_spurious.params)):
    print(f"Component #{k+1}:\nPrior Probability:{remove_spurious.params[k][0]}\nMean:{remove_spurious.params[k][1]}\nContribution:{remove_spurious.params[k][3]}\nCovariance\n{remove_spurious.params[k][2]}\n")

Component #1:
Prior Probability:0.606187696877148
Mean:[[10.04227552 -0.1447773   5.19374115]]
Contribution:5.335795421614907
Covariance
[[2.46988176 1.66805238 1.66805238]
 [1.66805238 2.46988176 1.66805238]
 [1.66805238 1.66805238 2.46988176]]

Component #2:
Prior Probability:0.16293233630780393
Mean:[[-5.76465692 -0.37407744  0.35770629]]
Contribution:1.4341657189396744
Covariance
[[3.88888383 0.90568773 0.90568773]
 [0.90568773 3.88888383 0.90568773]
 [0.90568773 0.90568773 3.88888383]]

Component #3:
Prior Probability:0.11587008517458824
Mean:[[9.38407583 1.80921262 1.79620706]]
Contribution:1.019913589737529
Covariance
[[4.43268692 0.23782412 0.23782412]
 [0.23782412 4.43268692 0.23782412]
 [0.23782412 0.23782412 4.43268692]]

Component #4:
Prior Probability:0.11500988164045979
Mean:[[ 8.83229983 -0.10075449  7.98370327]]
Contribution:1.012341891890963
Covariance
[[4.33767789 0.11144008 0.11144008]
 [0.11144008 4.33767789 0.11144008]
 [0.11144008 0.11144008 4.33767789]]

