# Dataset_1b GMM Bayes Classifier

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  #A statistical plotting library
from sklearn.cluster import KMeans
from kneed import KneeLocator  #A function that helps in optimization of number of clusters from an error curve
from scipy.stats import multivariate_normal as mvn
from numba import njit

In [2]:
header_names = ['x1', 'x2' , 'Class']
D = pd.read_csv('Data/train.csv', header = None, names = header_names)
D.head()

Unnamed: 0,x1,x2,Class
0,-0.08659,-0.818173,0.0
1,0.410383,-0.38866,0.0
2,0.4024,-0.332094,0.0
3,0.356568,-0.530018,0.0
4,-0.387783,-0.748299,0.0


In [3]:
L_df = D.loc[:,['x1','x2']]
Unlab_Data = L_df.to_numpy()

lab_df = D.loc[:,'Class']
labels = lab_df.to_numpy()

#Training Dataset for Class 0
L0 = (D['Class'] == 0.0)
L0_df = D.loc[L0 , ['x1', 'x2'] ]  
Class0 = L0_df.to_numpy()

#Training Dataset for Class 1
L1 = (D['Class'] == 1.0)
L1_df = D.loc[L1 , ['x1', 'x2']]
Class1 = L1_df.to_numpy()

#Training Dataset for Class 2
L2 = (D['Class'] == 2.0)
L2_df = D.loc[L2, ['x1', 'x2']]
Class2 = L2_df.to_numpy()
labels.shape

(600,)

In [4]:
#KMeans implementation for initialization and optimization of the number of clusters
#Number of clusters for each class eqauls the number of gaussian componenets to be fitted for that class
def K_Clustering(Class,M):
    #Dictionary of the arguments for scikit.KMeans
    KMeans_args = {
        "init" :"random",
        "n_init" : 10,
        "max_iter" : 300,
        "random_state" : 0,
        }
    #Estimation of the optimum number of clusters using elbow method
    std_error = []
    for cluster in range(1,11):
        kmeans = KMeans(n_clusters = cluster , **KMeans_args)
        kmeans.fit(Class)
        std_error.append(kmeans.inertia_)
    if M==0:
        #detecting the elbow point of the curve of 's_err vs K' using kneed, which gives the optimum number of clusters
        curve = KneeLocator(range(1,11), std_error, curve="convex", direction = "decreasing")
        K_opt = curve.elbow
    else:
        #Using Manually entered value for K_opt
        K_opt = M
    #clustering the class in to K_opt clusters 
    kmeans =  KMeans(n_clusters = K_opt , **KMeans_args)
    kmeans.fit(Class)
    labels = kmeans.labels_
    centers = kmeans.cluster_centers_
    return K_opt,labels,centers



In [5]:

        
#initialization of the parameters using K-Clusters

def Parameters_old(Class,M):
    #Will return a mean(mu)-(K,d) array;
    N,d = np.shape(Class)
    K,lab,mu = K_Clustering(Class,M)
    #gamma contains initial responsibilty values for an example w.r.t each clusters as columns
    gamma = np.array([ [0]*K for i in range(N)])
    for example in range(N):
        for cluster in range(K):
            if lab[example] == cluster:
                gamma[example][cluster]= 1
    return K,mu,gamma


In [6]:

#Defining the Gaussian Mixture Model as a class

class Gaussian_Mixture_Model:
    #Class - Examples of the class to which the Gaussian Componenets need to be fitted
    #Class - N x d matrix, where N is the number of examples and d is the number of features for each example
    #K - Number of Gaussian Components that needs to be fitted
    
    def __init__(self,Class,K,MU,GAMMA,f): 
        self.Class = Class
        self.K = K                  #Attribute for Number of clusters
        self.GAMMA = GAMMA          #Attribute for NxK array of posterior responsibity term 
        self.MU = MU                #Attribute for the mean values. An Kxd array.
        self.SIGMA = None           #Attribute for (K,d,d) array of covariances
        self.W = None               #Attribute for prior probabilty, an array of length K
        #self.max_iter = max_iter   #Attribute for the number of iterations
        self.N = len(self.Class)    #Attribute for number of examples available
        self.d = len(self.Class[0]) #Attribute for the number of features in each example
        self.f = f                  #Attribute that acts as switch between diagonal and full covariance matrix
        self.mean_shift = np.reshape(self.Class, (self.N, 1, self.d) ) - np.reshape(self.MU, (1, self.K, self.d) )
    
    def Prior_Probability(self):
        #A function to estimate the (K,) array of prior prob.
        self.W = np.einsum("ij -> j",self.GAMMA) / self.N  
        
    def Mean(self):
      # A function to calculate mean
      self.MU =  ((self.GAMMA).T) @ (self.Class) / np.reshape((self.W*self.N), (self.K, 1)) 
  
    def Covariance_Matrix_Array(self):
        # A function to calculate covariances of the features of the examples
        
        Nk = np.einsum("ij -> j",self.GAMMA)
        self.mean_shift = np.reshape(self.Class, (self.N, 1, self.d) ) - np.reshape(self.MU, (1, self.K, self.d) )
        sigma = np.einsum("nki,nkj->kij", np.einsum("nk,nki->nki", self.GAMMA, self.mean_shift), 
                                   self.mean_shift) / np.reshape(Nk, (self.K, 1, 1))
            
        if self.f==1: #Case where we use full diagonal covariance matrix
            self.SIGMA = sigma
        
        if self.f==0: #Case where we use a diagonal covariance matrix
            I = np.identity(self.d,dtype=int) #An identity matrix of the size equal to number of feature
            self.SIGMA = np.einsum("kij,ij -> kij",sigma,I)
  

    def Gaussian_Prob(self):
        #This function accounts for our assumption that the conditional distribution of an example is a Gaussian.
        
        self.Covariance_Matrix_Array()           #SIGMA gets updated to the full covariance matrix
        SIGMA_inv = np.linalg.inv(self.SIGMA)     #Inverse of the covariance matrix
        
        norm = np.sqrt(((2*np.pi)**self.d)*np.linalg.det(self.SIGMA))  #Normalisation term of the Gaussian dist.
        #Exponential term of the Gaussian
        expo = np.exp(-0.5*(np.einsum("nkj,nkj->nk", np.einsum("nki,kij->nkj", self.mean_shift, SIGMA_inv),
                                     self.mean_shift)))  
        
        #Prob_mat is an (NxK)-array that contains Gaussian Prob. of the various examples to belong to 
        #respective clusters 
        Prob_mat =  expo / norm
        return Prob_mat
    
    
    def Expectation_Step(self):
        #In this step we update the values of the responsibilty term
        
        N = self.Gaussian_Prob()
        self.W =  np.einsum("ij -> j",self.GAMMA) / self.N  #Prior probability array
        Num =  N * self.W
        Den = np.reshape(np.sum(Num, axis=1), (self.N, 1) )
        self.GAMMA = Num/Den
    
      
    def Maximization_Step(self): 
        #In this step we updtae the various parameters
        
        #Updation of GAMMA
        self.Expectation_Step()
        
        #Updation of W
        self.Prior_Probability()
        
        #Updation of Mean MU
        self.Mean()
        
        #Updation of Covariance Matrix SIGMA
        self.Covariance_Matrix_Array()
      
   
    def Log_Likelihood(self):
      
      llhd = np.sum(np.log(self.Gaussian_Prob() @ self.W))
    
      return llhd
      
    
    def fit(self,max_iter,threshold):
        
        log_likelihoods = []  #Attribute for 1D array that contains Log_Likelihood values. Size depends on the number iterations
                                #required to converge
        
        
        for i in range(max_iter):
            self.Expectation_Step()   #Updates Gamma
            self.Maximization_Step()  #Updates all the other parameters
            log_likelihoods.append(self.Log_Likelihood())
            #An if conditional for the requirement of convergence
            if (i!=0) & ((log_likelihoods[i] - log_likelihoods[i-1]) < threshold):
                    break
                    
        print("Number of iterations to convegre:" ,i)
        
        
#         #Plotting log_likelihood vs iterations, comment out if not needed
#         sns.set_style("darkgrid")         #setting the plot style
#         fig = plt.figure(figsize=(10,10))
#         ax0 = fig.add_subplot(111) 
#         ax0.set_title('Log-Likelihood')
#         ax0.plot(range(i+1),log_likelihoods)  
        
#         #Plot of the fitted Gaussians for each class
#         x,y = np.meshgrid(np.sort(self.Class[:,0]),np.sort(self.Class[:,1])) # the meshgrid for the plot
#         XY = np.array([x.flatten(),y.flatten()]).T 
#         sns.set_style("darkgrid")         #setting the plot style
#         fig1 = plt.figure(figsize=(10,10))
#         ax1 = fig1.add_subplot(111)
#         ax1.set_title('Fitted Gaussians')
#         ax1.scatter(self.Class[:,0],self.Class[:,1],c= 'r')
#         for mu,sigma in zip(self.MU,self.SIGMA):
#             multi_normal = mvn(mean=mu,cov=sigma)
#             ax1.contour(np.sort(self.Class[:,0]),np.sort(self.Class[:,1]),multi_normal.pdf(XY).reshape(len(self.Class),len(self.Class)),colors='black',alpha=0.3)
            
    
    def Class_Prob(self,Y):
            #call this for prediction of 1b
            #A function that returns Prob. for a unlabelled vector Y to belong to a class
            #Pred_Prob = []
            Multi_Gauss = []
            for mu,sigma in zip(self.MU,self.SIGMA):
                Multi_Gauss.append(mvn(mean=mu,cov=sigma).pdf(Y)) #An array of Multi-Variate Gaussian Prob of various clusters                                                                         
            Wt_Gauss = np.einsum("i,i->i",self.W,Multi_Gauss) #An array of weighted probabilities
            Pred_Prob =np.sum(Wt_Gauss)  
            return Pred_Prob
        
    def Class_Prob_set_features(self,L):
        #call this for prediction of 2b
        #A function that returns Prob. for a set of feature vectors. For example, the set of all local feature 
        #vectors of an image. L will be an 2D array-(nxd).
        n = L.shape[0]   #Number of local feature row vectors
        d = L.shape[1]   #dimension  of feature space
        Prob_nfeature_list = []
        for n_feature in range(n):
            Prob_nfeature_list.append(self.Class_Prob(L[n_feature,:d]))
        
        Prob_nfeature_arr = np.array(Prob_nfeature_list)
        Pred_Prob = np.product(Prob_nfeature_arr)
        return Pred_Prob
            
        
        
            
        
        

In [7]:
#Fitting gaussian mixtures for Class0 
K,MU,GAMMA = Parameters_old(Class0,0)  #0 as the second argument chooses by default K_opt estimated using elbow method. 
                                       #If not pass the number of clusters needed

gmm0 = Gaussian_Mixture_Model(Class0,K,MU,GAMMA,1) #0 as the last argument -> diagonal covariance matrix. 1-> full covariance matix.   
#gmm0.fit(max_iter=100,threshold = 1e-10)
%time _ = gmm0.fit(max_iter=100,threshold = 1e-10)
Y = [0.410383 ,-0.388660]
#gmm0.Class_Prob(Y)

Number of iterations to convegre: 26
Wall time: 185 ms


In [8]:
#Fitting gaussian mixtures for Class1 
K,MU,GAMMA = Parameters_old(Class1,0)
gmm1 = Gaussian_Mixture_Model(Class1,K,MU,GAMMA,1)
gmm1.fit(max_iter=100,threshold = 1e-10)
#gmm1.Class_Prob(Y)

Number of iterations to convegre: 23


In [9]:
#Fitting gaussian mixtures for Class2 
K,MU,GAMMA = Parameters_old(Class2,0)
gmm2 = Gaussian_Mixture_Model(Class2,K,MU,GAMMA,1)
gmm2.fit(max_iter=100,threshold = 1e-10)

#gmm2.Class_Prob(Y)

Number of iterations to convegre: 67


In [10]:
#We have fitted 3 gaussians to each class and now we would like to make prediction for unlabelled points
def Class_Prediction(Y):
    #gmm0,gmm1,gmm2 are the instances of class 0, class 1 and class 2 respectively
    n = len(Y) #number of unlabelled examples
    prediction = []
    for example in range(n):
        Prob=[]
        Prob = [gmm0.Class_Prob(Y[example,:]), gmm1.Class_Prob(Y[example,:]),gmm2.Class_Prob(Y[example,:]) ]
        prediction.append(np.argmax(Prob))
    #print("Labels for the given dataset:", prediction)
    return prediction
# Pred = Class_Prediction(Unlab_Data)
# lst3 = [value for value in Pred if value in labels]
# lst3
# arr = os.listdir('.\Data\Dataset_2B\coast\train')
# print(arr)


# Real World Dataset_2b

In [11]:
import os
from sklearn.base import TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer

## Loading the dataset as an array

In [25]:
def load_file_as_arr(Dir):
    #Returns a 3D array DAT
    dat = [] #the list that needs to be converted to 3D array DAT
    for image in os.listdir(Dir):
        f = open(Dir +'/'+image)
        single_image_dat = []
        for line in f:
            single_image_dat.append([float(x) for x in line.strip().split(' ')])
        dat.append(single_image_dat)
    DAT = np.array(dat)
    return DAT,len(DAT)     #len(DAT) is needed for prior prob. calculation as the number of 
                            #examples are different for each class


#Class coast training images        
Dir_ctr = 'Data/Dataset_2B/coast/train'             
coast,n_c = load_file_as_arr(Dir_ctr) 



#Class forest training images
Dir_ftr = 'Data/Dataset_2B/forest/train' 
forest,n_f = load_file_as_arr(Dir_ftr)


#Class mountain training images
Dir_mtr = 'Data/Dataset_2B/mountain/train'  
mountain,n_m = load_file_as_arr(Dir_mtr)


#Class opencountry training images
Dir_otr = 'Data/Dataset_2B/opencountry/train'  
opencountry, n_o = load_file_as_arr(Dir_otr)

#Class street training images
Dir_str = 'Data/Dataset_2B/street/train'  
street, n_s = load_file_as_arr(Dir_str)

Dir_coast_dev = 'Data/Dataset_2B/coast/dev' 


#Prior prob. for respective classes as an 1x5 array
initial_weight = np.array([n_c, n_f, n_m, n_o, n_s])
prior_prob = initial_weight/np.sum(initial_weight)

prior_prob

array([0.20373377, 0.18587662, 0.21185065, 0.23295455, 0.16558442])

## A Class for Scaling of Data

In [13]:
#Defining a class for proper scaling of data
#Based on a stackoverflow answer
from sklearn.base import TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer

class Preprocess_Data(TransformerMixin):  #Dir is the location of all the data files from images of the class
    #An advantage of using this class is each feature will be scaled differently using QuantileTransformer.
    
    
    def __init__(self,n_quant, **kwargs):
        self._scaler = QuantileTransformer(n_quantiles = n_quant,copy=True, **kwargs)
        self._orig_shape = None

    def fit(self, X, **kwargs):
        X = np.array(X)
        # Save the original shape to reshape the flattened X later
        # back to its original shape
        if len(X.shape) > 1:
            self._orig_shape = X.shape[1:]
        X = self._flatten(X)
        self._scaler.fit(X, **kwargs)
        return self

    def transform(self, X, **kwargs):
        X = np.array(X)
        X = self._flatten(X)
        X = self._scaler.transform(X, **kwargs)
        X = self._reshape(X)
        return X

    def _flatten(self, X):
        # Reshape X to <= 2 dimensions
        if len(X.shape) > 2:
            X = X.reshape(X.shape[0],-1)  #Converts 3d array in to two dimensions
        return X
    
    
    def _reshape(self, X):
        # Reshape X back to it's original shape
        if len(X.shape) >= 2:
            X = X.reshape(-1, *self._orig_shape)
        return X


In [14]:
#Coast images preprocessing

coast_scaler = Preprocess_Data(len(coast))
coast_scaler.fit(coast)  #flattens and scales
coast_processed = coast_scaler.transform(coast) #converts the flattened array back to original shape
#coast_processed[0,:,:].T

#forest images preprocesing
forest_scaler = Preprocess_Data(len(forest))
forest_scaler.fit(forest)
forest_processed = forest_scaler.transform(forest)

#mountain images preprocesing
mountain_scaler = Preprocess_Data(len(mountain))
mountain_scaler.fit(mountain)
mountain_processed = mountain_scaler.transform(mountain)



#opencountry images preprocesing
opencountry_scaler = Preprocess_Data(len(opencountry))
opencountry_scaler.fit(opencountry)
opencountry_processed = opencountry_scaler.transform(opencountry)


#street images preprocessing
street_scaler = Preprocess_Data(len(street))
street_scaler.fit(street)
street_processed = street_scaler.transform(street)




#### Principal Componenet Analysis from scratch

In [15]:
#PCA for each image to determine the optimum number of features

class Transform_Features:
    #Will tranform the 23 dimensional feature vector to a lower dimension with more than 95% variance explained
    
    def __init__(self,Class):
        
        self.Class = Class           #Class should be an Nxnxd array.
        self.N = Class.shape[0]      #Attribute for the number of training images of the given class.
        self.n = Class.shape[1]      #Attribute for the total number of feature row vectors of the image
                                     #or the number of boxes the image is divided in to.        
        self.d = Class.shape[2]      #Attribute for dimension of the feature row vector for each thus divided box.
        self.var_explained = None    #Attribute for an percentage variance explained by different dimensions. 
                                     #Dim-d explains 100 percent.
        self.cum_var_explained = None
        
        self.dim_opt = None
        self.eig_val_arr = None
        self.eig_vec_arr = None
        #self.T_Class_image = None 
        
            
        
    def Eigen_Decomposition(self,X):
        #Returns Eigen Values and Eigen Vectors.
        
        sigma = np.cov(X.T)        #X should be an nxd array, then sigma will be an dxd covriance matrix.
        
        #Eigendecomposition exists for all positive, real valued symmetric matrices and sigma is one such matrix.
        self.eig_val_arr,self.eig_vec_arr = np.linalg.eig(sigma)
        #return eig_val_arr,eig_vec_arr
    
    
    def Reduce_Dimension_Image(self,X):
        #Determines the optimum number of dimensions of the feature space for an input nxd array(i.e) a single image.
        self.Eigen_Decomposition(X) #Will update eig_val_arr,eig_vec_arr
        
        norm = 100.0/np.einsum("i-> ",self.eig_val_arr) 
        self.var_explained = (self.eig_val_arr*norm)
        
        self.cum_var_explained = np.cumsum(self.var_explained)
        
        #Optimum number of dimensions using elbow method
        curve = KneeLocator(range(1,(len(self.cum_var_explained))+1), self.cum_var_explained, curve="concave", direction = "increasing")
        dim_opt_img = curve.elbow
        
        return dim_opt_img
    
    
    def Reduced_Dimension(self):
        #An itheration of Reduced_Dimension_Image over all images of the class and choosing the final reduced dimension.
        dim_opt_list = []
        
        for image in range(self.N):
            dim_opt_list.append(self.Reduce_Dimension_Image(self.Class[image,:,:]))
        
        self.dim_opt = max(dim_opt_list)
        
        
        
    def Transform_Class(self,dim):
        #A function that produces the transformation matrix that reduces the dimension of the feature vectors to dim.
        
        T_img_list = [] 
        
        for image in range(self.N):
            self.Eigen_Decomposition(self.Class[image,:,:])              #Will update eig_val_arr and eig_vec_arr.
            T_mat_image = (self.eig_vec_arr.T[:][:dim]).T  #Transformation matrix for the image
#             T_img_list.append(np.einsum("ik,kj -> ij",self.Class[image,:,:],T_mat_image))  #Transform the image in to lower dimension.
            T_img_list.append(self.Class[image,:,:].dot(T_mat_image))  #using np.dot is faster.
    
        Transformed_Class = np.array(T_img_list) #Nxnxdim_opt array of all transformed images
        return Transformed_Class
            
            


In [16]:
#Estimation of optimum number of dimensions for feature vectors


coast_inst = Transform_Features(coast_processed)             #Instance for class coast,pass in the processed data.
coast_inst.Reduced_Dimension()                               #Updates the attribute for the opt number of clusters for forest class.


forest_inst = Transform_Features(forest_processed)           #instance for class forest,pass in the preprocessed data.
forest_inst.Reduced_Dimension()                              #Updates the attribute for opt number of clusters for forest class.


mountain_inst = Transform_Features(mountain_processed)       #instance for class mountain,pass in the preprocessed data.
mountain_inst.Reduced_Dimension()                            #Updates the attribute for opt number of clusters for mountain class.


opencountry_inst = Transform_Features(opencountry_processed) #instance for class opencountry,pass in the preprocessed data.
opencountry_inst.Reduced_Dimension()                         #Updates the attribute for opt number of clusters for opencountry class.


street_inst = Transform_Features(street_processed)           #instance for class street,pass in the preprocessed data.
street_inst.Reduced_Dimension()                              #Updates the attribute for opt number of clusters for street class.





#The final chosen dimension of the transformed feature vectors

red_dim= max(coast_inst.dim_opt, forest_inst.dim_opt, mountain_inst.dim_opt, opencountry_inst.dim_opt,
             street_inst.dim_opt)  
red_dim = 23

In [17]:
#Preparation of training data for each class

#Class-coast:
coast_pca = coast_inst.Transform_Class(red_dim)                          #coast_pca is the images in the lower feature dimension an 3D array
coast_tr_data = coast_pca.reshape((coast_inst.N*coast_inst.n),red_dim)   #2D array and is the training data to which gaussians will be fitted for class coast


#Class-forest:
forest_pca = forest_inst.Transform_Class(red_dim)
forest_tr_data = forest_pca.reshape((forest_inst.N*forest_inst.n),red_dim)


#Class-mountain:
mountain_pca = mountain_inst.Transform_Class(red_dim)
mountain_tr_data = mountain_pca.reshape((mountain_inst.N*mountain_inst.n),red_dim)


#Class-opencountry:
opencountry_pca = opencountry_inst.Transform_Class(red_dim)
opencountry_tr_data = opencountry_pca.reshape((opencountry_inst.N*opencountry_inst.n),red_dim)


#Class-street:
street_pca = street_inst.Transform_Class(red_dim)
street_tr_data = street_pca.reshape((street_inst.N*street_inst.n),red_dim)




In [18]:
#Fitting GMM for class coast
K,MU,GAMMA = Parameters_old(coast_tr_data,10)  #0 as the second argument chooses by default K_opt estimated using elbow method. 
                                             #If not pass the number of clusters needed

gmm_coast = Gaussian_Mixture_Model(coast_tr_data,K,MU,GAMMA,1) #0 as the last argument -> diagonal covariance matrix. 1-> full covariance matix.   
gmm_coast.fit(max_iter=1000,threshold = 1e-10)
#gmm_coast.Class_Prob_set_features(coast_pca[0,:,:])





Number of iterations to convegre: 25


In [19]:
#Fitting GMM for class forest
K,MU,GAMMA = Parameters_old(forest_tr_data,10)  #0 as the second argument chooses by default K_opt estimated using elbow method. 
                                             #If not pass the number of clusters needed

gmm_forest = Gaussian_Mixture_Model(forest_tr_data,K,MU,GAMMA,1) #0 as the last argument -> diagonal covariance matrix. 1-> full covariance matix.   
gmm_forest.fit(max_iter=1000,threshold = 1e-10)
#gmm_forest.Class_Prob_set_features(coast_pca[0,:,:])


Number of iterations to convegre: 14


In [20]:
#Fitting GMM for class mountain:
K,MU,GAMMA = Parameters_old(mountain_tr_data,10)  #0 as the second argument chooses by default K_opt estimated using elbow method. 
                                             #If not pass the number of clusters needed

gmm_mountain = Gaussian_Mixture_Model(mountain_tr_data,K,MU,GAMMA,1) #0 as the last argument -> diagonal covariance matrix. 1-> full covariance matix.   
gmm_mountain.fit(max_iter=1000,threshold = 1e-10)
#gmm_mountain.Class_Prob_set_features(coast_pca[0,:,:]) #was meant for testing, don't execute in the final run.



Number of iterations to convegre: 25


In [21]:
#Fitting GMM for class opencountry:
K,MU,GAMMA = Parameters_old(opencountry_tr_data,10)  #0 as the second argument chooses by default K_opt estimated using elbow method. 
                                             #If not pass the number of clusters needed

gmm_opencountry = Gaussian_Mixture_Model(opencountry_tr_data,K,MU,GAMMA,1) #0 as the last argument -> diagonal covariance matrix. 1-> full covariance matix.   
gmm_opencountry.fit(max_iter=1000,threshold = 1e-10)
#gmm_opencountry.Class_Prob_set_features(coast_pca[0,:,:])



Number of iterations to convegre: 17


In [22]:
#Fitting GMM for class street:
K,MU,GAMMA = Parameters_old(street_tr_data,10)  #0 as the second argument chooses by default K_opt estimated using elbow method. 
                                             #If not pass the number of clusters needed

gmm_street = Gaussian_Mixture_Model(street_tr_data,K,MU,GAMMA,1) #0 as the last argument -> diagonal covariance matrix. 1-> full covariance matix.   
gmm_street.fit(max_iter=1000,threshold = 1e-10)
#gmm_street.Class_Prob_set_features(coast_pca[0,:,:])



Number of iterations to convegre: 84


In [23]:
#We have fitted 3 gaussians to each class and now we would like to make prediction for unlabelled points
def Class_Prediction_set_features(Y,W):
    #Y is a 3D array and W is the prior probability estimated in the beginning
    
    n = len(Y)            #number of unlabelled images
    prediction = []
    for image in range(n):
        Prob=[]
        Prob =np.array([gmm_coast.Class_Prob_set_features(Y[image,:,:]),
                        gmm_forest.Class_Prob_set_features(Y[image,:,:]),
                        gmm_mountain.Class_Prob_set_features(Y[image,:,:]),
                        gmm_opencountry.Class_Prob_set_features(Y[image,:,:]),
                        gmm_street.Class_Prob_set_features(Y[image,:,:])])
        
        #Prob = Prob.dot(W)  #Uncomment If we want to multiply prior prob.
        
        prediction.append(np.argmax(Prob))
    #print("Labels for the given dataset:", prediction)
    return prediction

In [26]:
#for prediction pass in data that is processed and feature vectors are reduced to a (1xred_dim)- dimension. 
Class_Prediction_set_features(forest_pca[:,:,:],prior_prob)

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 4,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]