In [5]:
import numpy as np
from scipy.stats import multivariate_normal
from audio_proc_utils import *

In [3]:
# Scratch check: reducing over axis=0 averages down the rows,
# leaving one mean per column.
A = np.array([
    [0, 1, 2],
    [1, 2, 3],
])
np.mean(A, axis=0)

array([0.5, 1.5, 2.5])

In [6]:
# Settings shared with the audio helpers.
Fs = 16000     # presumably the sample rate in Hz -- confirm against readDir
n_mfcc = 20    # number of MFCC coefficients requested from audio2mfcc

# Load every clip of each class (adjust the paths to your own directory layout).
x_music = readDir('music_speech/music_wav', Fs)
x_speech = readDir('music_speech/speech_wav', Fs)
x_silence = readDir('music_speech/silence_wav', Fs)
X = np.concatenate([x_music, x_speech, x_silence])

# One-hot targets, one row per clip: [music, speech, silence].
y_music = np.array([[1, 0, 0] for _ in range(len(x_music))])
y_speech = np.array([[0, 1, 0] for _ in range(len(x_speech))])
y_silence = np.array([[0, 0, 1] for _ in range(len(x_silence))])
Y = np.concatenate([y_music, y_speech, y_silence])

# Hold out part of the data; the split policy lives in splitData.
X_train, y_train, X_test, y_test = splitData(X, Y)

In [8]:
# Convert the raw training clips into MFCC features using the project helper.
x_train = audio2mfcc(X_train, n_mfcc, Fs)    # x_train: (Nclips, N_mfcc, N_frames)

In [10]:
# Take the MFCC matrix of the first training clip.
# NOTE(review): this rebinds `X`, which earlier held the concatenated raw audio.
X = x_train[0, :,:]

In [14]:
# Frames-as-rows view of the first clip, so each row is one sample for the GMM.
# Derived from x_train directly (not from the current value of X) so that
# re-running this cell is idempotent instead of flipping the orientation back.
X = x_train[0, :, :].T
X.shape

(313, 20)

In [17]:
# Scratch version of the GMM initialisation, sized from the data rather than
# from hard-coded numbers so it keeps working when X comes from another clip.
n_components = 5

# Each mean starts at a randomly chosen row (sample) of X.
random_row = np.random.randint(low=0, high=X.shape[0], size=n_components)
mu = [X[row_index, :] for row_index in random_row]

# Every component starts from the covariance of the whole data set.
sigma = [np.cov(X.T) for _ in range(n_components)]

sigma[0].shape

(20, 20)

In [45]:
class GMM:
    """Gaussian mixture model with full covariances, fitted by EM.

    Attributes set by ``initialize`` / ``fit``:
        n, m    : number of training samples and features per sample
        Pi      : (k,) prior probability of each component
        weights : (n, k) responsibilities P(component | x_i) for the training data
        mu      : list of k mean vectors, each of shape (m,)
        sigma   : list of k covariance matrices, each of shape (m, m)
    """

    def __init__(self, k, max_iter=10):
        '''
            k        : Number of latent variables (mixture components)
            max_iter : Number of EM iterations performed by fit
        '''
        self.k = k
        self.max_iter = int(max_iter)

    def initialize(self, X):
        '''
            Set the starting parameters from the data.

            X : array of shape (n, m)
            n : number of training examples
            m : number of features in each example
        '''
        X = np.asarray(X)
        self.shape = X.shape
        self.n, self.m = self.shape

        # Pi contains the prior probability of latent variables
        self.Pi = np.full(shape=self.k, fill_value=1/self.k)

        # P(x_i, k) / P(x_i)
        self.weights = np.full(shape=(self.n, self.k), fill_value=1/self.k)  # n x k

        # Each mu is a randomly selected data point. Rows are drawn without
        # replacement whenever possible: two components starting on the same
        # point would receive identical updates and never separate.
        random_row = np.random.choice(self.n, size=self.k, replace=self.k > self.n)
        self.mu = [X[row_index, :].copy() for row_index in random_row]

        # Each class is given the same covariance for initialization
        initial_cov = np.cov(X.T)
        self.sigma = [initial_cov.copy() for _ in range(self.k)]

    def E_step(self, X):
        '''Update the responsibilities and priors, holding mu and sigma constant.'''
        self.weights = self.predict_proba(X)
        # Pi_k = N_k^{soft} / N : column means of the (n x k) responsibilities
        self.Pi = self.weights.mean(axis=0)

    def M_step(self, X):
        '''Update mu and sigma of every component, holding the weights constant.'''
        X = np.asarray(X)
        for i in range(self.k):
            weight = self.weights[:, i]      # (n,) responsibilities of component i
            total_weight = weight.sum()
            # Weighted mean of the rows of X. Only component i is updated;
            # the other means in the list are left untouched.
            self.mu[i] = weight @ X / total_weight
            # Weighted maximum-likelihood covariance (bias=True -> no ddof correction)
            self.sigma[i] = np.cov(X.T, aweights=weight / total_weight, bias=True)

    def fit(self, X):
        '''Initialise from X, then run max_iter rounds of E-step followed by M-step.'''
        self.initialize(X)

        for iteration in range(self.max_iter):
            self.E_step(X)
            self.M_step(X)

    def predict_proba(self, X):
        '''
            Return the (rows of X) x k matrix of responsibilities r_nk.

            X may have any number of rows; it does not have to be the training set.
        '''
        X = np.asarray(X)
        # likelihood[n, i] = N(x_n | mu_i, sigma_i); sized from X, not from self.n
        likelihood = np.zeros((X.shape[0], self.k))
        for i in range(self.k):
            # row of X is considered as 1 example
            distribution = multivariate_normal(mean=self.mu[i], cov=self.sigma[i])
            likelihood[:, i] = distribution.pdf(X)

        # broadcasting: the i-th column is multiplied by P(i) only
        numerator = likelihood * self.Pi                        # (n x k)
        denominator = numerator.sum(axis=1, keepdims=True)      # (n x 1)
        return numerator / denominator                          # (n x k)

In [46]:
# Mixture with k=5 components; max_iter keeps its default of 10.
gmm = GMM(5)

In [47]:
# Run only the initialisation (no EM iterations): uniform priors and weights,
# means taken from random rows of X, and np.cov(X.T) as every component's covariance.
gmm.initialize(X)

In [48]:
# Responsibilities under the freshly initialised parameters:
# one row per sample of X, one column per mixture component.
w = gmm.predict_proba(X)

(313, 1)
(313, 5)


In [37]:
# Scratch check of broadcasting: a length-5 vector times the length-5 priors
# multiplies element by element.
A = np.arange(1, 6)
A * gmm.Pi

array([0.2, 0.4, 0.6, 0.8, 1. ])

In [44]:
# Shape of the responsibility matrix; GMM.initialize creates it as (n, k).
# NOTE(review): the recorded output below does not match (n, k) and this cell's
# execution count is lower than the class definition's -- likely stale; re-run to confirm.
gmm.weights.shape

(313, 20)