In [2]:
from sklearn.mixture import GaussianMixture as GMM
from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

In [3]:
# Read the phoneme inventory: only the first line of the file is used, parsed
# as a comma-separated list.
with open("phoneme_name.txt", "r") as names_file:
    raw_lines = list(names_file)
first_line_fields = raw_lines[0].strip().split(",")
# The last comma-separated field is discarded (presumably the empty string
# left by a trailing comma -- TODO confirm against the file).
# NOTE(review): the printed list contains an empty name '' at position 11;
# verify that this entry is intended, since it is later used as a label and
# as a model file name.
phoneme_names = first_line_fields[:-1]
print(phoneme_names)

['sil', 'sh', 'ih', 'hh', 'eh', 'jh', 'd', 'ah', 'k', 's', 'uw', '', 'n', 'g', 'r', 'w', 'aa', 'dx', 'er', 'l', 'y', 'uh', 'ae', 'm', 'oy', 'dh', 'iy', 'v', 'f', 't', 'ow', 'ch', 'b', 'ng', 'ay', 'th', 'ey', 'p', 'aw', 'z']


In [4]:
# Build the two lookup tables between phoneme names and integer class ids.
# The id of a phoneme is its position in phoneme_names.
int_to_label = dict(enumerate(phoneme_names))
label_to_int = {name: idx for idx, name in enumerate(phoneme_names)}
print(label_to_int)
print(int_to_label)


{'sil': 0, 'sh': 1, 'ih': 2, 'hh': 3, 'eh': 4, 'jh': 5, 'd': 6, 'ah': 7, 'k': 8, 's': 9, 'uw': 10, '': 11, 'n': 12, 'g': 13, 'r': 14, 'w': 15, 'aa': 16, 'dx': 17, 'er': 18, 'l': 19, 'y': 20, 'uh': 21, 'ae': 22, 'm': 23, 'oy': 24, 'dh': 25, 'iy': 26, 'v': 27, 'f': 28, 't': 29, 'ow': 30, 'ch': 31, 'b': 32, 'ng': 33, 'ay': 34, 'th': 35, 'ey': 36, 'p': 37, 'aw': 38, 'z': 39}
{0: 'sil', 1: 'sh', 2: 'ih', 3: 'hh', 4: 'eh', 5: 'jh', 6: 'd', 7: 'ah', 8: 'k', 9: 's', 10: 'uw', 11: '', 12: 'n', 13: 'g', 14: 'r', 15: 'w', 16: 'aa', 17: 'dx', 18: 'er', 19: 'l', 20: 'y', 21: 'uh', 22: 'ae', 23: 'm', 24: 'oy', 25: 'dh', 26: 'iy', 27: 'v', 28: 'f', 29: 't', 30: 'ow', 31: 'ch', 32: 'b', 33: 'ng', 34: 'ay', 35: 'th', 36: 'ey', 37: 'p', 38: 'aw', 39: 'z'}


In [5]:
# Load the test split and unpack it into plain numpy arrays.
timit_test_df = pd.read_hdf("./test_features/mfcc/timit.hdf")

# One row per sample; each entry of the "features" column becomes one row.
test_features = np.array(timit_test_df["features"].tolist())

# Labels are stored as a column vector (one label per row) so that
# test_labels[i][0] yields the label of sample i.
test_labels = np.array(timit_test_df["labels"].tolist()).reshape(-1, 1)

# Number of test samples, reused by the evaluation cells below.
total_test_sample = test_features.shape[0]

## Evaluating the test set with different numbers of mixture components - Case (a) only, MFCC (ii) without energy coefficients - (2, 4, 8, 16, 32, 64, 128, 256)

### mixture component : 2

In [10]:
# Evaluate the 2-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
# NOTE(review): this directory is "002_mfcc" while the other mixture sizes
# load from "models//NNN//" -- confirm the difference is intentional.
gmm = []
for name in phoneme_names:
    path = "models//002_mfcc//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)

Total samples matched:  50613
Accuracy is:  11.205995660452553


### mixture component : 4

In [11]:
# Evaluate the 4-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
gmm = []
for name in phoneme_names:
    path = "models//004//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

total_test_sample = test_features.shape[0]

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)


Total samples matched:  59982
Accuracy is:  13.280343621308063


### mixture component : 8

In [12]:
# Evaluate the 8-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
gmm = []
for name in phoneme_names:
    path = "models//008//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)

Total samples matched:  61081
Accuracy is:  13.523668246025771


### mixture component : 16

In [13]:
# Evaluate the 16-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
gmm = []
for name in phoneme_names:
    path = "models//016//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)

Total samples matched:  62803
Accuracy is:  13.904928486029315


### mixture component : 32

In [14]:
# Evaluate the 32-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
gmm = []
for name in phoneme_names:
    path = "models//032//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)

Total samples matched:  61847
Accuracy is:  13.693264845237568


### mixture component : 64

In [15]:
# Evaluate the 64-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
gmm = []
for name in phoneme_names:
    path = "models//064//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)

Total samples matched:  59181
Accuracy is:  13.102997830226276


### mixture component : 128

In [7]:
# Evaluate the 128-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
gmm = []
for name in phoneme_names:
    path = "models//128//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)

Total samples matched:  63205
Accuracy is:  13.993933489793205


### mixture component : 256

In [None]:
# Evaluate the 256-component models: load one pickled model per phoneme, score
# every test frame under each model, and count the frames whose best-scoring
# model matches the reference label.

# Models are loaded in phoneme_names order so that the position of a model in
# `gmm` equals the integer id of its phoneme in label_to_int.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files from a trusted source.
gmm = []
for name in phoneme_names:
    path = "models//256//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Drop column 0 of every feature vector (presumably the energy coefficient,
# per the section heading -- TODO confirm) without hard-coding the width.
eval_features = test_features[:, 1:]
# Reference labels as integer ids; raises KeyError on an unknown label.
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])

# Score whole batches instead of one frame at a time. This assumes the pickled
# models are sklearn GaussianMixture instances (the class imported at the top):
# score_samples() returns the per-frame log-likelihood, which is what score()
# yields for a single frame. Batching bounds the size of the intermediate
# (frames x components) arrays, which matters most at this mixture size.
BATCH_SIZE = 8192
matched = 0
for start in range(0, total_test_sample, BATCH_SIZE):
    stop = start + BATCH_SIZE
    log_likelihoods = np.column_stack(
        [model.score_samples(eval_features[start:stop]) for model in gmm]
    )
    # argmax returns the first maximum, matching list.index(max(...)).
    predicted_ids = log_likelihoods.argmax(axis=1)
    matched += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total samples matched: ",matched)
accuracy=(matched/total_test_sample)*100
print("Accuracy is: ",accuracy)