### TESTING OF GMM MODELS FOR 40 CLASSES OF PHONEMES

In [1]:
from sklearn.mixture import GaussianMixture as GMM
from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

In [2]:
# Read the phoneme class names from phoneme_name.txt (comma-separated, first line only).
with open("phoneme_name.txt", "r") as names_file:
    raw_lines = names_file.readlines()
# Split the first line on commas and drop the last field.
# presumably the line ends with a trailing comma, making the last field empty - TODO confirm
first_line_fields = raw_lines[0].strip().split(",")
phoneme_names = first_line_fields[:-1]
# NOTE(review): the recorded output shows an empty name '' at index 11; verify
# whether that is a real class in the label set or an artifact of the file.
print(phoneme_names)

['sil', 'sh', 'ih', 'hh', 'eh', 'jh', 'd', 'ah', 'k', 's', 'uw', '', 'n', 'g', 'r', 'w', 'aa', 'dx', 'er', 'l', 'y', 'uh', 'ae', 'm', 'oy', 'dh', 'iy', 'v', 'f', 't', 'ow', 'ch', 'b', 'ng', 'ay', 'th', 'ey', 'p', 'aw', 'z']


In [5]:
# Build the two lookup tables between phoneme names and their class ids.
# The class id of a phoneme is its position in phoneme_names.
int_to_label = {idx: name for idx, name in enumerate(phoneme_names)}
label_to_int = {name: idx for idx, name in int_to_label.items()}
print(label_to_int)
print(int_to_label)


{'k': 8, '': 11, 'g': 13, 'z': 39, 'ay': 34, 'eh': 4, 'ah': 7, 'n': 12, 'jh': 5, 'th': 35, 'aw': 38, 'uh': 21, 'iy': 26, 's': 9, 'y': 20, 'aa': 16, 'ih': 2, 'ey': 36, 'p': 37, 'r': 14, 'dh': 25, 'ch': 31, 'oy': 24, 'd': 6, 'er': 18, 'w': 15, 'f': 28, 'sh': 1, 'm': 23, 'l': 19, 'sil': 0, 'ae': 22, 'v': 27, 'uw': 10, 'dx': 17, 'hh': 3, 'ng': 33, 'ow': 30, 't': 29, 'b': 32}
{0: 'sil', 1: 'sh', 2: 'ih', 3: 'hh', 4: 'eh', 5: 'jh', 6: 'd', 7: 'ah', 8: 'k', 9: 's', 10: 'uw', 11: '', 12: 'n', 13: 'g', 14: 'r', 15: 'w', 16: 'aa', 17: 'dx', 18: 'er', 19: 'l', 20: 'y', 21: 'uh', 22: 'ae', 23: 'm', 24: 'oy', 25: 'dh', 26: 'iy', 27: 'v', 28: 'f', 29: 't', 30: 'ow', 31: 'ch', 32: 'b', 33: 'ng', 34: 'ay', 35: 'th', 36: 'ey', 37: 'p', 38: 'aw', 39: 'z'}


In [6]:
# Load the test set from the HDF file and turn its two columns into arrays.
timit_test_df = pd.read_hdf("./test_features/mfcc/timit.hdf")
# One row per test sample; each "features" entry becomes one row of the array.
test_features = np.array(timit_test_df["features"].tolist())
# Labels are stored as a column vector of shape (n_samples, 1).
test_labels = np.array(timit_test_df["labels"].tolist()).reshape(-1, 1)
total_test_sample = len(test_features)

## Testing the test data set with different numbers of mixture components, only for Case (a), MFCC (ii) without energy coefficients - (2, 4, 8, 16, 32, 64, 128, 256)

### mixture component : 2

In [None]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
# NOTE(review): this directory is named "002_mfcc" while the other cells use
# "004", "008", ... - confirm this is the intended model set.
gmm = []
for name in phoneme_names:
    path = "models//002_mfcc//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth=: ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

### mixture component : 4

In [8]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
gmm = []
for name in phoneme_names:
    path = "models//004//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth=: ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

Total test samples predicted label matched with ground truth=:  59982
Accuracy of model is:  13.280343621308063


### mixture component : 8

In [10]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
gmm = []
for name in phoneme_names:
    path = "models//008//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth= ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

Total test samples predicted label matched with ground truth=  61081
Accuracy of model is:  13.523668246025771


### mixture component : 16

In [11]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
gmm = []
for name in phoneme_names:
    path = "models//016//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth=: ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

Total test samples predicted label matched with ground truth=:  62803
Accuracy of model is:  13.904928486029315


### mixture component : 32

In [12]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
gmm = []
for name in phoneme_names:
    path = "models//032//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth=: ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

Total test samples predicted label matched with ground truth=:  61847
Accuracy of model is:  13.693264845237568


### mixture component : 64

In [13]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
gmm = []
for name in phoneme_names:
    path = "models//064//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth: ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

Total test samples predicted label matched with ground truth:  59181
Accuracy of model is:  13.102997830226276


### mixture component : 128

In [14]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
gmm = []
for name in phoneme_names:
    path = "models//128//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth: ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

Total test samples predicted label matched with ground truth:  56594
Accuracy of model is:  12.530221848292964


### mixture component :  256

In [15]:
# Load the per-phoneme models for this mixture size from their pickle files.
# The list order follows phoneme_names, so gmm[k] is the model for class id k.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted model files.
gmm = []
for name in phoneme_names:
    path = "models//256//"+name+".pkl"
    with open(path, 'rb') as f:
        gmm.append(pickle.load(f))

# Score the test set in batches instead of one sample at a time: one
# score_samples() call per model per batch replaces a score() call per
# (sample, model) pair. For a single row, score() is the mean of one
# score_samples() value, so the ranking of the models is unchanged.
# assumes the pickled objects are sklearn GaussianMixture instances - TODO confirm
mfcc_only = test_features[:, 1:]  # drop column 0 (presumably the energy coefficient)
true_ids = np.array([label_to_int[label] for label in test_labels[:, 0]])
batch_size = 10000  # bounds the size of the temporaries created while scoring
match_found = 0
for start in range(0, total_test_sample, batch_size):
    stop = start + batch_size
    # rows = models (class ids), columns = samples of this batch
    log_likelihood = np.stack([model.score_samples(mfcc_only[start:stop]) for model in gmm])
    # argmax returns the first maximum, matching list.index(max(...)) on ties
    predicted_ids = log_likelihood.argmax(axis=0)
    match_found += int(np.count_nonzero(predicted_ids == true_ids[start:stop]))
print("Total test samples predicted label matched with ground truth: ",match_found)
accuracy=(match_found/total_test_sample)*100
print("Accuracy of model is: ",accuracy)

Total test samples predicted label matched with ground truth:  54483
Accuracy of model is:  12.062834875791523
