# Nearest point classifier
In this program, we classify the phonemes according to how close they are to the center of gravity of the phonemes in the training set.

We use pandas to read the Peterson and Barney vowel data.

In [65]:
import pandas as pd
import numpy as np

In [16]:
# Load the Peterson and Barney recordings: a whitespace-separated table with
# no header row, so columns are addressed by position (0-7).
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`,
# which pandas has deprecated.
vowel_data = pd.read_csv('verified_pb.data', sep=r'\s+', header=None)
print(vowel_data)

      0   1   2   3      4      5       6       7
0     1   1   1  IY  160.0  240.0  2280.0  2850.0
1     1   1   1  IY  186.0  280.0  2400.0  2790.0
2     1   1   2  IH  203.0  390.0  2030.0  2640.0
3     1   1   2  IH  192.0  310.0  1980.0  2550.0
4     1   1   3  EH  161.0  490.0  1870.0  2420.0
...  ..  ..  ..  ..    ...    ...     ...     ...
1515  3  76   8  UH  322.0  610.0  1550.0  3400.0
1516  3  76   9  UW  345.0  520.0  1250.0  3460.0
1517  3  76   9  UW  334.0  500.0  1140.0  3380.0
1518  3  76  10  ER  308.0  740.0  1850.0  2160.0
1519  3  76  10  ER  328.0  660.0  1830.0  2200.0

[1520 rows x 8 columns]


  vowel_data = pd.read_csv('verified_pb.data',delim_whitespace=True, header = None)


In the data file there are three types of speakers, encoded in column 0: men (1), women (2) and children (3). Each speaker pronounces each of the ten phonemes (numbered 1 to 10 in column 2) twice, which yields the measurements in columns 4, 5, 6 and 7. We use 80% of the speakers in each group as training data: we average their measurement vectors per phoneme, and then predict the vowel of each recording in the test set as the phoneme whose average vector is closest to it.

We divide the data into the three types of speakers. Notice that each speaker has 20 recordings, so we divide the number of rows by 20 to get the number of speakers in each group.

In [53]:
# Split the recordings by the speaker-type code stored in column 0.
type1_vowel, type2_vowel, type3_vowel = (
    vowel_data[vowel_data.iloc[:, 0] == code] for code in (1, 2, 3)
)
# Every speaker contributes 20 rows, so rows / 20 is the number of speakers.
type1_length, type2_length, type3_length = (
    int(len(group) / 20) for group in (type1_vowel, type2_vowel, type3_vowel)
)

In [54]:
# Number of speakers in each group, one value per line.
print(type1_length, type2_length, type3_length, sep='\n')

33
28
15


# We choose the training data:

We use 80% of each of the groups as training data

In [60]:
# Number of training speakers per group: 80% of the group, rounded down.
type1_training_length, type2_training_length, type3_training_length = (
    int(count * 0.8) for count in (type1_length, type2_length, type3_length)
)
# The training rows are the leading `speakers * 20` rows of each group
# (this relies on every speaker occupying 20 consecutive rows).
type1_training = type1_vowel.iloc[:type1_training_length * 20]
type2_training = type2_vowel.iloc[:type2_training_length * 20]
type3_training = type3_vowel.iloc[:type3_training_length * 20]

In [61]:
# Show the three training subsets one after another.
print(type1_training, type2_training, type3_training, sep='\n')

     0   1   2   3      4      5       6       7
0    1   1   1  IY  160.0  240.0  2280.0  2850.0
1    1   1   1  IY  186.0  280.0  2400.0  2790.0
2    1   1   2  IH  203.0  390.0  2030.0  2640.0
3    1   1   2  IH  192.0  310.0  1980.0  2550.0
4    1   1   3  EH  161.0  490.0  1870.0  2420.0
..  ..  ..  ..  ..    ...    ...     ...     ...
515  1  26   8  UH  125.0  462.0   976.0  2450.0
516  1  26   9  UW  120.0  324.0   708.0  2440.0
517  1  26   9  UW  157.0  387.0   786.0  2518.0
518  1  26  10  ER  122.0  488.0  1468.0  1712.0
519  1  26  10  ER  118.0  472.0  1465.0  1725.0

[520 rows x 8 columns]
      0   1   2    3      4      5       6       7
660   2  34   1  *IY  230.0  370.0  2670.0  3100.0
661   2  34   1   IY  234.0  390.0  2760.0  3060.0
662   2  34   2   IH  234.0  468.0  2330.0  2930.0
663   2  34   2   IH  205.0  410.0  2380.0  2950.0
664   2  34   3   EH  190.0  550.0  2200.0  2880.0
...  ..  ..  ..  ...    ...    ...     ...     ...
1095  2  55   8   UH  235.0  47

# Now we find the center of gravity for each phoneme using the last four columns:

To do this, we transform the dataframes into numpy arrays, which I found easier to work with for calculations.

In [109]:
# Centre of gravity of every phoneme, pooled over the three speaker groups of
# the training set. Row i holds the mean of columns 4-7 for phoneme i + 1.
phonemes_center = np.zeros((10, 4))
for i in range(10):
    # Column 2 is the phoneme number (1-10); columns 4-7 are the measurements.
    vectors1 = type1_training[type1_training.iloc[:, 2] == i + 1].iloc[:, 4:8].to_numpy()
    vectors2 = type2_training[type2_training.iloc[:, 2] == i + 1].iloc[:, 4:8].to_numpy()
    vectors3 = type3_training[type3_training.iloc[:, 2] == i + 1].iloc[:, 4:8].to_numpy()
    # Mean over all rows of the three groups together, i.e. total sum divided
    # by total count, computed by numpy instead of the builtin `sum`.
    phonemes_center[i, :] = np.concatenate((vectors1, vectors2, vectors3)).mean(axis=0)
print(phonemes_center)

    

[[ 194.21666667  300.675      2652.54166667 3253.41666667]
 [ 195.55833333  437.75833333 2320.65833333 2965.625     ]
 [ 185.          586.45       2177.66666667 2897.55833333]
 [ 177.425       804.96666667 1970.46666667 2767.29166667]
 [ 186.7         724.11666667 1349.025      2717.56666667]
 [ 179.68333333  827.95       1193.83333333 2720.44166667]
 [ 183.26666667  594.30833333  903.89166667 2707.40833333]
 [ 195.71666667  469.53333333 1138.68333333 2642.7       ]
 [ 198.89166667  353.68333333  951.875      2605.98333333]
 [ 187.75833333  508.1        1562.26666667 1912.28333333]]


# Now we can make a classifier function

The function receives a vector of four values and returns the number of the phoneme whose center of gravity is closest to it. We add 1 to the index because Python counts from 0, while the phonemes are numbered from 1.


In [121]:
def classifier(argument, centers=None):
    """Return the 1-based number of the centre nearest to *argument*.

    Parameters
    ----------
    argument : array-like
        One measurement vector, with as many entries as a centre has columns.
    centers : array-like, optional
        Matrix with one centre per row. Defaults to the module-level
        ``phonemes_center``.

    Returns
    -------
    int
        Index of the closest centre (Euclidean distance) plus one, because
        the phonemes are numbered from 1. Ties go to the lowest number.
    """
    if centers is None:
        centers = phonemes_center
    # One Euclidean distance per centre row, computed in a single call.
    distances = np.linalg.norm(np.asarray(centers) - np.asarray(argument), axis=1)
    return distances.argmin() + 1
    

# Accuracy

We check the accuracy of the classifier with the rest of the data.


In [161]:
# Hold out the speakers that were not used for training as the test set.
# All three slices stop at `length * 20`, the end of the group's
# whole-speaker rows, so the three groups are handled identically.
type1_test = type1_vowel.iloc[type1_training_length * 20:type1_length * 20, :]
type2_test = type2_vowel.iloc[type2_training_length * 20:type2_length * 20, :]
type3_test = type3_vowel.iloc[type3_training_length * 20:type3_length * 20, :]

# Measurement columns (4-7) as plain arrays for the distance computation.
type1_test_np = type1_test.iloc[:, 4:8].to_numpy()
type2_test_np = type2_test.iloc[:, 4:8].to_numpy()
type3_test_np = type3_test.iloc[:, 4:8].to_numpy()

# One boolean per test row: does the predicted phoneme number match column 2?
check1 = np.array(
    [classifier(row) == label for row, label in zip(type1_test_np, type1_test.iloc[:, 2])],
    dtype=bool,
)
check2 = np.array(
    [classifier(row) == label for row, label in zip(type2_test_np, type2_test.iloc[:, 2])],
    dtype=bool,
)
check3 = np.array(
    [classifier(row) == label for row, label in zip(type3_test_np, type3_test.iloc[:, 2])],
    dtype=bool,
)

# Accuracy is the fraction of correctly classified rows in each group.
accuracy1 = check1.mean()
accuracy2 = check2.mean()
accuracy3 = check3.mean()

# Results

In [162]:
# Accuracy on the test rows of group 1, 2 and 3, separated by single spaces.
print(f"{accuracy1} {accuracy2} {accuracy3}")

0.4142857142857143 0.75 0.48333333333333334


# Discussion

The accuracy of this type of classifier is pretty low, at least for type 1 and type 3 speakers, that is, men and children. This may be the result of using a single classifier for all kinds of voices. The classifier may perform better if the type of speaker is identified before making the decision. In other words, it may be better to have a different classifier for men, women and children:


Men classifier:

In [163]:
# Per-phoneme centre of gravity computed from the men's (type 1) training
# rows only. Row i holds the mean of columns 4-7 for phoneme i + 1.
phonemes_center1 = np.zeros((10, 4))
for i in range(10):
    vectors1 = type1_training[type1_training.iloc[:, 2] == i + 1].iloc[:, 4:8].to_numpy()
    # Column-wise mean computed by numpy instead of builtin `sum` / row count.
    phonemes_center1[i, :] = vectors1.mean(axis=0)

def classifier1(argument, centers=None):
    """Return the 1-based number of the men's centre nearest to *argument*.

    Parameters
    ----------
    argument : array-like
        One measurement vector, with as many entries as a centre has columns.
    centers : array-like, optional
        Matrix with one centre per row. Defaults to the module-level
        ``phonemes_center1``.

    Returns
    -------
    int
        Index of the closest centre (Euclidean distance) plus one, because
        the phonemes are numbered from 1. Ties go to the lowest number.
    """
    if centers is None:
        centers = phonemes_center1
    # One Euclidean distance per centre row, computed in a single call.
    distances = np.linalg.norm(np.asarray(centers) - np.asarray(argument), axis=1)
    return distances.argmin() + 1


Women classifier:

In [164]:
# Per-phoneme centre of gravity computed from the women's (type 2) training
# rows only. Row i holds the mean of columns 4-7 for phoneme i + 1.
phonemes_center2 = np.zeros((10, 4))
for i in range(10):
    vectors2 = type2_training[type2_training.iloc[:, 2] == i + 1].iloc[:, 4:8].to_numpy()
    # Column-wise mean computed by numpy instead of builtin `sum` / row count.
    phonemes_center2[i, :] = vectors2.mean(axis=0)

def classifier2(argument, centers=None):
    """Return the 1-based number of the women's centre nearest to *argument*.

    Parameters
    ----------
    argument : array-like
        One measurement vector, with as many entries as a centre has columns.
    centers : array-like, optional
        Matrix with one centre per row. Defaults to the module-level
        ``phonemes_center2``.

    Returns
    -------
    int
        Index of the closest centre (Euclidean distance) plus one, because
        the phonemes are numbered from 1. Ties go to the lowest number.
    """
    if centers is None:
        centers = phonemes_center2
    # One Euclidean distance per centre row, computed in a single call.
    distances = np.linalg.norm(np.asarray(centers) - np.asarray(argument), axis=1)
    return distances.argmin() + 1


Children classifier:

In [165]:
# Per-phoneme centre of gravity computed from the children's (type 3)
# training rows only. Row i holds the mean of columns 4-7 for phoneme i + 1.
phonemes_center3 = np.zeros((10, 4))
for i in range(10):
    vectors3 = type3_training[type3_training.iloc[:, 2] == i + 1].iloc[:, 4:8].to_numpy()
    # Column-wise mean computed by numpy instead of builtin `sum` / row count.
    phonemes_center3[i, :] = vectors3.mean(axis=0)

def classifier3(argument, centers=None):
    """Return the 1-based number of the children's centre nearest to *argument*.

    Parameters
    ----------
    argument : array-like
        One measurement vector, with as many entries as a centre has columns.
    centers : array-like, optional
        Matrix with one centre per row. Defaults to the module-level
        ``phonemes_center3``.

    Returns
    -------
    int
        Index of the closest centre (Euclidean distance) plus one, because
        the phonemes are numbered from 1. Ties go to the lowest number.
    """
    if centers is None:
        centers = phonemes_center3
    # One Euclidean distance per centre row, computed in a single call.
    distances = np.linalg.norm(np.asarray(centers) - np.asarray(argument), axis=1)
    return distances.argmin() + 1

The new accuracy becomes: 

In [168]:
# Re-evaluate every test group with the classifier trained on that group.
# The check arrays are built here rather than written into buffers left over
# from an earlier cell, so this cell does not depend on their size or dtype.
check1 = np.array(
    [classifier1(row) == label for row, label in zip(type1_test_np, type1_test.iloc[:, 2])],
    dtype=bool,
)
check2 = np.array(
    [classifier2(row) == label for row, label in zip(type2_test_np, type2_test.iloc[:, 2])],
    dtype=bool,
)
check3 = np.array(
    [classifier3(row) == label for row, label in zip(type3_test_np, type3_test.iloc[:, 2])],
    dtype=bool,
)

# Accuracy is the fraction of correctly classified rows in each group.
accuracy1 = check1.mean()
accuracy2 = check2.mean()
accuracy3 = check3.mean()

print(accuracy1,accuracy2,accuracy3)

0.8571428571428571 0.7333333333333333 0.75


With this, the baseline classifier is somewhat acceptable. However, it requires prior knowledge of the type of speaker being tested. Otherwise, we would need another classifier that could distinguish between men, women and children before applying the group-specific classifier.