In [234]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from os.path import join
import scipy 
from scipy.stats import ttest_ind
import matplotlib.patches as mpatches

In [321]:
# --- Run configuration -------------------------------------------------------
# Flip IS_DISYLLABLE to switch the notebook between the disyllable and the
# monosyllable evaluation.  The data directory is derived from the flag so the
# two settings cannot drift out of sync.
IS_DISYLLABLE = True
# Experiment number; used in the names of the exported CSV files.
EXP_NUM = 9
# Alternative directory layout kept for reference:
# DIR_PATH = 'result/eval_'+str(EXP_NUM)+'/formant/'
DIR_PATH = 'eval_di/formant' if IS_DISYLLABLE else 'eval_mono/formant'
FORMANT_FILE = join(DIR_PATH, 'formant_df.csv')
DATAPOINT_FILE = join(DIR_PATH,'data_point.csv')

In [322]:
def sep_label_position(df):
    """Split the combined ``'<label>;<position>'`` strings in ``df['Label']``.

    Returns a new DataFrame in which ``Label`` holds the text before the first
    ``;`` and ``Position`` the text after it (both as strings).  The input
    frame is not modified, so a cell calling this can be re-run on the same
    frame without failing on already-split labels.

    Raises whatever pandas raises when no label contains a ``;`` (the split
    then yields a single column, which cannot fill both target columns).
    """
    # n=1: split on the first ';' only, so exactly two columns are produced.
    parts = df['Label'].str.split(';', n=1, expand=True)
    result = df.copy()
    result[['Label', 'Position']] = parts
    return result

def prep_df(df):
    """Apply the preprocessing required by the current syllable mode.

    When the module-level ``IS_DISYLLABLE`` flag is set, the frame is passed
    through ``sep_label_position``; otherwise it is returned as-is.
    """
    return sep_label_position(df) if IS_DISYLLABLE else df

def custome_reindex(df):
    """Reindex ``df`` rows into the fixed ASCII vowel-label order.

    Labels that are missing from the index come back as NaN rows, which is
    the standard ``reindex`` behaviour.
    """
    vowel_order = ["a:", "i:", "u:", "e:", "E", "@", "9", "o", "O"]
    return df.reindex(index=vowel_order)

def custome_reindex_type2(df):
    """Reindex ``df`` rows into the fixed IPA vowel-label order.

    Labels that are missing from the index come back as NaN rows, which is
    the standard ``reindex`` behaviour.
    """
    ipa_vowel_order = ["a:", "i:", "u:", "e:", 'ɛ:', 'ə:', 'œ:', 'o:', 'ɔ:']
    return df.reindex(index=ipa_vowel_order)

In [323]:
# Load both CSV exports; the 'Unnamed: 0' column of the data-point file is
# dropped right after reading.
formant_df = pd.read_csv(FORMANT_FILE)
datapoint_df = pd.read_csv(DATAPOINT_FILE).drop(columns=['Unnamed: 0'])

# Split 'label;position' into two columns when running in disyllable mode.
formant_df = prep_df(formant_df)
for preview in (formant_df, datapoint_df):
    print(preview.head())

  Label          F1          F2         F3 Position
0    a:    1.324905    7.172430   4.575608        1
1    i:    8.807210    1.855705   1.923865        2
2    a:    1.429687    7.768621   4.975275        1
3    u:  133.877115  143.320097  15.115863        2
4    a:    1.423626    7.640646   4.885537        1
  Label          F1           F2  Target
0  a:;1  669.990576  1154.795301       1
1  i:;2  344.740696  2020.981425       1
2  a:;1  670.052038  1154.808706       1
3  u:;2  360.821113   767.380300       1
4  a:;1  670.038928  1154.797153       1


## Compute Formant RMSE 

In [267]:
# Build the per-vowel table that is exported as formant_rmse_exp<EXP_NUM>.csv.
if not IS_DISYLLABLE:
    formant_rmse = formant_df.copy().set_index('Label')
else:
    # Average per (vowel, position), then spread the positions across columns.
    per_position_mean = formant_df.groupby(['Label', 'Position']).mean()
    formant_rmse = per_position_mean.pivot_table(
        index=['Label'],
        columns='Position',
        values=['F1', 'F2', 'F3'],
        fill_value=0,
    )
    # Flatten the (formant, position) column pairs into names such as 'F1-1'.
    formant_rmse.columns = ['-'.join(pair).strip() for pair in formant_rmse.columns.values]

formant_rmse = custome_reindex(formant_rmse)
print(formant_rmse)
formant_rmse.to_csv(join(DIR_PATH, 'formant_rmse_exp' + str(EXP_NUM) + '.csv'))

            F1-1       F1-2       F2-1       F2-2      F3-1      F3-2
Label                                                                
a:      1.399981   2.443049   7.557281   4.779231  4.842692  3.414801
i:     12.368307  10.412467   1.514911   3.370710  2.528884  4.938767
u:     26.752969  49.974638  14.288607  57.518856  3.561749  9.850690
e:      0.483650   2.899303   0.091354   1.113316  1.161189  1.219915
E       4.154068   4.871217   0.926588   1.518665  6.272840  5.692965
@      17.063419  12.318253   1.665021   4.091966  6.496688  7.779489
9      12.846999  10.206639   4.449338   4.625190  7.965251  7.832681
o       1.506267   3.145165   3.310455   9.094594  0.557440  1.336420
O       2.690173   2.314403   9.751842   6.967282  2.184202  2.302741


## Compute t-test (estimated vs. target formants)

In [268]:
# Quick look at the raw data points before they are pivoted for the t-test.
datapoint_df.head()

Unnamed: 0,Label,F1,F2,Target
0,a:;1,669.990576,1154.795301,1
1,i:;2,344.740696,2020.981425,1
2,a:;1,670.052038,1154.808706,1
3,u:;2,360.821113,767.3803,1
4,a:;1,670.038928,1154.797153,1


In [269]:
# One row per label, one column per (formant, Target) combination.
target_pivot = datapoint_df.pivot_table(
    index=['Label'],
    columns='Target',
    values=['F1', 'F2'],
    fill_value=0,
)
# Flatten the column pairs into names such as 'F1-0' / 'F1-1'.
target_pivot.columns = ['-'.join(map(str, pair)).strip() for pair in target_pivot.columns.values]
datapoint_prep = prep_df(target_pivot.reset_index())
datapoint_prep.head()

Unnamed: 0,Label,F1-0,F1-1,F2-0,F2-1,Position
0,9,573.659739,508.353087,1441.20579,1379.830601,1
1,9,547.35663,502.150841,1445.105118,1384.276987,2
2,@,537.517587,459.167924,1592.215045,1566.138577,1
3,@,517.848071,464.781835,1596.037085,1541.306773,2
4,E,586.356155,562.970704,1595.6263,1610.554401,1


In [302]:
def _ttest_row(frame, formant):
    """Return ``[statistic, p-value]`` of ``ttest_ind`` on ``<formant>-0`` vs ``<formant>-1``."""
    outcome = ttest_ind(frame[formant + '-0'], frame[formant + '-1'])
    return [outcome[0], outcome[1]]


# Column order of the exported table: 'result', then one column per
# (syllable position, formant) pair.
ttest_columns = {'result': ['t-test', 'p-value']}

if IS_DISYLLABLE:
    # Position values are strings because they come from a string split.
    first_syl = datapoint_prep[datapoint_prep['Position'] == '1']
    second_syl = datapoint_prep[datapoint_prep['Position'] == '2']
    for position_tag, syllable_frame in (('1', first_syl), ('2', second_syl)):
        for formant in ('F1', 'F2'):
            ttest_columns[position_tag + formant] = _ttest_row(syllable_frame, formant)
else:
    for formant in ('F1', 'F2'):
        ttest_columns['1' + formant] = _ttest_row(datapoint_prep, formant)

ttest_df = pd.DataFrame(ttest_columns)

print(ttest_df.head())
ttest_df.to_csv(join(DIR_PATH, 'ttest_exp' + str(EXP_NUM) + '.csv'))

    result       1F1       1F2
0   t-test  0.418236  0.325098
1  p-value  0.678405  0.747099


## Plot Formant Chart

In [274]:
# Print the data-point frame for inspection.  A bare `datapoint_df.head()`
# placed before the print would be evaluated and discarded (only the last
# expression of a cell is displayed), so only the print is kept.
print(datapoint_df)

    Label          F1           F2  Target Position
0      a:  669.990576  1154.795301       1        1
1      i:  344.740696  2020.981425       1        2
2      a:  670.052038  1154.808706       1        1
3      u:  360.821113   767.380300       1        2
4      a:  670.038928  1154.797153       1        1
5      e:  405.489101  1951.648056       1        2
6      a:  670.058595  1154.812237       1        1
7       E  586.422422  1531.863833       1        2
8      a:  670.062328  1154.813489       1        1
9       o  422.789177   734.701852       1        2
10     a:  670.001148  1154.803940       1        1
11      @  506.221266  1499.309620       1        2
12     a:  670.009561  1154.779924       1        1
13      9  542.222055  1339.742677       1        2
14     a:  670.103224  1154.839086       1        1
15      O  573.294479   965.556797       1        2
16     i:  266.845751  2185.974236       1        1
17     a:  608.969746  1344.840284       1        2
18     i:  2

In [376]:
# Re-read the data-point CSV so the plotting section starts from a freshly
# loaded frame.
datapoint_df = pd.read_csv(DATAPOINT_FILE).drop(columns=['Unnamed: 0'])

In [377]:
def set_datapoint_index(df):
    """Return a copy of ``df`` indexed by its ``Label`` values.

    The index is named ``Label_idx``; the ``Label`` column itself is kept so
    it can still be read (and relabelled) as a regular column.
    """
    return df.assign(Label_idx=df['Label']).set_index('Label_idx')

# Split 'label;position' (disyllable runs only) and index the rows by their
# original ASCII label, which custome_reindex looks up in the monosyllable
# branch.
datapoint_df = prep_df(datapoint_df)
datapoint_df = set_datapoint_index(datapoint_df)

# Relabel the ASCII vowel codes with the IPA strings used by
# custome_reindex_type2 and shown in the plot legend.  Series.replace only
# touches rows that actually carry one of these labels, whereas assigning via
# `.at[code, 'Label']` would append a new NaN row whenever a code is absent
# from the index.
ASCII_TO_IPA = {'E': 'ɛ:', 'O': 'ɔ:', '9': 'œ:', '@': 'ə:', 'o': 'o:'}
datapoint_df['Label'] = datapoint_df['Label'].replace(ASCII_TO_IPA)

if IS_DISYLLABLE:
    # Average per (vowel, target flag, position) and re-index by the IPA label.
    grouped_mean = datapoint_df.groupby(['Label', 'Target', 'Position']).mean().reset_index()
    datapoint_df = set_datapoint_index(grouped_mean)
    # Position values are strings because they come from a string split.
    first_syl = datapoint_df[datapoint_df['Position'] == '1']
    second_syl = datapoint_df[datapoint_df['Position'] == '2']

In [404]:
def plot_formant_chart(df, reindex_fn, note=None):
    """Draw and save an F2/F1 vowel chart comparing target and estimated formants.

    Args:
        df: DataFrame with ``F1``, ``F2``, ``Label`` and ``Target`` columns.
            Rows with ``Target == 1`` are drawn in red (legend entry
            "Target"), rows with ``Target == 0`` in blue ("Estimated").
        reindex_fn: Callable that reindexes a frame into the vowel order to
            plot.  Its result must provide one row per marker (nine).
        note: Optional tag added to the title and the file name; it is only
            used when ``IS_DISYLLABLE`` is true.

    The figure is written to ``DIR_PATH`` as PNG and PDF and then closed.
    Nothing is returned.
    """
    filled_markers = ('o', 'v', "s", "P", "*", "D", "^", "X", "<")

    actual = reindex_fn(df[df['Target'] == 1].copy()).reset_index()
    estimated = reindex_fn(df[df['Target'] == 0].copy()).reset_index()

    fig, ax = plt.subplots()

    # One marker shape per vowel; only the target points carry a legend label
    # so each vowel appears once in the vowel legend.
    for idx, mark in enumerate(filled_markers):
        ax.scatter(actual['F2'][idx], actual['F1'][idx], marker=mark, color='red', label=actual['Label'][idx])
        ax.scatter(estimated['F2'][idx], estimated['F1'][idx], marker=mark, color='blue')

    ax.set_xticks(np.arange(400, 2400+1, 200))
    ax.set_yticks(np.arange(100, 800+1, 100))

    # Both axes are inverted for the vowel-chart orientation.
    ax.invert_xaxis()
    ax.invert_yaxis()

    ax.set_xlabel('F2 [Hz]', fontsize=14)
    ax.set_ylabel('F1 [Hz]', fontsize=14)

    # Vowel legend: marker shapes only, recoloured black so it does not look
    # like it belongs to the red (target) series.
    vowel_legend = ax.legend(bbox_to_anchor=(1.05, 0.8), loc=2, borderaxespad=0.)
    # `legendHandles` was renamed to `legend_handles` in newer matplotlib and
    # later removed; support both spellings.
    if hasattr(vowel_legend, 'legend_handles'):
        vowel_handles = vowel_legend.legend_handles
    else:
        vowel_handles = vowel_legend.legendHandles
    for handle in vowel_handles:
        handle.set_color('black')

    # Colour legend.  Creating it replaces the axes' current legend, so the
    # vowel legend is re-attached as a plain artist afterwards.
    red_patch = mpatches.Patch(color='red', label='Target')
    blue_patch = mpatches.Patch(color='blue', label='Estimated')
    ax.legend(handles=[red_patch, blue_patch], bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    ax.add_artist(vowel_legend)

    fig.set_size_inches(7, 5)

    syllable_type = 'disyllable' if IS_DISYLLABLE else 'monosyllable'

    if note and IS_DISYLLABLE:
        ax.set_title('Formant Chart of '+note+' ['+syllable_type+']')
        filename = 'formant_chart_'+syllable_type+' '+note
    else:
        ax.set_title('Formant Chart of ['+syllable_type+']')
        filename = 'formant_chart_'+syllable_type

    for file_type in ['.png','.pdf']:
        fig.savefig(join(DIR_PATH, filename+file_type), dpi=300)

    # Close instead of clearing: a cleared figure stays registered with pyplot
    # (and is echoed as an empty "Figure ... with 0 Axes").
    plt.close(fig)

In [405]:
# Disyllable runs get one chart per syllable position (IPA labels);
# monosyllable runs get a single chart (ASCII labels).
if not IS_DISYLLABLE:
    plot_formant_chart(datapoint_df, custome_reindex)
else:
    for syllable_df, syllable_note in ((first_syl, 'FirstSyllable'), (second_syl, 'SecondSyllable')):
        plot_formant_chart(syllable_df, custome_reindex_type2, note=syllable_note)

<Figure size 504x360 with 0 Axes>

<Figure size 504x360 with 0 Axes>