In [192]:
import glob
import pickle as pkl
import pandas as pd
from phoneme_info import PHONEME_INFO_DF, ENGLISH_PHONEME_LIST
from sklearn.linear_model import LinearRegression
import numpy as np
import tqdm

In [2]:
def get_phoneid2phone_dict(phoneid_dctpath='./phones/kaldi/phones-list.txt'):
    """Build a lookup from phone id to phone name.

    Parameters
    ----------
    phoneid_dctpath : str
        Path to a tab-separated file without a header row. The first column
        is the phone name, the second a raw id field.

    Returns
    -------
    dict
        Maps each cleaned phone id (a string) to its phone name. The raw id
        is cleaned by stripping whitespace, dropping its final character and
        stripping whitespace again. If an id occurs more than once, the last
        row wins.
    """
    # The argument is honoured as passed; it was previously overwritten with
    # the default path on the first line of the body, so a custom path had no
    # effect.
    phoneiddf = pd.read_csv(phoneid_dctpath, delimiter='\t', header=None, names=['phone', 'id_raw'])
    phoneids = phoneiddf['id_raw'].str.strip().str[:-1].str.strip()
    return dict(zip(phoneids, phoneiddf['phone']))

In [3]:
# Phone-id -> phone-name lookup, built by calling the loader with no arguments
# (i.e. with its default path).
phn_id2name = get_phoneid2phone_dict()

In [4]:
# One entry per item found directly under the test-set folder: keep the last
# path component, and skip any item whose path contains '.txt'.
speakers = [
    entry_path.rsplit('/', 1)[-1]
    for entry_path in glob.glob('./child_speech_16_khz_test/*')
    if entry_path.find('.txt') == -1
]

In [17]:
# Load each speaker's GOP pickle and rewrite every utterance's 'phones_pure'
# entry from phone ids to phone names (via phn_id2name). The loaded dicts are
# modified in place and stored per speaker.
gop_dicts = {}
for speaker in speakers:
    gop_dict = pd.read_pickle(f'./experiments/{speaker}/gop_kaldi_labels_heldout/gop_scores/gop.pickle')

    for utt_entry in gop_dict.values():
        utt_entry['phones_pure'] = [phn_id2name[phn] for phn in utt_entry['phones_pure']]

    gop_dicts[speaker] = gop_dict

In [53]:
# Load the alignment pickle of one example speaker. The speaker is chosen
# explicitly (the last one in `speakers`) instead of relying on whatever value
# a loop variable named `speaker` happened to hold from a previously run cell.
example_speaker = speakers[-1]
aligns = pd.read_pickle(f'./experiments/{example_speaker}/gop_kaldi_labels_heldout/gop_scores/alignments.pickle')

In [55]:
# Pull out the entry stored under the first key of `aligns` for inspection.
first_align_key = list(aligns.keys())[0]
dct = aligns[first_align_key]

In [10]:
# TODO: implement get_durations_from_kaldi_alignments (stub below).
# def get_durations_from_kaldi_alignments(speaker):
    # '''
    # Returns a dictionary whose keys are the filenames for a speaker and
    # whose values are the durations of each phoneme in the utterance.
    # '''
    
    
    

In [36]:
def get_all_phones_of_type(descriptions, removephones=None):
    """Collect the phonemes whose 'type' entry matches any given description.

    Parameters
    ----------
    descriptions : iterable of str
        Patterns handed to ``Series.str.contains`` on the 'type' column of
        PHONEME_INFO_DF (so they are interpreted with that method's defaults).
    removephones : iterable of str, optional
        Phonemes to leave out of the result.

    Returns
    -------
    list
        Matching phonemes without duplicates, ordered by first match:
        descriptions are processed in the order given, and within one
        description phonemes keep their PHONEME_INFO_DF row order.
    """
    excluded = set(removephones) if removephones is not None else set()

    # A dict is used as an ordered set. Returning list(set(...)) gave an order
    # that can differ from one interpreter run to the next for strings, which
    # made everything derived from these lists non-reproducible in ordering.
    phones = {}
    for descrip in descriptions:
        type_matches = PHONEME_INFO_DF['type'].str.contains(descrip)
        for phn in PHONEME_INFO_DF.loc[type_matches, 'phoneme']:
            if phn not in excluded:
                phones[phn] = None
    return list(phones)

# Phone classes used to group phonemes in the analyses below.
# 'CH' is excluded from the looked-up stop and fricative sets and appears only
# in the hand-written affricate list.
stops = get_all_phones_of_type(['stop', 'plosive'], removephones=['CH'])
# Note: `fricatives` also includes phones whose type mentions 'affricate'
# (again without 'CH').
fricatives = get_all_phones_of_type(['fricative'], removephones=['CH']) + get_all_phones_of_type(['affricate'], removephones=['CH'])
unvoiced_fricatives = ['F', 'TH', 'S', 'SH']
voiced_fricatives = ['V', 'DH', 'Z']
liquid_approximants = ['R', 'L']
# The palatal glide is written 'Y' in ARPAbet. This list previously held 'JH',
# which is the voiced affricate and is already covered by `affricates` below.
glide_approximants = ['Y', 'W']
nasals = ['NG', 'N', 'M']
vowels = get_all_phones_of_type(['vowel'])
obstruents = stops + fricatives
affricates = ['JH', 'CH']

# Class name -> member phonemes. The key order here fixes the order of the
# class columns/flags derived from this dict.
phone_classes_dct = {
    'Stops': stops,
    'UnvoicedFricatives': unvoiced_fricatives,
    'VoicedFricatives': voiced_fricatives,
    'LiquidApproximants': liquid_approximants,
    'GlideApproximants': glide_approximants,
    'Nasals': nasals,
    'Vowels': vowels,
    'Obstruents': obstruents,
    'Affricates': affricates}

In [43]:
# speaker -> phoneme -> {'pllrs': [...], 'filename': [...]}, with every list
# starting out empty.
speakerwise_results_dict = {}
for speaker in speakers:
    speakerwise_results_dict[speaker] = {
        phone: {'pllrs': [], 'filename': []}
        for phone in ENGLISH_PHONEME_LIST
    }

In [44]:
# Fill speakerwise_results_dict: for every phone occurrence in every utterance,
# record its GOP score and the utterance filename under that phoneme.
for speaker in speakers:
    speaker_results = speakerwise_results_dict[speaker]

    # Start from empty lists so that re-running this cell does not append the
    # same occurrences a second time.
    for phone_results in speaker_results.values():
        phone_results['pllrs'] = []
        phone_results['filename'] = []

    # Single pass over the utterances. Per phoneme, entries end up in the same
    # order as before (utterance order, then position within the utterance),
    # without rescanning every utterance once per target phoneme.
    for filename, filegopdict in gop_dicts[speaker].items():
        filegops = filegopdict['gop']
        for ii, phn in enumerate(filegopdict['phones_pure']):
            # Phones without an entry in the results dict are ignored.
            if phn in speaker_results:
                speaker_results[phn]['pllrs'].append(filegops[ii])
                speaker_results[phn]['filename'].append(filename)


In [34]:
# Alias only: pllrdct refers to the same dict object as
# speakerwise_results_dict, it is not a copy.
pllrdct = speakerwise_results_dict

In [71]:
def get_phone_membership(phone, phone_class_membership=phone_classes_dct):
    """Return one boolean per phone class telling whether `phone` belongs to it.

    The flags follow the iteration order of `phone_class_membership`, a
    mapping from class name to the collection of phones in that class.
    """
    return [phone in class_phones for class_phones in phone_class_membership.values()]

In [130]:
# Build one row per phoneme occurrence: phoneme, PLLR, speaker age, speaker,
# filename, plus one membership flag per phone class.
phone_classes = list(phone_classes_dct.keys())
fulldf_cols = ['Phoneme', 'OccurencePLLR', 'Age', 'Speaker', 'Filename'] + phone_classes

# Per-phoneme frames are collected and concatenated once at the end rather
# than growing the DataFrame with pd.concat on every iteration.
phndfs = []
for speaker, speakerpllrdct in tqdm.tqdm(pllrdct.items()):
    # Speaker ids start with a YYMM age code (e.g. '0303' -> 3 years 3 months
    # -> 3.25). Both year digits are read so ages of 10+ years parse correctly.
    speaker_age = float(speaker[:2]) + float(speaker[2:4]) / 12

    for phn in ENGLISH_PHONEME_LIST:
        phonepllrs = speakerpllrdct[phn]
        n_occurances = len(phonepllrs['pllrs'])
        if n_occurances == 0:
            # Nothing to add. With zero rows the membership array would be
            # 1-D and np.concatenate below would raise.
            continue

        phn_membership = np.array([get_phone_membership(phn)] * n_occurances)
        # Mixed str/float rows: numpy stores every cell as a string here.
        _data = np.array([[phn] * n_occurances, phonepllrs['pllrs'], [speaker_age] * n_occurances, [speaker] * n_occurances, phonepllrs['filename']]).T
        _data = np.concatenate((_data, phn_membership), axis=1)
        phndf = pd.DataFrame(_data, columns=fulldf_cols)
        phndfs.append(phndf)

fulldf_notype = pd.concat(phndfs, axis=0) if phndfs else pd.DataFrame(columns=fulldf_cols)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:08<00:00,  5.14it/s]


In [131]:
# Write the per-occurrence table to disk (to_csv is called with its defaults).
fulldf_notype.to_csv('./dnngop_pllrs_child.csv')

In [132]:
# Build one row per (phone class, phoneme occurrence). A phoneme that belongs
# to several classes contributes its occurrences once per class.
fulldf_cols = ['PhoneType', 'Phoneme', 'OccurencePLLR', 'Age', 'Speaker']

# Per-class frames are collected and concatenated once at the end rather than
# growing the DataFrame with pd.concat on every iteration.
typedfs = []
for speaker, phonepllrs in pllrdct.items():
    # Speaker ids start with a YYMM age code (e.g. '0303' -> 3 years 3 months
    # -> 3.25). Both year digits are read so ages of 10+ years parse correctly.
    speaker_age = float(speaker[:2]) + float(speaker[2:4]) / 12

    for phonetype, phonesubset in phone_classes_dct.items():
        subtypepllrs = []
        phoneid = []
        for phn in phonesubset:
            # Only a missing phoneme entry is reported and skipped. Any other
            # failure (e.g. a PLLR that cannot be converted to float) now
            # surfaces instead of being reported as "no occurrences".
            try:
                phn_pllrs = phonepllrs[phn]['pllrs']
            except KeyError:
                print(f'No occurences of phone: {phn} found for speaker: {speaker}')
                continue
            subtypepllrs.extend(np.array(phn_pllrs).astype('float'))
            phoneid.extend([phn] * len(phn_pllrs))

        n_occurences = len(subtypepllrs)
        if n_occurences == 0:
            # No rows for this class and speaker.
            continue

        # Mixed str/float rows: numpy stores every cell as a string here.
        _data = np.array([[phonetype] * n_occurences, phoneid, subtypepllrs, [speaker_age] * n_occurences, [speaker] * n_occurences]).T
        typedf = pd.DataFrame(_data, columns=fulldf_cols)
        typedfs.append(typedf)

fulldf = pd.concat(typedfs, axis=0) if typedfs else pd.DataFrame(columns=fulldf_cols)

fulldf.to_csv('./pllr_by_class_repeat_phones_exist.csv')

In [138]:
# Display phndf. NOTE(review): phndf is only ever bound as a loop temporary in
# other cells, so what is shown here depends on which cell ran last.
phndf

Unnamed: 0,Phoneme,OccurencePLLR,Age,Speaker,Filename,Stops,UnvoicedFricatives,VoicedFricatives,LiquidApproximants,GlideApproximants,Nasals,Vowels,Obstruents,Affricates
0,AA,-2.733563871608537,3.25,0303_F_CT,0303_F_CTs4T07,False,False,False,False,False,False,True,False,False
1,AA,-2.777128561492372,3.25,0303_F_CT,0303_F_CTwT28,False,False,False,False,False,False,True,False,False
2,AA,-2.974718381927321,3.25,0303_F_CT,0303_F_CTs4T04,False,False,False,False,False,False,True,False,False
3,AA,-2.727199161492704,3.25,0303_F_CT,0303_F_CTwT29,False,False,False,False,False,False,True,False,False
4,AA,-2.7575340077523616,3.25,0303_F_CT,0303_F_CTs2T07,False,False,False,False,False,False,True,False,False
5,AA,-3.067003918510882,3.25,0303_F_CT,0303_F_CTwT16,False,False,False,False,False,False,True,False,False
6,AA,-3.149454222611659,3.25,0303_F_CT,0303_F_CTs3T06,False,False,False,False,False,False,True,False,False
7,AA,-2.5567054974602814,3.25,0303_F_CT,0303_F_CTwT27,False,False,False,False,False,False,True,False,False
8,AA,-2.6913264688027905,3.25,0303_F_CT,0303_F_CTwT34,False,False,False,False,False,False,True,False,False
9,AA,-2.720620409341161,3.25,0303_F_CT,0303_F_CTs2T08,False,False,False,False,False,False,True,False,False


In [149]:
# Load the intelligibility table and index it by its 'child' column.
inteligdf = pd.read_csv('adjust-adjusted-intelligibility.csv').set_index('child')

In [153]:
# Display the intelligibility table loaded above.
inteligdf

Unnamed: 0_level_0,age_months,n_items,mean_intelligibility,age_adjusted_intelligibility
child,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0301_M_AH,37,57,0.508772,-0.149216
0303_F_CT,39,67,0.662313,-0.008308
0303_M_AN,39,67,0.593905,-0.076716
0305_F_AN,41,66,0.662374,-0.020882
0305_F_MB,41,68,0.710784,0.027529
0305_M_JG,41,64,0.460938,-0.222318
0307_F_IC,43,63,0.672619,-0.02327
0307_M_EC,43,57,0.631579,-0.06431
0308_F_AGL,44,98,0.923919,0.221713
0309_F_LB,45,96,0.808147,0.099624


In [200]:
# Age-residualize the DNN PLLR score and compare it with intelligibility.
#
# 1. Per speaker: average the PLLR within each phoneme, then average those
#    phoneme means into a single score; pair it with the speaker's age and the
#    raw / age-adjusted intelligibility looked up in inteligdf.
# 2. Fit a linear model of the score on age and keep the residual as the
#    age-adjusted score.
# 3. Show the correlation matrix of all five quantities.
numeric_cols = ['DNN_PLLR', 'Age', 'Intellig', 'AgeAdjustedIntellig']

speakerwise_data = []
for speaker, df in fulldf_notype.groupby('Speaker'):
    mean_pllrs = [
        np.mean(phoneme_rows['OccurencePLLR'].astype('float'))
        for _, phoneme_rows in df.groupby('Phoneme')
    ]
    speakerwise_data.append([
        speaker,
        np.mean(mean_pllrs),
        df['Age'].iloc[0],
        inteligdf.loc[speaker, 'mean_intelligibility'],
        inteligdf.loc[speaker, 'age_adjusted_intelligibility'],
    ])

# The mixed-type rows go through a numpy array first, so the numeric columns
# are cast back to float explicitly afterwards.
speakerwise_pllr_df = pd.DataFrame(np.array(speakerwise_data), columns=['Speaker'] + numeric_cols)
speakerwise_pllr_df[numeric_cols] = speakerwise_pllr_df[numeric_cols].astype('float')
speakerwise_pllr_df = speakerwise_pllr_df.set_index('Speaker')

# Residual of DNN_PLLR after regressing it on Age.
ages = speakerwise_pllr_df['Age'].values.reshape(-1, 1)
dnn_pllrs = speakerwise_pllr_df['DNN_PLLR'].values.reshape(-1, 1)
lm = LinearRegression()
lm.fit(ages, dnn_pllrs)
speakerwise_pllr_df['AgeAdjustedDNNPLLR'] = dnn_pllrs - lm.predict(ages)

speakerwise_pllr_df = speakerwise_pllr_df[['Intellig', 'DNN_PLLR', 'Age', 'AgeAdjustedIntellig', 'AgeAdjustedDNNPLLR']]
speakerwise_pllr_df.corr().round(3)

Unnamed: 0,Intellig,DNN_PLLR,Age,AgeAdjustedIntellig,AgeAdjustedDNNPLLR
Intellig,1.0,0.832,0.653,0.731,0.516
DNN_PLLR,0.832,1.0,0.8,0.377,0.6
Age,0.653,0.8,1.0,-0.04,-0.0
AgeAdjustedIntellig,0.731,0.377,-0.04,1.0,0.681
AgeAdjustedDNNPLLR,0.516,0.6,-0.0,0.681,1.0


In [None]:
''' 

calculate alignment metrics for 

'''



In [None]:
'''

create a package or set of functions for alignment evaluation (after cleaning up your old code)

'''