In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report

### Age Loss Weighting

In [3]:
base = pd.read_csv('/home/selinawisco/asr_ssd_main/0421_44_eval.csv')

b_old = base[base['age']>5]
print(classification_report(b_old['new_label'], b_old['pred_by_ASR']))

young = base[base['age']<=5]
print(classification_report(base['new_label'], base['pred_by_ASR']))

              precision    recall  f1-score   support

           0       0.87      0.86      0.87      1355
           1       0.41      0.43      0.42       300

    accuracy                           0.78      1655
   macro avg       0.64      0.65      0.64      1655
weighted avg       0.79      0.78      0.79      1655

              precision    recall  f1-score   support

           0       0.84      0.79      0.81      2868
           1       0.63      0.70      0.66      1462

    accuracy                           0.76      4330
   macro avg       0.73      0.75      0.74      4330
weighted avg       0.77      0.76      0.76      4330



In [4]:
df = pd.read_csv('0502_age_label_age_loss_weighting_44_eval.csv')

old = df[df['age']>5]
print(classification_report(old['new_label'], old['pred_by_ASR']))

young = df[df['age']<=5]
print(classification_report(young['new_label'], young['pred_by_ASR']))

              precision    recall  f1-score   support

           0       0.86      0.88      0.87      1355
           1       0.40      0.37      0.39       300

    accuracy                           0.79      1655
   macro avg       0.63      0.63      0.63      1655
weighted avg       0.78      0.79      0.78      1655

              precision    recall  f1-score   support

           0       0.78      0.75      0.77      1513
           1       0.69      0.72      0.71      1162

    accuracy                           0.74      2675
   macro avg       0.74      0.74      0.74      2675
weighted avg       0.74      0.74      0.74      2675



In [5]:
b_wrong = base[base['new_label']!=base['pred_by_ASR']]
b_wrong['new_label'].value_counts()

new_label
0    592
1    443
Name: count, dtype: int64

In [6]:
b_wrong['age'].value_counts()

age
4     241
5     219
3     185
6     147
7     120
8      62
2      32
9      23
10      6
Name: count, dtype: int64

In [7]:
import jiwer
# human <-> target CER
def cer1(row):
    return jiwer.cer(row['human_text_jamo'], row['target_text_jamo'])

def cer2(row):
    return jiwer.cer(row['human_text_jamo'], row['asr_human_transcription'])

def cer3(row):
    return jiwer.cer(row['asr_human_transcription'], row['target_text_jamo'])

### ASR 이 맞춘 음성의 평균 CER1    VS    ASR 이 못 맞춘 음성의 평균 CER1

In [8]:
b_right= base[base['new_label']==base['pred_by_ASR']]

In [9]:
b_right = b_right.copy()
b_right['cer1'] = b_right.apply(lambda x: cer1(x),axis=1).mean()
b_right['cer1'].mean()

0.1288179212458424

In [10]:
b_wrong = b_wrong.copy()
b_wrong['cer1'] = b_wrong.apply(lambda x: cer1(x), axis=1).mean()
b_wrong['cer1'].mean()

0.13436853002070398

### ASR 이 못 맞춘 음성 중에서 old 와 young 의 평균 CER1

In [11]:
b_old_wr = b_wrong[b_wrong['age']>5]
b_young_wr = b_wrong[b_wrong['age']<=5]

In [12]:
b_old_wr = b_wrong[b_wrong['age']>5]
b_old_wr = b_old_wr.copy()
b_old_wr['cer1'] = b_old_wr.apply(lambda x: cer1(x), axis=1)
b_old_wr['cer1'].mean()

0.1407355679702048

In [13]:
b_young_wr = b_young_wr.copy()
b_young_wr['cer1'] = b_young_wr.apply(lambda x: cer1(x), axis=1)
b_young_wr['cer1'].mean()

0.13100161778152913

### 나이별 평균 CER1
##### 나이가 커질 수록 CER1 이 떨어진다. (성립)

In [14]:
base['cer1'] = base.apply(lambda x: cer1(x), axis=1)

age = base.groupby('age')['cer1'].mean()
age


age
2     0.303602
3     0.224187
4     0.178342
5     0.095007
6     0.089887
7     0.047299
8     0.032450
9     0.033142
10    0.018939
Name: cer1, dtype: float64

##### ASR이 틀린 음성 중 나이가 커질 수록 CER 1 이 떨어진다. (성립 X)

In [15]:
b_wrong['cer1'] = b_wrong.apply(lambda x: cer1(x),axis=1)
age = b_wrong.groupby('age')['cer1'].mean()
age

age
2     0.112649
3     0.156911
4     0.114162
5     0.130327
6     0.180977
7     0.104187
8     0.132296
9     0.111957
10    0.083333
Name: cer1, dtype: float64

### CER1 이 0.15 이하인 음성의 평균 UAR 은 CER1 이 0.15 이상인 음성의 평균 UAR 보다 낮다. (성립 X)


In [16]:
from sklearn.metrics import recall_score, accuracy_score

In [38]:
cer_low = base[(base['cer1']>0) & (base['cer1']<0.15)]
print(cer_low['age'].value_counts())
print(cer_low['pred_by_ASR'].value_counts())
recall_score(cer_low['new_label'], cer_low['pred_by_ASR'])

age
4     32
3     22
6     15
5     15
7      8
8      2
10     1
2      1
9      1
Name: count, dtype: int64
pred_by_ASR
1.0    71
0.0    26
Name: count, dtype: int64


0.7319587628865979

In [18]:
b_right['cer1'] = b_right.apply(lambda x: cer1(x), axis=1)
age = b_right.groupby('age')['cer1'].mean()

age



age
2     0.352098
3     0.247150
4     0.197579
5     0.080301
6     0.060197
7     0.030232
8     0.011746
9     0.018036
10    0.004630
Name: cer1, dtype: float64

In [37]:
cer_high = base[base['cer1']>0.15]
# print(recall_score(cer_high['new_label'], cer_high['pred_by_ASR'], average='macro'))
print(cer_high['age'].value_counts())
print(cer_high['pred_by_ASR'].value_counts())
recall_score(cer_high['new_label'], cer_high['pred_by_ASR'])


age
4     436
3     352
5     204
6     148
2      99
7      70
8      39
9      14
10      2
Name: count, dtype: int64
pred_by_ASR
1.0    947
0.0    417
Name: count, dtype: int64


0.6942815249266863

In [20]:
base['cer1'].max()

1.5

In [21]:
agelist = np.sort(base['age'].unique())


In [22]:
for i in agelist:
    subdf = base[base['age']==i]
    print(i, recall_score(subdf['new_label'],subdf['pred_by_ASR'], average='macro'))

2 0.7639655172413793
3 0.7444819802760145
4 0.7735689113954038
5 0.6678993697588416
6 0.66991749524011
7 0.6214177978883861
8 0.5841463414634147
9 0.6747395833333334
10 0.6


In [23]:
print('cer1')
for i in agelist:
    subdf = base[base['age']==i]
    print(i, subdf['cer1'].mean())

cer1
2 0.30360156720916215
3 0.22418724481998206
4 0.17834158398751698
5 0.09500692447001172
6 0.08988692466953335
7 0.0472985347985348
8 0.032449544914918874
9 0.033141858141858146
10 0.01893939393939394


In [24]:
print('cer2')

base['cer2'] = base.apply(lambda x: cer2(x), axis=1)
for i in agelist:
    subdf = base[base['age']==i]
    print(i, subdf['cer2'].mean())

cer2
2 0.35593731163351416
3 0.2606455099233641
4 0.2293045630605439
5 0.16076338650164151
6 0.12467019695280565
7 0.0970970695970696
8 0.0548069735742866
9 0.055025530025530026
10 0.05357142857142857


In [25]:
print('cer3')

base['cer3'] = base.apply(lambda x: cer3(x), axis=1)
for i in agelist:
    subdf = base[base['age']==i]
    print(i, subdf['cer3'].mean())

cer3
2 0.4527375929274663
3 0.3089752407152682
4 0.27527644869750134
5 0.15291786513263023
6 0.12237617455008759
7 0.08921703296703296
8 0.03438860308666403
9 0.04555444555444555
10 0.035894660894660896


In [26]:
#틀린음성만 고려
print('cer1')
df['cer1'] = df.apply(lambda x: cer1(x),axis=1)
for i in agelist:
    subdf = df[df['age']==i]
    print(i, subdf['cer1'].mean())

cer1
2 0.30360156720916215
3 0.22418724481998206
4 0.17834158398751698
5 0.09500692447001172
6 0.08988692466953335
7 0.0472985347985348
8 0.032449544914918874
9 0.033141858141858146
10 0.01893939393939394


In [27]:
#틀린음성만 고려
print('cer2')
df['cer2'] =df.apply(lambda x: cer2(x),axis=1)
for i in agelist:
    subdf = df[df['age']==i]
    print(i, subdf['cer2'].mean())

cer2
2 0.35508589511754074
3 0.26473166524748365
4 0.2310696161892334
5 0.15819653873345146
6 0.1222991983861549
7 0.08952609890109889
8 0.05298223629248559
9 0.04829337329337329
10 0.03463203463203463


In [28]:
#틀린음성만 고려
print('cer3')
df['cer3'] =df.apply(lambda x: cer3(x),axis=1)
for i in agelist:
    subdf = df[df['age']==i]
    print(i, subdf['cer3'].mean())

cer3
2 0.42454792043399636
3 0.25837208794567806
4 0.24146657967232127
5 0.13445722808138913
6 0.08703017996496257
7 0.07924221611721612
8 0.027127028096557183
9 0.049983349983349995
10 0.01948051948051948


In [29]:
b_right_audios = b_right['audio'].tolist()
print(len(b_right_audios))

awl_right = df[df['audio'].isin(b_right_audios)]
print(len(awl_right))

now_wrong = awl_right[awl_right['new_label']!=awl_right['pred_by_ASR']]
now_wrong['cer1'].mean()
# len(now_wrong)

3295
3295


0.18300559090032775

In [30]:
b_wrong_audios = b_wrong['audio'].tolist()
print(len(b_wrong_audios))

filtered = df[df['audio'].isin(b_wrong_audios)]
print(len(filtered))

now_right = filtered[filtered['new_label']==filtered['pred_by_ASR']]
now_right['cer1'].mean()

# len(now_right)

1035
1035


0.0935877784590231

In [31]:
b_right['cer1'].mean()

0.12881792124584235

In [32]:
b_wrong['cer1'].mean()

0.13436853002070395

In [33]:
b_wrong_old = b_wrong[b_wrong['age']>5]
b_wrong_old['cer1'].mean()

len(b_wrong_old)

358

In [34]:
b_wrong_young = b_wrong[b_wrong['age']<=5]
b_wrong_young['cer1'].mean()

len(b_wrong_young)

677

### Age Embedding 모델

In [35]:
import pandas as pd
from sklearn.metrics import recall_score
emb = pd.read_csv('/home/selinawisco/asr_ssd_main/0508_age_label_embedding_44')

emb_old = emb[emb['age']>5]
print(recall_score(emb_old['new_label'], emb_old['pred_by_ASR'],average='macro'))

0.6311623616236162


In [36]:
emb_old['pred_by_ASR'].value_counts()

pred_by_ASR
0.0    1365
1.0     290
Name: count, dtype: int64

In [39]:
emb_young = emb[emb['age']<=5]
print(recall_score(emb_young['new_label'], emb_young['pred_by_ASR'],average='macro'))

0.7454405479533088
