In [1]:
import pandas as pd

In [2]:
# Read the lemmatizer predictions file into a DataFrame.
df = pd.read_json('lemmatizer_predictions_lfg_dev.json')
# Distinct values of the UD_POS column, used below for the per-tag breakdowns.
poss = df['UD_POS'].unique().tolist()

## Baseline

In [3]:
# Identity baseline: a row is correct when its `orth` value already equals its `lemma` value.
df['ok_baseline'] = df['orth'] == df['lemma']
# Overall baseline accuracy: correct rows divided by all rows.
int(df['ok_baseline'].sum()) / len(df)

0.5081266692102251

In [4]:
# Per-tag breakdown of the baseline: one summary line for every tag in `poss`.
for POS in poss:
    df_pos = df.loc[df['UD_POS'] == POS]
    size = df_pos.shape[0]
    score = int(df_pos['ok_baseline'].sum())
    accuracy = round(score / size, 2)
    print(f"{POS}: matched {score} / {size}, accuracy {accuracy}")

NUM: matched 53 / 80, accuracy 0.66
NOUN: matched 623 / 2516, accuracy 0.25
PRON: matched 512 / 983, accuracy 0.52
VERB: matched 314 / 2206, accuracy 0.14
PUNCT: matched 2574 / 2574, accuracy 1.0
AUX: matched 64 / 430, accuracy 0.15
ADJ: matched 119 / 819, accuracy 0.15
PART: matched 454 / 614, accuracy 0.74
ADP: matched 864 / 1060, accuracy 0.82
ADV: matched 405 / 604, accuracy 0.67
DET: matched 64 / 326, accuracy 0.2
PROPN: matched 238 / 403, accuracy 0.59
CCONJ: matched 250 / 337, accuracy 0.74
SCONJ: matched 123 / 150, accuracy 0.82
INTJ: matched 2 / 3, accuracy 0.67


## Evaluation of all cases

In [5]:
# A row counts as correct when its `lemma` value is contained in its `predictions` value.
df['ok'] = [
    lemma in predictions
    for lemma, predictions in zip(df['lemma'], df['predictions'])
]

In [6]:
# Overall accuracy: rows flagged in `ok` divided by all rows.
int(df['ok'].sum()) / len(df)

0.8031285768790538

In [7]:
# Per-tag breakdown of the `ok` flag: one summary line for every tag in `poss`.
for POS in poss:
    df_pos = df.loc[df['UD_POS'] == POS]
    size = df_pos.shape[0]
    score = int(df_pos['ok'].sum())
    accuracy = round(score / size, 2)
    print(f"{POS}: matched {score} / {size}, accuracy {accuracy}")

NUM: matched 55 / 80, accuracy 0.69
NOUN: matched 2352 / 2516, accuracy 0.93
PRON: matched 581 / 983, accuracy 0.59
VERB: matched 1805 / 2206, accuracy 0.82
PUNCT: matched 2574 / 2574, accuracy 1.0
AUX: matched 75 / 430, accuracy 0.17
ADJ: matched 472 / 819, accuracy 0.58
PART: matched 605 / 614, accuracy 0.99
ADP: matched 1034 / 1060, accuracy 0.98
ADV: matched 404 / 604, accuracy 0.67
DET: matched 79 / 326, accuracy 0.24
PROPN: matched 0 / 403, accuracy 0.0
CCONJ: matched 337 / 337, accuracy 1.0
SCONJ: matched 149 / 150, accuracy 0.99
INTJ: matched 3 / 3, accuracy 1.0


## Evaluation of non-trivial cases only

In [8]:
# Keep only the non-trivial rows, i.e. those where `orth` and `lemma` differ.
df2 = df.loc[df['lemma'] != df['orth']]

In [9]:
# Accuracy restricted to the non-trivial rows in `df2`.
int(df2['ok'].sum()) / len(df2)

0.6580825318026683

In [14]:
# Per-tag accuracy on the non-trivial subset; tags with no rows in `df2` are omitted.
for POS in poss:
    df_pos = df2.loc[df2['UD_POS'] == POS]
    size = df_pos.shape[0]
    if size > 0:
        score = int(df_pos['ok'].sum())
        accuracy = round(score / size, 2)
        print(f"{POS}: matched {score} / {size}, accuracy {accuracy}")

NUM: matched 2 / 27, accuracy 0.07
NOUN: matched 1732 / 1893, accuracy 0.91
PRON: matched 69 / 471, accuracy 0.15
VERB: matched 1491 / 1892, accuracy 0.79
AUX: matched 11 / 366, accuracy 0.03
ADJ: matched 358 / 700, accuracy 0.51
PART: matched 157 / 160, accuracy 0.98
ADP: matched 170 / 196, accuracy 0.87
ADV: matched 123 / 199, accuracy 0.62
DET: matched 15 / 262, accuracy 0.06
PROPN: matched 0 / 165, accuracy 0.0
CCONJ: matched 87 / 87, accuracy 1.0
SCONJ: matched 26 / 27, accuracy 0.96
INTJ: matched 1 / 1, accuracy 1.0


### Conclusions:
 * Good scores on nouns, but on proper nouns the score (0.0) is worse than with no lemmatization at all (baseline: 0.59)
     * I suggest temporarily removing lemmatization of proper nouns
 * Try adding more rules/exceptions for determiners (this is a closed class of lexemes, so in my opinion it is the easiest way to improve accuracy)

In [19]:
# Select the rows whose UD_POS tag is 'DET'.
df_det = df.loc[df['UD_POS'].eq('DET')]

In [27]:
# Show the first five 'DET' rows that are not flagged in `ok`.
df_det.loc[~df_det['ok']].head(5)

Unnamed: 0,orth,lemma,UD_POS,predictions,ok_baseline,ok
1002,takimi,taki,DET,[takimi],False,False
10084,swego,swój,DET,[swego],False,False
1012,nasze,nasz,DET,[nasze],False,False
10142,to,ten,DET,[to],False,False
10150,tę,ten,DET,[tę],False,False
