In [1]:
import tensorflow as tf
from tagger import Config, Tagger, calculate_confusion, print_confusion

# Validation against test set

All three models show similar performance. The last model was chosen for the final accuracy estimation.

The model is a 3-layer neural network fed with 3-word windows (preceding word, tagged word, next word); it predicts the tag of the center word.

In [3]:
config = Config()
print config
with tf.Graph().as_default():
    with tf.Session() as session:
        model = Tagger(config)
        init = tf.global_variables_initializer()
        session.run(init)
        saver = tf.train.Saver()
        # Restore previously trained weights
        saver.restore(session, './final_weights/tagger.weights')
        _, predictions = model.predict(session, 
                                       model.X_test, 
                                       model.y_test,
                                       verbose=False
                                       )
        total_correct_examples = 0
        total_processed_examples = len(predictions)
        failed_cases = []
        for i, (y_hat, y) in enumerate(
                zip(predictions, model.y_test)):
            total_correct_examples += 1 if y == y_hat else 0
            if y != y_hat:
                failed_cases.append((i, y_hat, y))
        print 'Test accuracy is', total_correct_examples / float(
            total_processed_examples)

Model hyperparameters:
window_size: 3
embed_size: 50
dropout: 0.9
batch_size: 64
l2: 0.003
max_epochs: 30
lr: 0.001
hidden_size: 200
early_stopping: 2



85565 sentences loaded


1386544 3-word windows loaded
Shape of X is (1386544, 3)
Shape of y is (1386544,)


Test accuracy is 0.929198370055


In [4]:
# Build the tag-vs-tag confusion matrix from the test-set predictions,
# then print precision / recall for each tag.
confusion = calculate_confusion(predictions,
                                model.y_test,
                                model.tagset_size)
print_confusion(confusion, model.num_to_tag)

Confusion matrix (precission and recall for each tag)
Tag: pant - P nan / R 0.0000
Tag: adjp - P 0.9861 / R 0.7802
Tag: interp - P 0.9997 / R 0.9998
Tag: num - P 0.9578 / R 0.8257
Tag: interj - P 1.0000 / R 0.5329
Tag: adja - P 1.0000 / R 0.1639
Tag: ppron3 - P 0.9892 / R 0.9922
Tag: </s> - P 1.0000 / R 1.0000
Tag: inf - P 0.9345 / R 0.8266
Tag: adv - P 0.9422 / R 0.8885
Tag: winien - P 0.9756 / R 0.9524
Tag: pred - P 0.8864 / R 0.7158
Tag: subst - P 0.8608 / R 0.9663
Tag: ppas - P 0.7342 / R 0.4514
Tag: siebie - P 1.0000 / R 0.9956
Tag: fin - P 0.8233 / R 0.8875
Tag: prep - P 0.9878 / R 0.9929
Tag: conj - P 0.9420 / R 0.9440
Tag: ger - P 0.7738 / R 0.5454
Tag: imps - P 1.0000 / R 0.3780
Tag: pcon - P 0.9832 / R 0.4179
Tag: comp - P 0.9800 / R 0.9662
Tag: xxx - P nan / R 0.0000
Tag: burk - P nan / R 0.0000
Tag: aglt - P 0.9850 / R 1.0000
Tag: adjc - P nan / R 0.0000
Tag: qub - P 0.9562 / R 0.9241
Tag: bedzie - P 1.0000 / R 1.0000
Tag: pact - P 0.8653 / R 0.3064
Tag: praet - P 0.9262 / 

  prec = confusion[i, i] / float(total_guessed_tags[i])


## Examples of mistagged words

The first row is a three-word window; the tag of its center word is the one being predicted. The second row shows the predicted tag followed by the true tag.

`UUUNKKK` represents an unknown word (words with fewer than 5 occurrences are dismissed from the first word2vec layer). `<s>` and `</s>` are special tokens marking the beginning and ending of a sentence.

In [6]:
unks = 0
for i, y_hat, y in failed_cases:
    problematic_word = model.num_to_word[model.X_dev[i][1]]
    if problematic_word == 'UUUNKKK':
        unks += 1
        continue
    print ' '.join([model.num_to_word[model.X_dev[i][j]] 
           for j in range(0, 3)])
    print model.num_to_tag[y_hat], model.num_to_tag[y]
print     
print '{:.2f}% of mistagged words were marked as unknown to the model ' \
      '(i.e. occurred fewer than 5 times in the corpus). The other' \
      ' mistagged words are displayed above.'.format(
    unks/float(len(failed_cases))*100)

całkiem sporo .
fin adj
UUUNKKK lęku .
subst qub
los ; wierzysz
subst pact
tego za jednym
fin praet
. </s> <s>
adj adja
też wszystkie duże
subst praet
o zbyt UUUNKKK
fin praet
</s> <s> UUUNKKK
subst ger
. </s> <s>
subst adv
zarazem , żadnych
subst praet
proc . tego
subst adj
wyjściu na prostą
subst ppas
powiedział , że
subst adj
i z kim
subst adj
" , który
adj ppas
nazywam tak ,
adj ppas
bądź co bądź
ger subst
, jej adwokat
prep conj
ma sensu tworzenia
subst qub
</s> <s> to
subst inf
) , tworzenia
adj ppas
się na modlitwy
subst adj
UUUNKKK do tej
adv conj
UUUNKKK był UUUNKKK
adj ppas
wypadków . </s>
subst praet
– nie </s>
subst adj
. </s> <s>
num adv
, w której
subst ppas
<s> To UUUNKKK
fin ger
dziś widzę wyraz
subst pred
tygodniu po UUUNKKK
fin praet
zginęły w hitlerowskiej
praet fin
. </s> <s>
subst adj
nie ona była
subst adj
widoczne okoliczności tej
adj fin
, czy chodzi
subst ger
niedzielę . </s>
adj pact
potrawy . </s>
fin praet
Wiele godzin czekał
subst ppas
drugiej strony ,
fin 

adj ppas
przy tym samym
subst adj
tutaj jak na
subst ger
" Cóż Ty
adj subst
UUUNKKK się w
adj subst
</s> <s> To
subst adv
kran ? </s>
adj subst
wisiał na ścianie
subst adj
UUUNKKK i znaleźć
fin subst
przepisy prawne z
subst ger
Nikt rozsądny nie
subst num
złapie kilku UUUNKKK
fin ppas
UUUNKKK , drzewa
subst ger
UUUNKKK systemu UUUNKKK
inf num
, gdy fala
subst adj
</s> <s> Ale
subst adv
A , to
fin praet
kolejny już raz
subst adj
same słowa ,
subst impt
. </s> <s>
subst adj
w trzech sklepach
subst adv
UUUNKKK w UUUNKKK
subst adj
tutaj wieża wodna
subst qub
zgonu były UUUNKKK
adj ger
</s> <s> Jesteś
subst adj
UUUNKKK Chrystusa ,
inf fin
<s> - Szkoda
fin impt
<s> - Widzisz
subst ppas
</s> <s> Po
subst adj
UUUNKKK . </s>
adj inf
br . nieznani
brev subst
<s> Tyle ,
fin inf
że ci ,
fin praet
obecnie ze UUUNKKK
subst adj
<s> Człowiek był
fin adj
55 , 17
subst adj
to nie niekoniecznie
subst imps
W umowie strony
subst ger
. która .
subst adj
UUUNKKK śmy zwłoki
adj ppas
rozwój techniki polskiej
s

xxx
UUUNKKK m się
subst fin
o jak UUUNKKK
conj qub
UUUNKKK powinno być
subst ger
niż u UUUNKKK
subst adj
przypadkach policjanci twierdzą
subst adj
udziałów w UUUNKKK
adj subst
, będzie miał
subst adj
</s> <s> Monika
subst adj
UUUNKKK do sypialni
subst interj
Zjednoczonych większość ludzi
subst ppas
. </s> <s>
comp qub
mu 31 zł
subst adj
<s> Nie jest
subst adj
ile tygodni ?
inf praet
coś zrobić ,
subst num
8 % osób
subst praet
UUUNKKK " Gazety
subst ppas
. </s> <s>
adv qub
roboty , UUUNKKK
fin adj
UUUNKKK warunków nauczania
subst imps
</s> <s> Halina
fin impt
</s> <s> UUUNKKK
subst ppas
1994 . </s>
adj num
</s> <s> "
qub conj
1999 r .
subst adj
ci pomogę ,
num adj
– prywatne osoby
subst pact
UUUNKKK , a
conj pred
) . </s>
fin adj
? </s> <s>
ppron3 brev
Był em jedynym
adj ppas
dokonać zakupów poza
adj ppas
jedną z UUUNKKK
subst praet
zawodowym . </s>
qub adv
całego seksualnego zamieszania
num adj
z uwzględnieniem przepisów
fin inf
</s> <s> UUUNKKK
subst qub
imprezie zorganizowanej na
sub


adj num
strzały na małe
adj subst
za czas próby
subst adj
Nic , rzeczy
subst adj
30 tys .
praet adj
sądem za UUUNKKK
adj ger
wspomina wielokrotnie w
adj praet
UUUNKKK w wojsku
adj subst
ma już UUUNKKK
prep interj
Przed sklepem UUUNKKK
adj praet
nie był UUUNKKK
fin praet
tych dokumentów w
subst adj
zatwierdza wewnętrzne UUUNKKK
adj pact
UUUNKKK najwcześniej po
subst adj
" jest "
subst inf
, który pił
subst adj
i do zobaczenia
praet adj
nie , to
subst ger
<s> Nie były
subst adj
UUUNKKK </s> <s>
adj num
od podatku dochodowego
qub pred
<s> Szczególnie trzeba
subst adj
, I UUUNKKK
fin pcon
zamiast powoływania straży
subst xxx
sądem w kolejnym
pred subst
. </s> <s>
fin praet
<s> nie no
adj subst
zł , a
subst inf
UUUNKKK . </s>
fin praet
<s> Zgodnie z
subst brev
" Wolna Europa
subst adj
. </s> <s>
subst pred
UUUNKKK dla tych
ger subst
spółdzielnie z woj
ppas pcon
jak obecnie ,
adj ppas
ja był em
adj ppas
reprezentujący zrozumieli ,
adj ppas
. </s> <s>
conj qub
<s> - Wiesz
adj pact
Z jednej s

Tymczasem PiS domaga
adv qub
było jednak na
ger subst
UUUNKKK się prawie
adj ger
wolny od licznych
fin praet
UUUNKKK instytucji rozwodu
subst adj
UUUNKKK . .
qub pred
</s> <s> 2
qub conj
4 , orzeka
adj num
<s> – no
conj qub
wariata ? </s>
subst ger
UUUNKKK - szepnął
ppron12 subst
UUUNKKK do "
subst pred
I tak ,
prep qub
UUUNKKK - powiedział
inf praet
biegiem ze swoim
subst fin
utrzymywać od co
adj subst
? </s> <s>
subst adj
leży na moim
subst adj
. od naszego
subst adj
w ogóle ,
subst praet
i tworząc UUUNKKK
subst adj
szczegóły pobytu w
subst pred
panie marszałku ?
praet fin
rąk . </s>
subst xxx
opozycję przeciw UUUNKKK
subst adja
XVI wieku -
subst adv
eś w UUUNKKK
subst ppas
. . </s>
subst praet
zamieszkania ( siedzibę
subst qub
UUUNKKK zmianę mogli
subst adj
Kwaśniewskiego z 1993
conj qub
, gdy inni
subst adj
Post " napisał
subst fin
zresztą kobieta ,
subst adj
UUUNKKK i młodym
conj prep
gra na UUUNKKK
adj praet
w sobotę na
subst praet
. </s> <s>
ger adj
co do poziomu
qub conj
UUUNKK

 ppas
jest tak że
subst adj
nas w sposób
ger pact
, zamiast opowiadać
subst adj
<s> tak samo
adj subst
imię . </s>
subst adj
UUUNKKK do UUUNKKK
subst adj
listów wspomina o
prep qub
bardzo dużo robimy
conj pred
</s> <s> Tak
prep brev
siedzibę narodową w
fin praet
lub nie )
subst imps
. pensji należy
fin praet
. </s> <s>
adj praet
czas . </s>
subst praet
. </s> <s>
subst adj
siebie złości ,
fin praet
polityka związanego z
adj pact
To ten UUUNKKK
adj fin
rady . </s>
subst adj
w ciągu pięciu
adv qub
. </s> <s>
adj ger
inwestorów do składania
subst adj
dni ponosi ZUS
subst num
rycerzy z zakonu
adj ger
. </s> <s>
qub conj
, głosu ,
subst ppas
, że płaci
subst praet
. </s> <s>
fin imps
. </s> <s>
praet fin
po dniu ,
subst adv
Zastanawiam się ,
adj fin
śmy do UUUNKKK
adj ger
UUUNKKK mieszkańca budowli
subst adv
– to się
subst pred
udział kilkuset UUUNKKK
subst conj
zrobię . </s>
ppas subst
także ściśle UUUNKKK
praet fin
są że dzieci
adv num
się 18 kwietnia
adj subst
UUUNKKK ? )
fin inf
dziwny 


qub adv
i że tego
subst adj
Marian Krzaklewski :
subst adja
UUUNKKK ziemię ?
adj subst
robotę " .
subst adj
Polski . </s>
subst adj
</s> <s> Krzysztof
prep brev
2004 r .
fin pcon
się w UUUNKKK
subst adj
. . </s>
subst praet
wieżę . </s>
adj num
opinie i uwagi
qub subst
ryc 1 )
adv qub
[ s .
adj praet
czym pacjent powtarza
subst adj
wyborczych było również
subst praet
. </s> <s>
comp qub
przez niego .
adj subst
cała klatka UUUNKKK
adj subst
niczego tak nie
adj fin
nie mówił em
adj ppas
</s> <s> –
fin qub
UE obawiają się
subst adj
się w gabinecie
prep qub
że w miejscu
subst ppas
, do ostatniego
subst ppas
ale nie do
subst ppas
ludźmi , mamy
adj subst
której muszą się
subst adj
Oprócz tysięcy UUUNKKK
praet fin
UUUNKKK pilota tak
adj ppas
. </s> <s>
subst adj
<s> yyy czy
adj subst
UUUNKKK , albo
pred subst
</s> <s> Nawet
subst praet
, Seks ,
prep conj
UUUNKKK członków Rady
subst ger
ostatnią poważną próbę
adj subst
niektórych miejscach ,
subst adj
, ośrodków budżetowych
fin subst
</s> <s>

Tadeusz Pawłowski .
subst qub
intensywnie tłumaczył ,
qub comp
jest żonaty .
adj adv
że dziecko delikatnie
subst ppas
morskiego , o
aglt brev
<s> " Na
subst praet
trafił pan Przemysław
subst pred
śniegu zaczęła z
subst fin
</s> <s> W
fin pact
pośrednictwem oraz środki
subst ger
przyjaźni z kimś
praet pact
obrazy z UUUNKKK
subst ger
głos kobiety .
subst adj
, po czym
adj praet
? </s> <s>
adj fin
. </s> <s>
subst fin
z miasta UUUNKKK
adj subst
UUUNKKK z opłat
subst pred
. </s> <s>
adj pact
. . </s>
adj subst
</s> <s> –
adj subst
. </s> <s>
conj qub
komu , jeśli
subst ppas
przed dziesięciu laty
subst praet
kolegów na UUUNKKK
subst fin
, jeżeli nie
adj adjp
<s> Jeden z
praet subst
- powoduje ,
prep adv
Muzeum Wojska Polskiego
conj subst
</s> <s> UUUNKKK
fin ppas
UUUNKKK brutto 1500
adj subst
UUUNKKK ok .
subst impt
ma 5 minut
adj subst
przyszłości . .
subst adj
w przyszłym tygodniu
adj subst
kilkaset osób .
conj brev
w swoim czasie
fin impt
. </s> <s>
praet fin
której było przeznaczone
adj


subst praet
politycznym ? </s>
fin ppas
pieszych . </s>
inf subst
jakieś jaja .
subst adj
UUUNKKK . </s>
subst adj
taka szansa ,
subst num
</s> <s> UUUNKKK
praet fin
może dojść do
subst adj
we znaki .
subst adj
</s> <s> Lecz
fin subst
</s> <s> Władza
adj subst
to w ogóle
subst ger
UUUNKKK i Rosji
subst pred
</s> <s> ja
prep brev
niczego nowego na
fin subst
</s> <s> Wydało
adj inf
UUUNKKK zasady ,
subst fin
też swojej UUUNKKK
subst praet
handlową i UUUNKKK
num subst
UUUNKKK , że
subst ppas
że nowy statut
adj fin
że na UUUNKKK
adj subst
pokiwał głową .
subst qub
tu , w
subst adj
Choć posłanka spędziła
subst praet
ośrodki zdrowia oraz
ppas adj
</s> <s> UUUNKKK
ppas subst
UUUNKKK życia pacjentów
comp qub
30 proc .
subst adj
. </s> <s>
subst ger
UUUNKKK , chciał
subst adj
– A co
subst ger
UUUNKKK dyskusji UUUNKKK
subst praet
</s> <s> Dzieci
subst brev
</s> <s> UUUNKKK
subst praet
mówi takim językiem
subst adj
nie było w
subst adj
. 8 .
adj num
kraju ok .
adj subst
swoich obywateli z
subst 

 qub
Dlaczego ? -
fin pact
zapłacić po około
subst adj
, a w
prep interj
UUUNKKK pewnych istotnych
subst adj
ich kontroli .
subst adj
Wrocławiu odbył się
adj ger
z urodzin Lecha
ppron3 brev
UUUNKKK się z
subst fin
na tym UUUNKKK
adv num
mecenas sztuki ,
subst inf
chciały by 49
ppas praet
problemy wewnętrzne ,
subst qub
<s> Z prostego
subst praet
Panie Marszałku !
subst fin
<s> Potem ,
inf praet
Na ich zdobycie
subst adj
jeden pokroić w
ger subst
to wypowiedzi różnych
subst ger
" , "
ppron3 comp
mój drogi .
subst qub
Artur , nasz
subst ppas
7 kandydatów do
subst praet
. </s> <s>
subst fin
rola jest UUUNKKK
subst adj
wyborczą do Sejmu
subst inf
protesty artystów ,
subst praet
niepodległości państwa .
fin pcon
również uchwały i
qub fin
gdy ją zobaczy
fin subst
pełnej stabilizacji ,
ppas subst
Potem i na
ppas adj
to pierwsze oficjalne
adj subst
, że mnie
subst ppas
w sytuacji ,
fin qub
. </s> <s>
subst praet
świecie z powodu
subst praet
, ale inne
subst praet
decyzja o UUUNKKK
subst adj
je

ppas
nad znajomym ,
adj subst
zachodzie Słońca ,
subst adj
</s> <s> Spotkał
conj qub
zarzut postawił publicznie
subst ger
nie wino ,
subst adj
sprawach gospodarczych ,
subst pact
przyjęciem do pracy
subst adj
tak jak kibice
subst adj
filozofię i naukę
subst adj
na ostro ”
subst adj
UUUNKKK zmianie wtedy
adj subst
języku publicznym naszego
qub adv
jest tylko UUUNKKK
fin praet
, że UUUNKKK
adj subst
UW może ,
adj subst
jasne jest ,
prep conj
<s> Toteż Kościół
praet fin
</s> <s> Później
subst burk
liczą się wysiłki
subst praet
, tak też
subst ppas
UUUNKKK , przesuwa
ppas subst
Wiadomość taką UUUNKKK
subst praet
tej izby .
ppas subst
on sam to
subst praet
opuszczeniu przez UUUNKKK
fin adj
Ostatnio wygrała m
adj fin
rzekł zimno ,
adv adj
możliwych obrażeń głowy
qub fin
Jakie są Pani
subst inf
z lekarzem wszystkie
adv conj
ich żaden UUUNKKK
subst praet
UUUNKKK i ogrzewanie
adj subst
przywódcą . </s>
subst adj
baz wojskowych i
subst adj
– i to
subst adj
Dla jednych jest
num adj
UUUNKKK , tak
