# Tagger Evaluation


`Evaluates the tagger by comparing its predicted POS tags against the actual POS tags and reporting per-tag and overall recall, precision, and F-score.`

In [1]:
import evaluator as te
from sklearn.metrics import f1_score, recall_score, precision_score
import pandas as pd

## News sentences (1220 tokens)

In [2]:
# Tag the news test file, then collect the predicted and the actual tag sequences.
# NOTE(review): `words=words` is passed to extract_actual_pos_tags only in this
# section; the blog and example sections call it without `words` -- confirm
# which form is intended.
words = te.tag_test_sentences(test_all=False, specific='news-sentences.txt')

y_pred_1 = te.extract_predicted_pos_tags(words=words)
y_true_1 = te.extract_actual_pos_tags(test_all=False, specific='news-sentences.txt', words=words)

print(len(y_pred_1))
print(len(y_true_1))

# The scikit-learn metrics used later pair the two sequences element by
# element, so a length mismatch should stop the notebook here rather than
# being spotted by eye in the printed numbers.
assert len(y_pred_1) == len(y_true_1), 'predicted and actual tag counts differ'

('Gitanggong', 'VERB')
('karon', 'ADV')
('sa', 'PART')
('Carbon', 'NOUN')
('Police', 'NOUN')
('Station', 'NOUN')
('ang', 'DET')
('langyaw', 'ADJ')
('nga', 'PART')
('si', 'DET')
('Ken', 'NOUN')
('.', 'SYM')
('Dihang', 'ADV')
('wa', 'ADV')
('nakahatag', 'VERB')
('og', 'CONJ')
('igong', 'ADJ')
('rason', 'NOUN')
('nganong', 'ADV')
('kuyog', 'VERB')
('niya', 'PRON')
('ang', 'DET')
('mga', 'PART')
('bata', 'NOUN')
(',', 'SYM')
('gi-turnover', 'VERB')
('si', 'DET')
('Ken', 'NOUN')
('ngadto', 'PART')
('sa', 'PART')
('kapulisan', 'ADJ')
('.', 'SYM')
('Hugot', 'ADJ')
('nga', 'PART')
('gipanghimakak', 'OTH')
('ni', 'DET')
('Ken', 'NOUN')
('nga', 'PART')
('duna', 'ADJ')
('siyay', 'PRON')
('dautang', 'OTH')
('tuyo', 'VERB')
('sa', 'PART')
('mga', 'PART')
('bata', 'NOUN')
('.', 'SYM')
('Matod', 'ADV')
('niya', 'PRON')
(',', 'SYM')
('lima', 'NUM')
('ka', 'PRON')
('tuig', 'NOUN')
('na', 'ADV')
('siya', 'PRON')
('nga', 'PART')
('nagbalik-balik', 'VERB')
('sa', 'PART')
('Sugbo', 'NOUN')
('aron', 'CONJ')

In [3]:
# Confusion matrix for the news sentences, labelled with the tag inventory on
# both axes.
# NOTE(review): assumes te.confusion_matrix orders its rows/columns the same
# way as tag_columns -- confirm against the evaluator module.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(actual=y_true_1, pred=y_pred_1)
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,28,14,0,0,2,0,3,1,0,0,5
ADV,0,91,0,0,0,0,0,0,0,0,2
CONJ,0,0,51,0,0,0,0,0,0,0,1
DET,0,0,11,64,0,0,2,0,0,0,6
NOUN,16,0,1,0,217,0,47,1,0,0,34
NUM,0,0,0,0,1,36,0,0,0,0,1
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,0,1,2,1,0,0,1,272,15,0,11
PRON,0,1,0,0,0,0,1,0,39,0,11
SYM,0,0,0,0,0,0,0,0,0,87,0


In [4]:
# Per-tag values computed by te.cm_values from the news confusion matrix,
# displayed under the TP / FP / FN / TN column labels.
values = te.cm_values(matrix=matrix)
values_col = ['TP', 'FP', 'FN', 'TN']
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,28,20,25,1147
ADV,91,17,2,1110
CONJ,51,14,1,1154
DET,64,1,19,1136
NOUN,217,27,99,877
NUM,36,0,2,1182
OTH,0,78,0,1142
PART,272,2,31,915
PRON,39,17,13,1151
SYM,87,0,0,1133


In [5]:
# Per-tag recall, precision and F-score for the news sentences.
# `labels=tag_columns` pins the order and number of the returned scores to the
# index used below. Without it scikit-learn reports scores for the sorted
# labels that happen to occur in y_true/y_pred, which would misalign with (or
# fail to fit) the index if any tag were absent from both sequences.
recalls = recall_score(y_true=y_true_1, y_pred=y_pred_1, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true_1, y_pred=y_pred_1, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true_1, y_pred=y_pred_1, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F-Scores', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)


Unnamed: 0,Recall,Precision,F-Scores
ADJ,0.528302,0.583333,0.554455
ADV,0.978495,0.842593,0.905473
CONJ,0.980769,0.784615,0.871795
DET,0.771084,0.984615,0.864865
NOUN,0.686709,0.889344,0.775
NUM,0.947368,1.0,0.972973
OTH,0.0,0.0,0.0
PART,0.89769,0.992701,0.942808
PRON,0.75,0.696429,0.722222
SYM,1.0,1.0,1.0


In [6]:
# Weighted-average recall, precision and F1 over all tags (news sentences).
recall, precision, fscore = (
    metric(y_true=y_true_1, y_pred=y_pred_1, average='weighted')
    for metric in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    data=[recall, precision, fscore],
    index=['Recall', 'Precision', 'F1 Score'],
    columns=['Overall'],
)

Unnamed: 0,Overall
Recall,0.797541
Precision,0.863919
F1 Score,0.824712


## Blog sentences (1045 tokens)

In [7]:
# Tag the blog test file, then collect the actual and the predicted tag sequences.
words = te.tag_test_sentences(test_all=False, specific='blog-sentences.txt')
y_true_2 = te.extract_actual_pos_tags(test_all=False, specific='blog-sentences.txt')
y_pred_2 = te.extract_predicted_pos_tags(words=words)
print(len(y_true_2))
print(len(y_pred_2))

# The scikit-learn metrics used later pair the two sequences element by
# element, so a length mismatch should stop the notebook here rather than
# being spotted by eye in the printed numbers.
assert len(y_true_2) == len(y_pred_2), 'predicted and actual tag counts differ'

('Kung', 'CONJ')
('hilig', 'ADJ')
('ka', 'PRON')
('og', 'CONJ')
('food', 'OTH')
('trip', 'OTH')
('ug', 'CONJ')
('laag', 'VERB')
(',', 'SYM')
('adunay', 'ADJ')
('usa', 'NUM')
('ka', 'PRON')
('food', 'OTH')
('court', 'OTH')
('sud', 'ADJ')
('sa', 'PART')
('usa', 'NUM')
('ka', 'PRON')
('mall', 'OTH')
('sa', 'PART')
('dakbayan', 'NOUN')
('sa', 'PART')
('Mandaue', 'NOUN')
('nga', 'PART')
('angayan', 'ADJ')
('nimong', 'PRON')
('bisitahon', 'VERB')
('.', 'SYM')
('Si', 'VERB')
('Kiko', 'NOUN')
('adunay', 'ADJ')
('usa', 'NUM')
('ka', 'PRON')
('iro', 'NOUN')
('.', 'SYM')
('Blacky', 'NOUN')
('ang', 'DET')
('ngalan', 'NOUN')
('sa', 'PART')
('iyang', 'PRON')
('iro', 'NOUN')
('.', 'SYM')
('Itom', 'ADJ')
('ang', 'DET')
('kolor', 'NOUN')
('sa', 'PART')
('iyang', 'PRON')
('balahibo', 'NOUN')
(',', 'SYM')
('tambokon', 'ADJ')
(',', 'SYM')
('abtikon', 'ADJ')
(',', 'SYM')
('ug', 'CONJ')
('buotan', 'ADJ')
('.', 'SYM')
('Kada', 'ADV')
('sayo', 'ADJ')
('sa', 'PART')
('buntag', 'NOUN')
('kon', 'CONJ')
('way', '

In [8]:
# Confusion matrix for the blog sentences, labelled with the tag inventory on
# both axes.
# NOTE(review): assumes te.confusion_matrix orders its rows/columns the same
# way as tag_columns -- confirm against the evaluator module.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(actual=y_true_2, pred=y_pred_2)
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,54,12,0,0,6,0,6,0,0,0,3
ADV,4,65,0,1,2,0,7,1,0,0,4
CONJ,0,0,54,0,0,0,0,0,0,0,0
DET,0,0,17,49,0,0,0,0,0,0,9
NOUN,13,9,0,0,106,7,36,0,0,0,40
NUM,0,0,0,0,0,16,0,0,0,0,0
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,1,1,0,0,0,0,2,177,13,0,8
PRON,1,0,1,1,1,0,0,0,84,0,7
SYM,0,0,0,0,0,0,0,0,0,97,0


In [9]:
# Per-tag values computed by te.cm_values from the blog confusion matrix,
# displayed under the TP / FP / FN / TN column labels.
values = te.cm_values(matrix=matrix)
values_col = ['TP', 'FP', 'FN', 'TN']
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,54,31,27,933
ADV,65,25,19,936
CONJ,54,18,0,973
DET,49,2,26,968
NOUN,106,27,105,807
NUM,16,7,0,1022
OTH,0,63,0,982
PART,177,2,25,841
PRON,84,14,11,936
SYM,97,0,0,948


In [10]:
# Per-tag recall, precision and F1 score for the blog sentences.
# `labels=tag_columns` pins the order and number of the returned scores to the
# index used below. Without it scikit-learn reports scores for the sorted
# labels that happen to occur in y_true/y_pred, which would misalign with (or
# fail to fit) the index if any tag were absent from both sequences.
recalls = recall_score(y_true=y_true_2, y_pred=y_pred_2, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true_2, y_pred=y_pred_2, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true_2, y_pred=y_pred_2, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F1 Score', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

Unnamed: 0,Recall,Precision,F1 Score
ADJ,0.666667,0.635294,0.650602
ADV,0.77381,0.722222,0.747126
CONJ,1.0,0.75,0.857143
DET,0.653333,0.960784,0.777778
NOUN,0.50237,0.796992,0.616279
NUM,1.0,0.695652,0.820513
OTH,0.0,0.0,0.0
PART,0.876238,0.988827,0.929134
PRON,0.884211,0.857143,0.870466
SYM,1.0,1.0,1.0


In [11]:
# Weighted-average recall, precision and F1 over all tags (blog sentences).
recall, precision, fscore = (
    metric(y_true=y_true_2, y_pred=y_pred_2, average='weighted')
    for metric in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    data=[recall, precision, fscore],
    index=['Recall', 'Precision', 'F1 Score'],
    columns=['Overall'],
)

Unnamed: 0,Overall
Recall,0.751196
Precision,0.815518
F1 Score,0.771871


## Example sentences (300 tokens)

In [12]:
# Tag the example test file, then collect the actual and the predicted tag sequences.
words = te.tag_test_sentences(test_all=False, specific='example-sentences.txt')
y_true_3 = te.extract_actual_pos_tags(test_all=False, specific='example-sentences.txt')
y_pred_3 = te.extract_predicted_pos_tags(words=words)

print(len(y_true_3))
print(len(y_pred_3))

# The scikit-learn metrics used later pair the two sequences element by
# element, so a length mismatch should stop the notebook here rather than
# being spotted by eye in the printed numbers.
assert len(y_true_3) == len(y_pred_3), 'predicted and actual tag counts differ'

('Tig-dala', 'NOUN')
('man', 'PART')
('siya', 'PRON')
('diri', 'ADV')
('ug', 'CONJ')
('mga', 'PART')
('tnt', 'OTH')
('.', 'SYM')
('Siya', 'PRON')
('man', 'PART')
('tig-kuha', 'NOUN')
('ug', 'CONJ')
('visa', 'VERB')
('.', 'SYM')
('Pulis', 'ADJ')
('man', 'PART')
('kana', 'PRON')
('siya', 'PRON')
('mandakop', 'VERB')
('man', 'PART')
('kana', 'PRON')
('siya', 'PRON')
('ug', 'CONJ')
('tnt', 'OTH')
('.', 'SYM')
('Yun', 'NOUN')
('kuno', 'ADV')
('ang', 'DET')
('pinakaluud', 'ADJ')
('kuno', 'ADV')
('kaayo', 'ADV')
('tanawon', 'VERB')
('.', 'SYM')
('Duha', 'NUM')
('ka', 'PRON')
('electric', 'OTH')
('fan', 'ADJ')
('gamiton', 'VERB')
('nako', 'PRON')
('.', 'SYM')
('Siya', 'PRON')
('di', 'ADV')
('gyud', 'OTH')
('puydi', 'ADV')
(',', 'SYM')
('singtanon', 'OTH')
('.', 'SYM')
('Silhigan', 'NOUN')
('ni', 'DET')
('Rosa', 'NOUN')
('ang', 'DET')
('kwarto', 'NOUN')
('.', 'SYM')
('Ilabay', 'VERB')
('sa', 'PART')
('bata', 'NOUN')
('ang', 'DET')
('basura', 'NOUN')
('.', 'SYM')
('Si', 'VERB')
('Ruben', 'NOUN')

In [13]:
# Confusion matrix for the example sentences, labelled with the tag inventory
# on both axes.
# NOTE(review): assumes te.confusion_matrix orders its rows/columns the same
# way as tag_columns -- confirm against the evaluator module.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(actual=y_true_3, pred=y_pred_3)
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,4,1,0,0,0,0,1,0,0,0,0
ADV,0,8,2,0,1,1,2,1,0,0,0
CONJ,0,0,7,0,0,0,0,0,0,0,0
DET,0,0,9,25,0,0,0,1,0,0,7
NOUN,3,0,0,0,51,0,6,0,2,0,5
NUM,0,0,0,0,0,3,0,0,0,0,0
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,0,5,2,0,0,0,0,25,3,0,0
PRON,0,0,0,0,1,0,0,0,35,0,2
SYM,0,0,0,0,0,0,0,0,0,45,0


In [14]:
# Per-tag values computed by te.cm_values from the example-sentence confusion
# matrix, displayed under the TP / FP / FN / TN column labels.
values = te.cm_values(matrix=matrix)
values_col = ['TP', 'FP', 'FN', 'TN']
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,4,4,2,290
ADV,8,7,7,278
CONJ,7,13,0,280
DET,25,0,17,258
NOUN,51,8,16,225
NUM,3,1,0,296
OTH,0,10,0,290
PART,25,2,10,263
PRON,35,5,3,257
SYM,45,0,0,255


In [15]:
# Per-tag recall, precision and F1 score for the example sentences.
# `labels=tag_columns` pins the order and number of the returned scores to the
# index used below. Without it scikit-learn reports scores for the sorted
# labels that happen to occur in y_true/y_pred, which would misalign with (or
# fail to fit) the index if any tag were absent from both sequences.
recalls = recall_score(y_true=y_true_3, y_pred=y_pred_3, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true_3, y_pred=y_pred_3, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true_3, y_pred=y_pred_3, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F1 Score', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

Unnamed: 0,Recall,Precision,F1 Score
ADJ,0.666667,0.5,0.571429
ADV,0.533333,0.533333,0.533333
CONJ,1.0,0.35,0.518519
DET,0.595238,1.0,0.746269
NOUN,0.761194,0.864407,0.809524
NUM,1.0,0.75,0.857143
OTH,0.0,0.0,0.0
PART,0.714286,0.925926,0.806452
PRON,0.921053,0.875,0.897436
SYM,1.0,1.0,1.0


In [16]:
# Weighted-average recall, precision and F1 over all tags (example sentences).
recall, precision, fscore = (
    metric(y_true=y_true_3, y_pred=y_pred_3, average='weighted')
    for metric in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    data=[recall, precision, fscore],
    index=['Recall', 'Precision', 'F1 Score'],
    columns=['Overall'],
)

Unnamed: 0,Overall
Recall,0.786667
Precision,0.85254
F1 Score,0.805618


## All Sentences (2565 tokens)

* News sentences
* Blog sentences
* Example sentences

In [17]:
# Tag the test sentences using the evaluator's default arguments (no specific
# file is named here), then collect the actual and the predicted tag sequences.
words = te.tag_test_sentences()
y_true = te.extract_actual_pos_tags()
y_pred = te.extract_predicted_pos_tags(words=words)

# The per-file sections print both lengths for a visual check; this section
# had no check at all. The scikit-learn metrics used later pair the two
# sequences element by element, so fail here if they do not line up.
assert len(y_true) == len(y_pred), 'predicted and actual tag counts differ'

('Tig-dala', 'NOUN')
('man', 'PART')
('siya', 'PRON')
('diri', 'ADV')
('ug', 'CONJ')
('mga', 'PART')
('tnt', 'OTH')
('.', 'SYM')
('Siya', 'PRON')
('man', 'PART')
('tig-kuha', 'NOUN')
('ug', 'CONJ')
('visa', 'VERB')
('.', 'SYM')
('Pulis', 'ADJ')
('man', 'PART')
('kana', 'PRON')
('siya', 'PRON')
('mandakop', 'VERB')
('man', 'PART')
('kana', 'PRON')
('siya', 'PRON')
('ug', 'CONJ')
('tnt', 'OTH')
('.', 'SYM')
('Yun', 'NOUN')
('kuno', 'ADV')
('ang', 'DET')
('pinakaluud', 'ADJ')
('kuno', 'ADV')
('kaayo', 'ADV')
('tanawon', 'VERB')
('.', 'SYM')
('Duha', 'NUM')
('ka', 'PRON')
('electric', 'OTH')
('fan', 'ADJ')
('gamiton', 'VERB')
('nako', 'PRON')
('.', 'SYM')
('Siya', 'PRON')
('di', 'ADV')
('gyud', 'OTH')
('puydi', 'ADV')
(',', 'SYM')
('singtanon', 'OTH')
('.', 'SYM')
('Silhigan', 'NOUN')
('ni', 'DET')
('Rosa', 'NOUN')
('ang', 'DET')
('kwarto', 'NOUN')
('.', 'SYM')
('Ilabay', 'VERB')
('sa', 'PART')
('bata', 'NOUN')
('ang', 'DET')
('basura', 'NOUN')
('.', 'SYM')
('Si', 'VERB')
('Ruben', 'NOUN')

('Busa', 'ADV')
(',', 'SYM')
('dihang', 'ADV')
('nakakita', 'VERB')
('kog', 'PRON')
('opportunity', 'OTH')
(',', 'SYM')
('nikuha', 'VERB')
('ko', 'PRON')
('og', 'CONJ')
('Niner', 'NOUN')
('nga', 'PART')
('mao', 'VERB')
('na', 'ADV')
('karon', 'ADV')
('ang', 'DET')
('akong', 'PRON')
('gigamit', 'VERB')
('sa', 'PART')
('pagsikad', 'NOUN')
('sulod', 'ADJ')
('na', 'ADV')
('sa', 'PART')
('pipila', 'VERB')
('ka', 'PRON')
('mga', 'PART')
('higayon', 'VERB')
('.', 'SYM')
('Kon', 'CONJ')
('itandi', 'VERB')
('sa', 'PART')
('26', 'NUM')
(',', 'SYM')
('duna', 'ADJ')
('kini', 'PRON')
('deperensya', 'VERB')
('sa', 'PART')
('kagahion', 'NOUN')
('sa', 'PART')
('pagtungas', 'NOUN')
(',', 'SYM')
('apan', 'CONJ')
('sa', 'PART')
('ako', 'PRON')
('nang', 'ADV')
('ginaingon', 'OTH')
(',', 'SYM')
('mas', 'PART')
('nindot', 'ADJ')
('padulhogon', 'VERB')
('ang', 'DET')
('dako', 'ADJ')
('og', 'CONJ')
('ligid', 'VERB')
('kay', 'CONJ')
('mas', 'PART')
('stable', 'OTH')
('alang', 'PART')
('nako', 'PRON')
('.', 'SY

In [18]:
# Confusion matrix for all test sentences, labelled with the tag inventory on
# both axes.
# NOTE(review): assumes te.confusion_matrix orders its rows/columns the same
# way as tag_columns -- confirm against the evaluator module.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(actual=y_true, pred=y_pred)
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,86,29,0,0,6,0,10,0,0,0,9
ADV,4,164,2,1,3,1,9,2,0,0,6
CONJ,0,0,112,0,0,0,0,0,0,0,1
DET,0,0,37,136,0,0,2,1,0,0,24
NOUN,32,15,1,0,368,7,89,0,2,0,80
NUM,0,0,0,0,1,55,0,0,0,0,1
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,4,9,4,1,0,0,3,473,29,0,17
PRON,1,1,1,1,2,0,1,0,158,0,20
SYM,0,0,0,0,0,0,0,0,0,229,0


In [19]:
# Per-tag values computed by te.cm_values from the all-sentences confusion
# matrix, displayed under the TP / FP / FN / TN column labels.
values = te.cm_values(matrix=matrix)
values_col = ['TP', 'FP', 'FN', 'TN']
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,86,58,54,2367
ADV,164,64,28,2309
CONJ,112,45,1,2407
DET,136,3,64,2362
NOUN,368,59,226,1912
NUM,55,8,2,2500
OTH,0,151,0,2414
PART,473,4,67,2021
PRON,158,32,27,2348
SYM,229,0,0,2336


In [20]:
# Per-tag recall, precision and F1 score over all test sentences.
# `labels=tag_columns` pins the order and number of the returned scores to the
# index used below. Without it scikit-learn reports scores for the sorted
# labels that happen to occur in y_true/y_pred, which would misalign with (or
# fail to fit) the index if any tag were absent from both sequences.
recalls = recall_score(y_true=y_true, y_pred=y_pred, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true, y_pred=y_pred, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true, y_pred=y_pred, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F1 Score', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

Unnamed: 0,Recall,Precision,F1 Score
ADJ,0.614286,0.597222,0.605634
ADV,0.854167,0.719298,0.780952
CONJ,0.99115,0.713376,0.82963
DET,0.68,0.978417,0.80236
NOUN,0.619529,0.861827,0.720862
NUM,0.964912,0.873016,0.916667
OTH,0.0,0.0,0.0
PART,0.875926,0.991614,0.930187
PRON,0.854054,0.831579,0.842667
SYM,1.0,1.0,1.0


In [21]:
# Weighted-average recall, precision and F1 over all tags (all test sentences).
recall, precision, fscore = (
    metric(y_true=y_true, y_pred=y_pred, average='weighted')
    for metric in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    data=[recall, precision, fscore],
    index=['Recall', 'Precision', 'F1 Score'],
    columns=['Overall'],
)

Unnamed: 0,Overall
Recall,0.773099
Precision,0.840063
F1 Score,0.797318
