# Tagger Evaluation


Evaluates the tagger by comparing its predicted POS tags against the actual POS tags and reporting recall, precision and F-score (per tag and overall).

In [1]:
import evaluator as te
from sklearn.metrics import f1_score, recall_score, precision_score
import pandas as pd

## News sentences (1220 tokens)

In [2]:
# Tag the news test sentences, then collect the actual and predicted tag
# sequences through the `evaluator` helpers.
words = te.tag_test_sentences(test_all=False, specific='news-sentences.txt')

y_true_1 = te.extract_actual_pos_tags(test_all=False, specific='news-sentences.txt')
y_pred_1 = te.extract_predicted_pos_tags(words=words)

# The metrics below compare the two sequences element by element, so fail fast
# if they differ in length instead of scoring misaligned tags.
assert len(y_true_1) == len(y_pred_1), (len(y_true_1), len(y_pred_1))
print(len(y_pred_1))

1220


In [3]:
# Tag labels used for both axes of the confusion-matrix table.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(pred=y_pred_1, actual=y_true_1)
# NOTE(review): the TP/FP/FN counts derived from this matrix suggest rows are
# actual tags and columns are predicted tags -- confirm in te.confusion_matrix.
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,28,14,0,0,2,0,3,1,0,0,5
ADV,0,91,0,0,0,0,0,0,0,0,2
CONJ,0,0,51,0,0,0,0,0,0,0,1
DET,0,0,11,64,0,0,2,0,0,0,6
NOUN,16,0,1,0,217,0,47,1,0,0,34
NUM,0,0,0,0,1,35,1,0,0,0,1
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,0,1,2,1,0,0,1,272,15,0,11
PRON,0,1,0,0,0,0,1,0,39,0,11
SYM,0,0,0,0,0,0,0,0,0,87,0


In [4]:
# Counts derived from the news confusion matrix, shown one row per tag.
values_col = ['TP', 'FP', 'FN', 'TN']
values = te.cm_values(matrix=matrix)
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,28,20,25,1147
ADV,91,17,2,1110
CONJ,51,14,1,1154
DET,64,1,19,1136
NOUN,217,27,99,877
NUM,35,0,3,1182
OTH,0,79,0,1141
PART,272,2,31,915
PRON,39,17,13,1151
SYM,87,0,0,1133


In [6]:
# Per-tag recall / precision / F1 for the news sentences.
#
# With `average=None` and no `labels`, scikit-learn orders the per-class scores
# by the sorted set of tags that occur in y_true/y_pred. If a tag from
# `tag_columns` were absent from the data, the scores would no longer line up
# with `tag_columns`. Passing `labels=tag_columns` pins the order and length.
# Relies on `tag_columns` from the confusion-matrix cell.
unknown_tags = (set(y_true_1) | set(y_pred_1)) - set(tag_columns)
# Guard: a tag outside `tag_columns` would otherwise be dropped silently.
assert not unknown_tags, 'tags missing from tag_columns: %s' % sorted(unknown_tags)

recalls = recall_score(y_true=y_true_1, y_pred=y_pred_1, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true_1, y_pred=y_pred_1, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true_1, y_pred=y_pred_1, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F1 Score', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

Unnamed: 0,Recall,Precision,F1 Score
ADJ,0.528302,0.583333,0.554455
ADV,0.978495,0.842593,0.905473
CONJ,0.980769,0.784615,0.871795
DET,0.771084,0.984615,0.864865
NOUN,0.686709,0.889344,0.775
NUM,0.921053,1.0,0.958904
OTH,0.0,0.0,0.0
PART,0.89769,0.992701,0.942808
PRON,0.75,0.696429,0.722222
SYM,1.0,1.0,1.0


In [7]:
# Overall (micro-averaged) scores for the news sentences.
# With one tag per token, micro-averaged recall, precision and F1 all come out
# equal, which is why the three rows show the same number.
recall, precision, fscore = (
    scorer(y_true=y_true_1, y_pred=y_pred_1, average='micro')
    for scorer in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    {'Overall': [recall, precision, fscore]},
    index=['Recall', 'Precision', 'F1 Score'],
)

Unnamed: 0,Overall
Recall,0.796721
Precision,0.796721
F1 Score,0.796721


## Blog sentences (1045 tokens)

In [2]:
# Blog test set: tag the sentences, then pull out the actual and predicted tags.
blog_file = 'blog-sentences.txt'
words = te.tag_test_sentences(test_all=False, specific=blog_file)
y_true_2 = te.extract_actual_pos_tags(test_all=False, specific=blog_file)
y_pred_2 = te.extract_predicted_pos_tags(words=words)

# Show both lengths, one per line, so a mismatch is visible at a glance.
print(len(y_true_2), len(y_pred_2), sep='\n')

1045
1045


In [3]:
# Same tag labels as the other sections, used for both table axes.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(pred=y_pred_2, actual=y_true_2)
# NOTE(review): rows look like actual tags and columns like predicted tags,
# judging by the derived FP/FN counts -- confirm in te.confusion_matrix.
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,54,12,0,0,6,0,6,0,0,0,3
ADV,4,66,0,1,1,0,7,1,0,0,4
CONJ,0,0,54,0,0,0,0,0,0,0,0
DET,0,0,17,58,0,0,0,0,0,0,0
NOUN,13,9,0,0,109,0,43,0,0,0,37
NUM,0,0,0,0,0,16,0,0,0,0,0
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,1,1,0,0,0,0,2,178,15,0,5
PRON,1,0,1,1,1,0,0,0,89,0,2
SYM,0,0,0,0,0,0,0,0,0,97,0


In [4]:
# Counts derived from the blog confusion matrix, shown one row per tag.
values_col = ['TP', 'FP', 'FN', 'TN']
values = te.cm_values(matrix=matrix)
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,54,31,27,933
ADV,66,25,18,936
CONJ,54,18,0,973
DET,58,2,17,968
NOUN,109,26,102,808
NUM,16,0,0,1029
OTH,0,70,0,975
PART,178,2,24,841
PRON,89,16,6,934
SYM,97,0,0,948


In [6]:
# Per-tag recall / precision / F1 for the blog sentences.
#
# With `average=None` and no `labels`, scikit-learn orders the per-class scores
# by the sorted set of tags that occur in y_true/y_pred. If a tag from
# `tag_columns` were absent from the data, the scores would no longer line up
# with `tag_columns`. Passing `labels=tag_columns` pins the order and length.
# Relies on `tag_columns` from the confusion-matrix cell.
unknown_tags = (set(y_true_2) | set(y_pred_2)) - set(tag_columns)
# Guard: a tag outside `tag_columns` would otherwise be dropped silently.
assert not unknown_tags, 'tags missing from tag_columns: %s' % sorted(unknown_tags)

recalls = recall_score(y_true=y_true_2, y_pred=y_pred_2, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true_2, y_pred=y_pred_2, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true_2, y_pred=y_pred_2, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F1 Score', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

Unnamed: 0,Recall,Precision,F1 Score
ADJ,0.666667,0.635294,0.650602
ADV,0.785714,0.725275,0.754286
CONJ,1.0,0.75,0.857143
DET,0.773333,0.966667,0.859259
NOUN,0.516588,0.807407,0.630058
NUM,1.0,1.0,1.0
OTH,0.0,0.0,0.0
PART,0.881188,0.988889,0.931937
PRON,0.936842,0.847619,0.89
SYM,1.0,1.0,1.0


In [7]:
# Overall (micro-averaged) scores for the blog sentences.
# With one tag per token, micro-averaged recall, precision and F1 all come out
# equal, which is why the three rows show the same number.
recall, precision, fscore = (
    scorer(y_true=y_true_2, y_pred=y_pred_2, average='micro')
    for scorer in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    {'Overall': [recall, precision, fscore]},
    index=['Recall', 'Precision', 'F1 Score'],
)

Unnamed: 0,Overall
Recall,0.769378
Precision,0.769378
F1 Score,0.769378


## Example sentences (300 tokens)

In [8]:
# Tag the example test sentences, then collect the actual and predicted tag
# sequences through the `evaluator` helpers.
words = te.tag_test_sentences(test_all=False, specific='example-sentences.txt')
y_true_3 = te.extract_actual_pos_tags(test_all=False, specific='example-sentences.txt')
y_pred_3 = te.extract_predicted_pos_tags(words=words)

# The metrics below compare the two sequences element by element, so fail fast
# if they differ in length instead of scoring misaligned tags.
assert len(y_true_3) == len(y_pred_3), (len(y_true_3), len(y_pred_3))

In [9]:
# Same tag labels as the other sections, used for both table axes.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(pred=y_pred_3, actual=y_true_3)
# NOTE(review): rows look like actual tags and columns like predicted tags,
# judging by the derived FP/FN counts -- confirm in te.confusion_matrix.
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,4,1,0,0,0,0,1,0,0,0,0
ADV,0,8,2,0,1,1,2,1,0,0,0
CONJ,0,0,7,0,0,0,0,0,0,0,0
DET,0,0,9,32,0,0,0,1,0,0,0
NOUN,2,0,0,0,52,0,6,0,2,0,5
NUM,0,0,0,0,0,3,0,0,0,0,0
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,0,5,2,0,0,0,0,25,3,0,0
PRON,0,0,0,0,1,0,0,0,36,0,1
SYM,0,0,0,0,0,0,0,0,0,45,0


In [10]:
# Counts derived from the example confusion matrix, shown one row per tag.
values_col = ['TP', 'FP', 'FN', 'TN']
values = te.cm_values(matrix=matrix)
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,4,3,2,291
ADV,8,7,7,278
CONJ,7,13,0,280
DET,32,0,10,258
NOUN,52,8,15,225
NUM,3,1,0,296
OTH,0,10,0,290
PART,25,2,10,263
PRON,36,5,2,257
SYM,45,0,0,255


In [11]:
# Per-tag recall / precision / F1 for the example sentences.
#
# With `average=None` and no `labels`, scikit-learn orders the per-class scores
# by the sorted set of tags that occur in y_true/y_pred. On a small sample like
# this one a tag from `tag_columns` can easily be absent, and then the scores
# would no longer line up with `tag_columns`. Passing `labels=tag_columns` pins
# the order and length. Relies on `tag_columns` from the confusion-matrix cell.
unknown_tags = (set(y_true_3) | set(y_pred_3)) - set(tag_columns)
# Guard: a tag outside `tag_columns` would otherwise be dropped silently.
assert not unknown_tags, 'tags missing from tag_columns: %s' % sorted(unknown_tags)

recalls = recall_score(y_true=y_true_3, y_pred=y_pred_3, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true_3, y_pred=y_pred_3, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true_3, y_pred=y_pred_3, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F1 Score', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

Unnamed: 0,Recall,Precision,F1 Score
ADJ,0.666667,0.571429,0.615385
ADV,0.533333,0.533333,0.533333
CONJ,1.0,0.35,0.518519
DET,0.761905,1.0,0.864865
NOUN,0.776119,0.866667,0.818898
NUM,1.0,0.75,0.857143
OTH,0.0,0.0,0.0
PART,0.714286,0.925926,0.806452
PRON,0.947368,0.878049,0.911392
SYM,1.0,1.0,1.0


In [12]:
# Overall (micro-averaged) scores for the example sentences.
# With one tag per token, micro-averaged recall, precision and F1 all come out
# equal, which is why the three rows show the same number.
recall, precision, fscore = (
    scorer(y_true=y_true_3, y_pred=y_pred_3, average='micro')
    for scorer in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    {'Overall': [recall, precision, fscore]},
    index=['Recall', 'Precision', 'F1 Score'],
)

Unnamed: 0,Overall
Recall,0.816667
Precision,0.816667
F1 Score,0.816667


## All Sentences (2565 tokens)

* News sentences
* Blog sentences
* Example sentences

In [13]:
# Tag the test sentences using the helpers' default arguments (no specific file
# is selected), then collect the actual and predicted tag sequences.
words = te.tag_test_sentences()
y_true = te.extract_actual_pos_tags()
y_pred = te.extract_predicted_pos_tags(words=words)

# The metrics below compare the two sequences element by element, so fail fast
# if they differ in length instead of scoring misaligned tags.
assert len(y_true) == len(y_pred), (len(y_true), len(y_pred))

In [14]:
# Same tag labels as the other sections, used for both table axes.
tag_columns = [
    'ADJ', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
    'OTH', 'PART', 'PRON', 'SYM', 'VERB',
]
matrix = te.confusion_matrix(pred=y_pred, actual=y_true)
# NOTE(review): rows look like actual tags and columns like predicted tags,
# judging by the derived FP/FN counts -- confirm in te.confusion_matrix.
cm_df = pd.DataFrame(data=matrix, index=tag_columns, columns=tag_columns)
cm_df

Unnamed: 0,ADJ,ADV,CONJ,DET,NOUN,NUM,OTH,PART,PRON,SYM,VERB
ADJ,86,29,0,0,6,0,10,0,0,0,9
ADV,4,165,2,1,2,1,9,2,0,0,6
CONJ,0,0,112,0,0,0,0,0,0,0,1
DET,0,0,37,145,0,0,2,1,0,0,15
NOUN,31,16,1,0,371,0,96,0,2,0,77
NUM,0,0,0,0,1,54,1,0,0,0,1
OTH,0,0,0,0,0,0,0,0,0,0,0
PART,4,9,4,1,0,0,3,474,29,0,16
PRON,1,1,1,1,2,0,1,0,159,0,19
SYM,0,0,0,0,0,0,0,0,0,229,0


In [15]:
# Counts derived from the combined confusion matrix, shown one row per tag.
values_col = ['TP', 'FP', 'FN', 'TN']
values = te.cm_values(matrix=matrix)
values_df = pd.DataFrame(data=values, index=tag_columns, columns=values_col)
values_df

Unnamed: 0,TP,FP,FN,TN
ADJ,86,57,54,2368
ADV,165,63,27,2310
CONJ,112,45,1,2407
DET,145,3,55,2362
NOUN,371,59,223,1912
NUM,54,1,3,2507
OTH,0,159,0,2406
PART,474,4,66,2021
PRON,159,32,26,2348
SYM,229,0,0,2336


In [16]:
# Per-tag recall / precision / F1 over all test sentences.
#
# With `average=None` and no `labels`, scikit-learn orders the per-class scores
# by the sorted set of tags that occur in y_true/y_pred. If a tag from
# `tag_columns` were absent from the data, the scores would no longer line up
# with `tag_columns`. Passing `labels=tag_columns` pins the order and length.
# Relies on `tag_columns` from the confusion-matrix cell.
unknown_tags = (set(y_true) | set(y_pred)) - set(tag_columns)
# Guard: a tag outside `tag_columns` would otherwise be dropped silently.
assert not unknown_tags, 'tags missing from tag_columns: %s' % sorted(unknown_tags)

recalls = recall_score(y_true=y_true, y_pred=y_pred, labels=tag_columns, average=None)
precisions = precision_score(y_true=y_true, y_pred=y_pred, labels=tag_columns, average=None)
fscores = f1_score(y_true=y_true, y_pred=y_pred, labels=tag_columns, average=None)

recall_series = pd.Series(recalls, name='Recall', index=tag_columns, dtype=float)
precision_series = pd.Series(precisions, name='Precision', index=tag_columns, dtype=float)
fscore_series = pd.Series(fscores, name='F1 Score', index=tag_columns, dtype=float)

# One row per tag, one column per metric.
pd.concat([recall_series, precision_series, fscore_series], axis=1)

Unnamed: 0,Recall,Precision,F1 Score
ADJ,0.614286,0.601399,0.607774
ADV,0.859375,0.723684,0.785714
CONJ,0.99115,0.713376,0.82963
DET,0.725,0.97973,0.833333
NOUN,0.624579,0.862791,0.724609
NUM,0.947368,0.981818,0.964286
OTH,0.0,0.0,0.0
PART,0.877778,0.991632,0.931238
PRON,0.859459,0.832461,0.845745
SYM,1.0,1.0,1.0


In [17]:
# Overall (micro-averaged) scores over all test sentences.
# With one tag per token, micro-averaged recall, precision and F1 all come out
# equal, which is why the three rows show the same number.
recall, precision, fscore = (
    scorer(y_true=y_true, y_pred=y_pred, average='micro')
    for scorer in (recall_score, precision_score, f1_score)
)

pd.DataFrame(
    {'Overall': [recall, precision, fscore]},
    index=['Recall', 'Precision', 'F1 Score'],
)

Unnamed: 0,Overall
Recall,0.778947
Precision,0.778947
F1 Score,0.778947
