# SDTR data testing and evaluation

```
created by: Muhammad Sulton Tauhid
sources: 
    1. https://stackoverflow.com/questions/44827930/evaluation-in-a-spacy-ner-model
```

In [4]:
import json

import pandas as pd
import spacy
from IPython.display import display
from spacy import displacy
from spacy.scorer import Scorer
from spacy.training.example import Example

# Load the models for AJB page 1 and AJB pages 2 & 3

In [5]:
# Directories of the two saved spaCy pipelines: one for AJB page 1 and one
# shared by AJB pages 2 and 3. Paths are relative to the notebook's folder.
AJB_1_MODEL_DIR = "./ajb_1_model/"
AJB_23_MODEL_DIR = "./ajb_23_model/"

ajb_nlp_model1 = spacy.load(AJB_1_MODEL_DIR)
ajb_nlp_model23 = spacy.load(AJB_23_MODEL_DIR)

# Load the test data and convert it to spaCy's annotation format

In [6]:
def _read_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line is parsed with ``json.loads``. The file is decoded as
    UTF-8 explicitly: without ``encoding=``, ``open`` falls back to the
    platform's locale encoding, which can mis-decode non-ASCII characters and
    so shift the character offsets stored in the annotations.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        A list with one decoded JSON object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as read_file:
        # Blank lines (e.g. a trailing newline at end of file) are skipped
        # instead of being handed to json.loads, which would raise on them.
        return [json.loads(line) for line in read_file if line.strip()]


def _to_spacy_format(doccano_entries):
    """Convert exported records into ``(text, {"entities": [...]})`` tuples.

    Args:
        doccano_entries: Iterable of dicts that each have a ``'data'`` key
            (the text) and a ``'label'`` key (a list of annotation entries).

    Returns:
        A list of ``(text, {"entities": [(e[0], e[1], e[2]), ...]})`` tuples,
        where each entity tuple holds the first three items of one label
        entry (presumably start offset, end offset and label name - confirm
        against the export format).
    """
    return [
        (
            entry['data'],
            {"entities": [(e[0], e[1], e[2]) for e in entry['label']]},
        )
        for entry in doccano_entries
    ]


# Raw records exactly as exported, one list per test set.
doccano_data_ajb_1 = _read_jsonl(r"../../dataset/export/ajb_1_test.jsonl")
doccano_data_ajb_23 = _read_jsonl(r"../../dataset/export/ajb_23_test.jsonl")

# The same records reshaped into the (text, annotations) pairs used by the
# prediction and evaluation cells below.
spacy_data_ajb_1 = _to_spacy_format(doccano_data_ajb_1)
spacy_data_ajb_23 = _to_spacy_format(doccano_data_ajb_23)

# Test the model on the AJB page 1 data

In [7]:
# Run the page-1 pipeline over the text of every test sample (element 0 of
# each (text, annotations) pair) and keep the resulting docs in order.
results_test_ajb1 = [ajb_nlp_model1(sample[0]) for sample in spacy_data_ajb_1]

# Visualise the predicted entities of all docs with displaCy's entity style.
displacy.render(results_test_ajb1, style='ent', page=True)

# Evaluate the performance scores for AJB page 1

In [46]:
# Score the AJB page 1 model against the annotated test set: first one table
# per test document, then one table for the whole set.
# `pd`, `Scorer` and `Example` come from the import cell at the top of the
# notebook, so this cell and the page 2 & 3 evaluation cell no longer depend
# on each other's imports.

examples = []
scorer = Scorer(ajb_nlp_model1)

for text, entities in spacy_data_ajb_1:
    # The processed doc carries the model's predictions; Example.from_dict
    # pairs it with the reference annotations given in `entities`.
    doc = ajb_nlp_model1(text)
    example = Example.from_dict(doc, entities)
    examples.append(example)

    # Scores for this single document only.
    result = scorer.score([example])

    # Flatten the nested score dict into dotted keys (one row per metric)
    # and label the single value column.
    dataframe_result = pd.json_normalize(result).transpose()
    dataframe_result.columns = ["score"]

    # The part of the text before the first tab is used as the document's
    # label in the printed heading.
    print("Data test: {0}".format(text.split("\t")[0]))
    display(dataframe_result)
    print("\n")

# Scores computed over all test documents together.
results = scorer.score(examples)

dataframe_results = pd.json_normalize(results).transpose()
dataframe_results.columns = ['score']

print("Overall data test")
display(dataframe_results)

Data test: AJB1_1989_T_1


Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.0
ents_r,0.0
ents_f,0.0
ents_per_type.penjual_nama.p,0.0
ents_per_type.penjual_nama.r,0.0
ents_per_type.penjual_nama.f,0.0




Data test: AJB1_1990_T_2


Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.625
ents_r,0.454545
ents_f,0.526316
ents_per_type.no_ajb.p,1.0
ents_per_type.no_ajb.r,1.0
ents_per_type.no_ajb.f,1.0




Data test: AJB1_2008_T_3


Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.857143
ents_r,0.923077
ents_f,0.888889
ents_per_type.no_ajb.p,0.5
ents_per_type.no_ajb.r,1.0
ents_per_type.no_ajb.f,0.666667




Overall data test


Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.62963
ents_r,0.53125
ents_f,0.576271
ents_per_type.penjual_nama.p,0.666667
ents_per_type.penjual_nama.r,0.666667
ents_per_type.penjual_nama.f,0.666667


# Test the model on the AJB pages 2 & 3 data

In [28]:
# Run the page 2 & 3 pipeline over the text of every test sample (element 0
# of each (text, annotations) pair) and keep the resulting docs in order.
results_test_ajb23 = [ajb_nlp_model23(sample[0]) for sample in spacy_data_ajb_23]

# Visualise the predicted entities of all docs with displaCy's entity style.
displacy.render(results_test_ajb23, style='ent', page=True)

# Evaluate the performance scores for AJB pages 2 & 3

In [47]:
# Score the AJB page 2 & 3 model against its annotated test set: one table per
# test document, followed by one table for the whole set.
scorer = Scorer(ajb_nlp_model23)
examples = []

for text, entities in spacy_data_ajb_23:
    # Pair the model's processed doc with the reference annotations.
    doc = ajb_nlp_model23(text)
    example = Example.from_dict(doc, entities)
    examples.append(example)

    # Per-document scores, flattened to one metric per row.
    result = scorer.score([example])
    dataframe_result = pd.json_normalize(result).T
    dataframe_result.columns = ["score"]

    # The text before the first tab labels the document in the heading.
    document_label = text.split("\t")[0]
    print("Data test: {0}".format(document_label))
    display(dataframe_result)
    print("\n")

# Scores computed over all test documents together.
results = scorer.score(examples)
dataframe_results = pd.json_normalize(results).T
dataframe_results.columns = ["score"]

print("Overall data test")
display(dataframe_results)

Data test: AJB2_1988_T_1




Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.333333
ents_r,0.222222
ents_f,0.266667
ents_per_type.no_kohir_girik.p,0.0
ents_per_type.no_kohir_girik.r,0.0
ents_per_type.no_kohir_girik.f,0.0




Data test: AJB2_1989_T_2


Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.555556
ents_r,0.555556
ents_f,0.555556
ents_per_type.no_kohir_girik.p,0.5
ents_per_type.no_kohir_girik.r,0.5
ents_per_type.no_kohir_girik.f,0.5




Data test: AJB2_1990_T_3


Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.666667
ents_r,0.6
ents_f,0.631579
ents_per_type.no_persil.p,1.0
ents_per_type.no_persil.r,1.0
ents_per_type.no_persil.f,1.0




Data test: AJB2_2008_T_4




Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,1.0
ents_r,1.0
ents_f,1.0
ents_per_type.no_persil.p,0.0
ents_per_type.no_persil.r,0.0
ents_per_type.no_persil.f,0.0




Data test: AJB3_2008_T_5




Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,1.0
ents_r,1.0
ents_f,1.0
ents_per_type.no_kohir_girik.p,0.0
ents_per_type.no_kohir_girik.r,0.0
ents_per_type.no_kohir_girik.f,0.0




Overall data test


Unnamed: 0,score
token_acc,1.0
token_p,1.0
token_r,1.0
token_f,1.0
ents_p,0.65625
ents_r,0.583333
ents_f,0.617647
ents_per_type.no_kohir_girik.p,0.5
ents_per_type.no_kohir_girik.r,0.4
ents_per_type.no_kohir_girik.f,0.444444
