developed by Patricia Klinger, modified by Sebastian Gampe

In [1]:
import pandas as pd
import random
import os
import numpy as np
from cnt.model import DesignEstimator
from cnt.annotate import (annotate, annotate_single_design, 
                          annotate_designs, 
                          extract_string_from_annotation)
from cnt.io import (load_entities_from_file, load_entities_from_db, 
                    load_designs, replace_left_right)
from cnt.train_test import train_test_annotate
from cnt.extract_relation import path
from cnt.evaluate import score_precision_recall, score_accuracy
from spacy import displacy

In [2]:
# load designs from CNT database
english_designs = load_designs()
english_designs.head()

  cursor.execute('SELECT @@tx_isolation')


Unnamed: 0,DesignID,DesignEng
0,1,Diademed head of deified Alexander the Great w...
1,2,Diademed head of deified Alexander the Great w...
2,3,"Altar entwined by serpent, head to left."
3,4,"Altar entwined by serpent, head to right."
4,5,"Altar on two levels, flaming."


In [3]:
# create dictionary entities: key = label, value = entities
mysql_connection = "mysql://cnt:rJnW6m7kZR@localhost:3306/thrakien_cnt"
entities = {
    "PERSON": load_entities_from_db("nlp_list_person", mysql_connection),
    "OBJECT": load_entities_from_db("nlp_list_obj", mysql_connection),
    "ANIMAL": load_entities_from_db("nlp_list_animal", mysql_connection),
    "PLANT": load_entities_from_db("nlp_list_plant", mysql_connection)
}
entities["PERSON"][:3]

  cursor.execute('SELECT @@tx_isolation')


['Agrippina minor', 'Agrippina maior', 'Alexander III']

In [4]:
annotated_designs = annotate_designs(entities, english_designs)
annotated_designs = annotated_designs[
    annotated_designs.annotations.map(len) > 0]
annotated_designs.head()

Unnamed: 0,DesignEng,DesignID,annotations
0,Diademed head of deified Alexander the Great w...,1,"[(25, 44, PERSON)]"
1,Diademed head of deified Alexander the Great w...,2,"[(25, 44, PERSON)]"
2,"Altar entwined by serpent, head to left.",3,"[(0, 5, PERSON), (0, 5, OBJECT), (18, 25, ANIM..."
3,"Altar entwined by serpent, head to right.",4,"[(0, 5, PERSON), (0, 5, OBJECT), (18, 25, ANIM..."
4,"Altar on two levels, flaming.",5,"[(0, 5, PERSON), (0, 5, OBJECT)]"


In [5]:
#extract entities from designs for train/test split
extract_entities_from_designs = annotated_designs.apply(
    lambda x: extract_string_from_annotation(x.annotations, x.DesignEng), axis=1)
extract_entities_from_designs.head()

0      [Alexander the Great]
1      [Alexander the Great]
2    [Altar, Altar, serpent]
3    [Altar, Altar, serpent]
4             [Altar, Altar]
dtype: object

In [6]:
# sklearn train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(annotated_designs[["DesignID", "DesignEng"]],
                                                    annotated_designs[["DesignID", "annotations"]], 
                                                    test_size=0.25)

In [7]:
# train estimator
n_rep = 3
my_estimator = DesignEstimator(n_rep)
my_estimator.fit(X_train, y_train.annotations)

In [8]:
train_score = score_accuracy(y_train.rename(columns = {"annotations" : "y"}), my_estimator.predict(X_train))
test_score = score_accuracy(y_test.rename(columns = {"annotations" : "y"}), my_estimator.predict(X_test))
res = {"n_rep": n_rep, "train_score": train_score,
       "test_score": test_score}

In [9]:
res

{'n_rep': 3,
 'test_score': 0.7899408284023669,
 'train_score': 0.8545574202040145}

In [10]:
precision, recall = score_precision_recall(y_test.rename(columns = {"annotations" : "y"}), my_estimator.predict(X_test))
res.update({"precision":precision, "recall":recall, "split": "random"})

In [11]:
precision

0.9818431911966987

In [12]:
recall

0.9451800847457628

In [13]:
y_pred = my_estimator.predict(X_test)
y_pred_predictions_only = y_pred["y"]
y_pred_as_doc = my_estimator.predict(X_test, as_doc=True)
y_pred_as_doc_predictions_only = y_pred_as_doc["y"]

In [14]:
prediction_output = pd.DataFrame({"X_test" : X_test["DesignEng"], 
                                  "y_test" : y_test["annotations"],
                                  "y_predict" : y_pred_predictions_only,
                                  "y_predict_as_doc" : 
                                      y_pred_as_doc_predictions_only})
prediction_output.head()

Unnamed: 0,X_test,y_predict,y_predict_as_doc,y_test
186,"Athena standing in biga to right, holding rein...","[(0, 6, PERSON), (19, 23, OBJECT), (42, 47, OB...","(Athena, standing, in, biga, to, right, ,, hol...","[(0, 6, PERSON), (19, 23, OBJECT), (42, 47, OB..."
2955,"Veiled bust of Demeter, right.","[(7, 11, OBJECT), (15, 22, PERSON)]","(Veiled, bust, of, Demeter, ,, right, .)","[(7, 11, OBJECT), (15, 22, PERSON)]"
1319,"To left city goddess standing right, wearing k...","[(8, 20, PERSON), (45, 53, OBJECT), (90, 97, P...","(To, left, city, goddess, standing, right, ,, ...","[(8, 20, PERSON), (45, 53, OBJECT), (90, 97, P..."
2086,Helmet from front surmounted by starand with a...,"[(0, 6, OBJECT)]","(Helmet, from, front, surmounted, by, starand,...","[(0, 6, OBJECT)]"
628,"Head of Helios facing, slightly to left.","[(8, 14, PERSON)]","(Head, of, Helios, facing, ,, slightly, to, le...","[(8, 14, PERSON)]"


In [15]:
len(prediction_output)

1014

In [16]:
correct_predictions = prediction_output[
    prediction_output.y_predict
    == prediction_output.y_test]
correct_predictions.head()

Unnamed: 0,X_test,y_predict,y_predict_as_doc,y_test
186,"Athena standing in biga to right, holding rein...","[(0, 6, PERSON), (19, 23, OBJECT), (42, 47, OB...","(Athena, standing, in, biga, to, right, ,, hol...","[(0, 6, PERSON), (19, 23, OBJECT), (42, 47, OB..."
2955,"Veiled bust of Demeter, right.","[(7, 11, OBJECT), (15, 22, PERSON)]","(Veiled, bust, of, Demeter, ,, right, .)","[(7, 11, OBJECT), (15, 22, PERSON)]"
1319,"To left city goddess standing right, wearing k...","[(8, 20, PERSON), (45, 53, OBJECT), (90, 97, P...","(To, left, city, goddess, standing, right, ,, ...","[(8, 20, PERSON), (45, 53, OBJECT), (90, 97, P..."
2086,Helmet from front surmounted by starand with a...,"[(0, 6, OBJECT)]","(Helmet, from, front, surmounted, by, starand,...","[(0, 6, OBJECT)]"
628,"Head of Helios facing, slightly to left.","[(8, 14, PERSON)]","(Head, of, Helios, facing, ,, slightly, to, le...","[(8, 14, PERSON)]"


In [17]:
len(correct_predictions)

801

In [18]:
wrong_predictions = prediction_output[
    prediction_output.y_predict
    != prediction_output.y_test]
wrong_predictions.head()

Unnamed: 0,X_test,y_predict,y_predict_as_doc,y_test
2110,"Nude, bearded Heracles standing facing, head t...","[(14, 22, PERSON), (63, 67, OBJECT), (83, 87, ...","(Nude, ,, bearded, Heracles, standing, facing,...","[(14, 22, PERSON), (63, 67, OBJECT), (83, 92, ..."
2696,"Athena standing left, wearing helmet and long ...","[(0, 6, PERSON), (30, 36, OBJECT), (46, 52, OB...","(Athena, standing, left, ,, wearing, helmet, a...","[(0, 6, PERSON), (30, 36, OBJECT), (46, 52, OB..."
3645,Apis-bull advancing right; a disc between the ...,"[(0, 9, PERSON), (29, 33, OBJECT), (59, 65, OB...","(Apis, -, bull, advancing, right, ;, a, disc, ...","[(0, 4, PERSON), (0, 9, ANIMAL), (29, 33, OBJE..."
506,"Galley to right with six rowers; on stern, hel...","[(0, 6, OBJECT), (36, 41, OBJECT), (57, 62, OB...","(Galley, to, right, with, six, rowers, ;, on, ...","[(0, 6, OBJECT), (36, 41, OBJECT), (71, 80, OB..."
1805,"Nude Apollo standing facing, head to left, cro...","[(5, 11, PERSON), (79, 83, OBJECT), (99, 105, ...","(Nude, Apollo, standing, facing, ,, head, to, ...","[(5, 11, PERSON), (79, 83, OBJECT), (99, 105, ..."


In [19]:
len(wrong_predictions)

213

In [20]:
colors = {'OBJECT': '#99FF33', 'ANIMAL' : '#FFFF33'}
options = {'ent': ['OBJECT', 'ANIMAL'], 'colors': colors}
displacy.render(correct_predictions.y_predict_as_doc, 
                style='ent', jupyter=True, options=options)

In [21]:
displacy.render(wrong_predictions.y_predict_as_doc, 
                style='ent', jupyter=True, options=options)

In [22]:
# load designs again from CNT database and use the trained model on them
cnt_designs = load_designs()
cnt_designs.head()

cnt_pred = my_estimator.predict_clear(cnt_designs)

cnt_pred_predictions_only = cnt_pred["y"]


#prediction_output = pd.DataFrame({"X_test" : cnt_designs["DesignEng"], 
                                 # "y_test" : cnt_pred["annotations"]})
#prediction_output.head()

  cursor.execute('SELECT @@tx_isolation')


In [24]:
cnt_designs.head()

Unnamed: 0,DesignID,DesignEng
0,1,Diademed head of deified Alexander the Great w...
1,2,Diademed head of deified Alexander the Great w...
2,3,"Altar entwined by serpent, head to left."
3,4,"Altar entwined by serpent, head to right."
4,5,"Altar on two levels, flaming."


In [26]:
cnt_pred_predictions_only = cnt_pred["y"]
cnt_prediction_output = pd.DataFrame({"DesignID" : cnt_designs["DesignID"],
                                      "X_test" : cnt_designs["DesignEng"], 
                                  "y_predict" : cnt_pred_predictions_only})
cnt_prediction_output.head()
#cnt_prediction_output["y_predict"]

Unnamed: 0,DesignID,X_test,y_predict
0,1,Diademed head of deified Alexander the Great w...,"[(Alexander the Great, PERSON)]"
1,2,Diademed head of deified Alexander the Great w...,"[(Alexander the Great, PERSON), (truncation, O..."
2,3,"Altar entwined by serpent, head to left.","[(Altar, OBJECT), (serpent, ANIMAL)]"
3,4,"Altar entwined by serpent, head to right.","[(Altar, OBJECT), (serpent, ANIMAL)]"
4,5,"Altar on two levels, flaming.","[(Altar, OBJECT)]"


In [29]:
cnt_ner_output = pd.DataFrame([(str(designid), *relation) for  _, (designid, relation_list) in cnt_pred.iterrows()
                    for relation in relation_list],
            columns=["DesignID", "Entity", "Label_Entity"])

cnt_ner_output.to_sql("cnt_pipeline_ner", 
                           "mysql://cnt:rJnW6m7kZR@localhost:3306/thrakien_cnt", 
                           if_exists="replace", index=False)

  cursor.execute('SELECT @@tx_isolation')
