In [372]:
import sys
sys.path.append('../')
import pandas as pd
import random
import os
import numpy as np
from cnt.model import DesignEstimator, save_ner_model, load_ner_model,save_ner_model_v2, load_ner_model_v2
from cnt.annotate import (annotate, annotate_single_design, 
                          annotate_designs, 
                          extract_string_from_annotation, split_alternativenames)
from cnt.evaluate import Metrics
from cnt.create_rdf_graph import create_graph

import spacy
from cnt.io import  Database_Connection
import warnings
warnings.filterwarnings('ignore')

import pathlib
# Keep a handle on the real PosixPath class so the alias below can be undone.
temp = pathlib.PosixPath
# Alias PosixPath to WindowsPath only when actually running on Windows
# (presumably so that model files saved on a POSIX system can be loaded
# there -- TODO confirm). Doing this on Linux/macOS would make every
# `pathlib.Path(...)` resolve to WindowsPath, which is not usable there.
if os.name == "nt":
    pathlib.PosixPath = pathlib.WindowsPath

In [373]:
import pathlib
import mysql.connector

### Define the column names for the ID and the design columns

In [374]:
# Names of the ID column and of the English / German design-text columns.
id_col, design_col_en, design_col_de = "id", "design_en", "design_de"

In [375]:
# Open a MySQL connection. The connection settings are read from environment
# variables so that no password is stored in the notebook:
#   CNT_DB_HOST     (default: "localhost")
#   CNT_DB_USER     (default: "root")
#   CNT_DB_PASSWORD (required -- a KeyError is raised when it is not set)
#   CNT_DB_NAME     (default: "thrakien_d4n4_2")
mydb = mysql.connector.connect(
    host=os.environ.get("CNT_DB_HOST", "localhost"),
    user=os.environ.get("CNT_DB_USER", "root"),
    password=os.environ["CNT_DB_PASSWORD"],
    database=os.environ.get("CNT_DB_NAME", "thrakien_d4n4_2"),
)
# Buffered cursor on that connection.
cursor = mydb.cursor(buffered=True)


# Load coin data

In [376]:
# IDs to load from the database; one or several entries are allowed.
# Example with several IDs:
#   coin_arr = [3941, 3914, 37103, 17220, 201, 208, 945, 946, 947]
coin_arr = [
    3941,
]

# manual entering of the data

# Named Entity Recognition

### Define the path and name of the model

In [377]:
# Directory and file name of the English NER model.
model_directory_eng, model_name_eng = "cnt/trained_model/ner/english/", "english_cno"

# Directory and file name of the German NER model.
model_directory_ger, model_name_ger = "cnt/trained_model/ner/german/", "german_cno"

----

### Load the model

In [378]:
# Load the English model first, then the German one; each is given the ID
# column and the design column of its own language.
model_ner_eng, model_ner_ger = (
    load_ner_model_v2(directory, name, id_col, text_col)
    for directory, name, text_col in (
        (model_directory_eng, model_name_eng, design_col_en),
        (model_directory_ger, model_name_ger, design_col_de),
    )
)

----

### Load designs

In [379]:
from urllib.parse import quote_plus

# Build the SQLAlchemy URL from environment variables so that no password is
# stored in the notebook:
#   CNT_DB_USER     (default: "root")
#   CNT_DB_PASSWORD (required -- a KeyError is raised when it is not set)
#   CNT_DB_HOST     (default: "localhost")
#   CNT_DB_NAME     (default: "thrakien_d4n4_2")
# User and password are URL-encoded because characters such as "@" or "/"
# would otherwise corrupt the URL.
db_user = quote_plus(os.environ.get("CNT_DB_USER", "root"))
db_password = quote_plus(os.environ["CNT_DB_PASSWORD"])
db_host = os.environ.get("CNT_DB_HOST", "localhost")
db_name = os.environ.get("CNT_DB_NAME", "thrakien_d4n4_2")
dc = Database_Connection(
    f"mysql+mysqlconnector://{db_user}:{db_password}@{db_host}/{db_name}"
)

# Load the selected rows once per language from the "data_designs" table.
designs_eng = dc.load_design_with_id("data_designs", coin_arr, [id_col, design_col_en])
designs_de = dc.load_design_with_id("data_designs", coin_arr, [id_col, design_col_de])

# With no `on=` argument pandas joins on the columns both frames share.
designs = pd.merge(designs_eng, designs_de)

-----

### Prediction

In [380]:

# English designs: echo each sentence, then store its NER prediction
# (requested as a doc via `as_doc=True`).
predictions_en = []
for design_text in designs[design_col_en]:
    print(design_text)
    predictions_en.append(
        model_ner_eng.predict_single_sentence_clear(design_text, as_doc=True)
    )

# German designs: same procedure with the German model.
predictions_de = []
for design_text in designs[design_col_de]:
    print(design_text)
    predictions_de.append(
        model_ner_ger.predict_single_sentence_clear(design_text, as_doc=True)
    )

# Combined list: all English predictions followed by all German ones.
predictions = predictions_en + predictions_de

Asclepius standing facing, head left, holding serpent-staff in right hand. Border of dots.
Bust of youthful Anchialos, right, wearing taenia. Border of dots.
Asklepios stehend von vorn, Kopf nach links, mit der Rechten sich auf den Schlangenstab stützend, über dem linken Arm Gewand. Perlkreis.
Brustbild des jugendlichen Anchialos nach rechts mit Taenia und Gewand. Perlkreis.


In [381]:
from spacy import displacy

# Highlight colour per entity label. 'salmon' replaces the misspelt 'salmom',
# which is not a valid colour name, so PLANT entities had no usable colour.
colors = {'PERSON': 'mediumpurple', 'OBJECT': 'greenyellow', 'ANIMAL': 'orange',
          'PLANT': 'salmon', 'VERBS': 'skyblue'}
# NOTE(review): displaCy documents the label filter option as 'ents'; the key
# 'ent' used here looks like it is ignored, so every label would be rendered.
# Left unchanged because switching to 'ents' would hide VERBS -- confirm intent.
options = {'ent': ['PERSON', 'OBJECT', 'ANIMAL', 'PLANT'], 'colors': colors}

# The options are identical for every prediction, so they are built once
# above instead of inside the loop.
for pred in predictions:
    displacy.render(pred, style='ent', jupyter=True, options=options)

# Relation Extraction

In [382]:
from cnt.model import load_pipeline, predict_re_single_sentence_eng, predict_re_single_sentence_ger

### Define the path and name of the model

In [383]:
# Both relation-extraction models live in the same directory.
re_model_directory = "cnt/trained_model/re/"
# File names of the English and German relation-extraction models.
re_model_name_eng, re_model_name_ger = "english_cno", "german_cno"

### Load the model

In [384]:
# Load the English pipeline first, then the German one, from the shared
# relation-extraction directory.
model_re_eng, model_re_ger = (
    load_pipeline(re_model_directory, pipeline_name)
    for pipeline_name in (re_model_name_eng, re_model_name_ger)
)

### Use the ``predict_re_single_sentence_eng`` and ``predict_re_single_sentence_ger`` functions to predict the relations in a single sentence

In [385]:
# Collect the relation predictions: every English design first, then every
# German design. A design without any predicted relation is recorded as the
# placeholder string "No Relation.".
predictions = []
for predict_fn, re_model, text_col in (
    (predict_re_single_sentence_eng, model_re_eng, design_col_en),
    (predict_re_single_sentence_ger, model_re_ger, design_col_de),
):
    for design_text in designs[text_col]:
        relations = predict_fn(re_model, design_text)
        predictions.append(relations if len(relations) != 0 else "No Relation.")
   

In [386]:
# Display the collected relation predictions (English designs first, then German).
predictions

[[('Asclepius', 'PERSON', 'holding', 'serpent', 'ANIMAL'),
  ('Asclepius', 'PERSON', 'holding', 'staff', 'OBJECT')],
 [('Anchialos', 'PERSON', 'wearing', 'taenia', 'OBJECT')],
 [('Asklepios', 'PERSON', 'stützen', 'Schlangenstab', 'OBJECT')],
 [('Anchialos', 'PERSON', 'tragen', 'Gewand', 'OBJECT')]]

----

### Upload data to database

NER

In [387]:
# Set to False to skip writing the predictions to the database.
upload = True
if upload:
    # The `dc` connection created when the designs were loaded is reused, so
    # the credentials are not repeated in this cell.

    # Run both NER models over the full design frames.
    cnt_pred_en = model_ner_eng.predict_clear(designs_eng)
    cnt_pred_de = model_ner_ger.predict_clear(designs_de)

    cnt_pred_predictions_only_en = cnt_pred_en["y"]
    cnt_pred_predictions_only_de = cnt_pred_de["y"]

    # Flatten each (design id, list of entities) row into one row per entity.
    ner_columns = ["DesignID", "Entity", "Label_Entity"]
    cnt_ner_output_en = pd.DataFrame(
        [(str(designid), *entity)
         for _, (designid, entity_list) in cnt_pred_en.iterrows()
         for entity in entity_list],
        columns=ner_columns)
    cnt_ner_output_de = pd.DataFrame(
        [(str(designid), *entity)
         for _, (designid, entity_list) in cnt_pred_de.iterrows()
         for entity in entity_list],
        columns=ner_columns)

    # The English rows recreate the table; the German rows are appended.
    # Previously both writes used if_exists="replace", so the German upload
    # silently discarded the English rows written just before it.
    cnt_ner_output_en.to_sql("cnt_pipeline_ner", dc.mysql_connection,
                             if_exists="replace", index=False)
    cnt_ner_output_de.to_sql("cnt_pipeline_ner", dc.mysql_connection,
                             if_exists="append", index=False)

RE

In [388]:
if upload:
    # The `dc` connection created when the designs were loaded is reused, so
    # the credentials are not repeated in this cell.

    # Run both relation-extraction pipelines over the full design frames.
    cnt_pred_en = model_re_eng.predict(designs_eng)
    cnt_pred_de = model_re_ger.predict(designs_de)

    # Flatten each (design id, list of relations) row into one row per relation.
    re_columns = ["design_id", "person", "label_Person", "relation", "object",
                  "label_object"]
    cnt_pipeline_output_en = pd.DataFrame(
        [(str(designid), *relation)
         for _, (designid, relation_list) in cnt_pred_en.iterrows()
         for relation in relation_list],
        columns=re_columns)
    cnt_pipeline_output_de = pd.DataFrame(
        [(str(designid), *relation)
         for _, (designid, relation_list) in cnt_pred_de.iterrows()
         for relation in relation_list],
        columns=re_columns)

    # The English rows recreate the table; the German rows are appended.
    # Previously both writes used if_exists="replace", so the German upload
    # silently discarded the English rows written just before it.
    cnt_pipeline_output_en.to_sql("cnt_pipeline_output", dc.mysql_connection,
                                  if_exists="replace", index=False)
    cnt_pipeline_output_de.to_sql("cnt_pipeline_output", dc.mysql_connection,
                                  if_exists="append", index=False)

----

### Create rdf graphs for the coins

In [389]:
# Create the RDF graphs for the IDs selected in `coin_arr`.
create_graph(coin_arr)

Working on ID:  3941
Working on ID:  3941
Working on ID:  3941
Working on ID:  3941
