In [1]:
# Import libraries
from flair.data import Sentence
from flair.models import SequenceTagger
from cassis import *



## Import cas and typesystem

> These two files are generated from the Dkpro using OpenNLPSegmenter

In [2]:
tsfile = 'ts1.xml'
csfile = './output1.xmi/document.txt.xmi'

TOKEN_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"
SENTENCE_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"

# Load Typesystem
with open(tsfile, 'rb') as f:
    typesystem = load_typesystem(f)

# Load Cas
with open(csfile, 'rb') as f:
    cas = load_cas_from_xmi(f, typesystem=typesystem)

In [3]:
tokens = list(cas.select(TOKEN_TYPE))
words = [cas.get_covered_text(token) for token in tokens]
tokens

  


[de_tudarmstadt_ukp_dkpro_core_api_segmentation_type_Token(xmiID=4, parent=None, lemma=None, stem=None, pos=None, morph=None, id=None, form=None, syntacticFunction=None, begin=0, end=8, type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'),
 de_tudarmstadt_ukp_dkpro_core_api_segmentation_type_Token(xmiID=5, parent=None, lemma=None, stem=None, pos=None, morph=None, id=None, form=None, syntacticFunction=None, begin=9, end=11, type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'),
 de_tudarmstadt_ukp_dkpro_core_api_segmentation_type_Token(xmiID=6, parent=None, lemma=None, stem=None, pos=None, morph=None, id=None, form=None, syntacticFunction=None, begin=12, end=17, type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'),
 de_tudarmstadt_ukp_dkpro_core_api_segmentation_type_Token(xmiID=7, parent=None, lemma=None, stem=None, pos=None, morph=None, id=None, form=None, syntacticFunction=None, begin=18, end=21, type='de.tudarmstadt.ukp.dkpro.core.api.segmentati

In [4]:
print(cas.sofa_string)

Courtney de Souza has been living with her daughter Susan in Berlin for about 6 years .


## Implementation of FlairNLP Named Entity Recognition on the sofa_string generated from the cas

In [5]:
model_name = 'ner'
sentence = Sentence(cas.sofa_string)

nlp = SequenceTagger.load(model_name)
nlp.predict(sentence)

2020-06-17 17:06:19,731 loading file /home/allan/snap/jupyter/6/.flair/models/en-ner-conll03-v0.4.pt


[Sentence: "Courtney de Souza has been living with her daughter Susan in Berlin for about 6 years ."   [− Tokens: 17  − Token-Labels: "Courtney <B-PER> de <I-PER> Souza <E-PER> has been living with her daughter Susan <S-PER> in Berlin <S-LOC> for about 6 years ."]]

In [6]:
# Get annotation type
AnnotationType = typesystem.get_type(TOKEN_TYPE)
NERType = typesystem.get_type("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity")

In [7]:
i = 0
for span in sentence.get_spans('ner'):
    cas_sentence = AnnotationType(begin = span.start_pos, 
                                  end = span.end_pos)
    if sentence.get_spans('ner')[i].tag == 'PER':
        val = 'person'
    if sentence.get_spans('ner')[i].tag == 'LOC':
        val = 'location'
    ner_annotation = NERType(begin = span.start_pos,
                             end = span.end_pos,
                             value = val)
    cas.add_annotation(cas_sentence)
    cas.add_annotation(ner_annotation)
    i=i+1
xmi = cas.to_xmi('./output2.xmi', pretty_print=True)

In [8]:
csfile = './output2.xmi'
# Load Cas
with open(csfile, 'rb') as f:
    cas = load_cas_from_xmi(f, typesystem=typesystem)

In [9]:
tokens = list(cas.select("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity"))
words = [cas.get_covered_text(token) for token in tokens]
tokens

  


[de_tudarmstadt_ukp_dkpro_core_api_ner_type_NamedEntity(xmiID=22, value='person', identifier=None, begin=0, end=17, type='de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity'),
 de_tudarmstadt_ukp_dkpro_core_api_ner_type_NamedEntity(xmiID=24, value='person', identifier=None, begin=52, end=57, type='de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity'),
 de_tudarmstadt_ukp_dkpro_core_api_ner_type_NamedEntity(xmiID=26, value='location', identifier=None, begin=61, end=67, type='de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity')]

## Import the new cas

> This file is generated from the DKPro-Cassis which has FlairNLP NER annotations

In [13]:
tsfile = 'ts3.xml'
csfile = './output3.xmi/document.txt.xmi'

POS = "de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger"
SENTENCE_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"

# Load Typesystem
with open(tsfile, 'rb') as f:
    typesystem = load_typesystem(f)

# Load Cas
with open(csfile, 'rb') as f:
    cas = load_cas_from_xmi(f, typesystem=typesystem)

In [14]:
tokens = list(cas.select(POS))
words = [cas.get_covered_text(token) for token in tokens]
tokens

Exception: Type with name [de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger] not found!