In [1]:
# Import libraries
from flair.data import Sentence
from flair.models import SequenceTagger
from cassis import *

## Import cas and typesystem

These two files were generated with DKPro Core using the OpenNlpSegmenter component.

flair ver. 0.5

dkpro-cassis ver. 0.2.9

In [3]:
# Fully qualified names of the DKPro segmentation annotation types.
SENTENCE_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"
TOKEN_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"

# Input files: the type system description and the serialized CAS.
csfile = 'OpenNlpSegmenter.xmi'
tsfile = 'TypeSystem.xml'

# The type system is parsed first because the XMI loader below needs it.
with open(tsfile, 'rb') as typesystem_stream:
    typesystem = load_typesystem(typesystem_stream)

# Deserialize the CAS from its XMI file using that type system.
with open(csfile, 'rb') as xmi_stream:
    cas = load_cas_from_xmi(xmi_stream, typesystem=typesystem)

In [4]:
# Gather every Token annotation from the CAS, plus the text each one covers.
tokens = [annotation for annotation in cas.select(TOKEN_TYPE)]
words = list(map(cas.get_covered_text, tokens))

# Show the first three annotations as the cell output.
tokens[:3]

  


[de_tudarmstadt_ukp_dkpro_core_api_segmentation_type_Token(xmiID=12, parent=None, lemma=None, stem=None, pos=None, morph=None, id=None, form=None, syntacticFunction=None, begin=0, end=5, type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'),
 de_tudarmstadt_ukp_dkpro_core_api_segmentation_type_Token(xmiID=13, parent=None, lemma=None, stem=None, pos=None, morph=None, id=None, form=None, syntacticFunction=None, begin=6, end=9, type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'),
 de_tudarmstadt_ukp_dkpro_core_api_segmentation_type_Token(xmiID=14, parent=None, lemma=None, stem=None, pos=None, morph=None, id=None, form=None, syntacticFunction=None, begin=10, end=14, type='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token')]

In [5]:
# Print the document text (sofa string) held by the CAS currently bound to `cas`.
print(cas.sofa_string)

Obama was born in Honolulu, Hawaii, making him the first president not born in North America .
After graduating from Columbia University in 1983, he worked as a community organizer in Chicago .
In 1988, he enrolled in Harvard Law School, where he was the first black person to head the Harvard Law Review .
After graduating, he became a civil rights attorney and an academic, teaching constitutional law at the University of Chicago Law School from 1992 to 2004 .
Turning to elective politics, he represented the 13th district from 1997 until 2004 in the Illinois Senate, when he ran for the U.S. Senate. Obama received national attention in 2004 with his March Senate primary win, his well-received July Democratic National Convention keynote address, and his landslide November election to the Senate .
In 2008, he was nominated for president a year after his presidential campaign began, and after close primary campaigns against Hillary Clinton .
Obama was elected over Republican John McCain and

## Applying Flair named entity recognition to the sofa_string taken from the CAS

In [2]:
# Start from an empty CAS built on the DKPro Core type system bundled with cassis.
# NOTE: this rebinds `cas`; any CAS previously held under that name is no longer
# reachable through it.
dkpro_typesystem = load_dkpro_core_typesystem()
cas = Cas(typesystem=dkpro_typesystem)

# List every type the CAS knows about, one per line.
for type_definition in cas.typesystem.get_types():
    print(type_definition)

Type(name='de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.Morpheme', supertypeName='uima.tcas.Annotation', description=None, _children={}, _features={'morphTag': Feature(name='morphTag', rangeTypeName='uima.cas.String', description=None, elementType=None, multipleReferencesAllowed=None, _has_reserved_name=False)}, _inherited_features={'begin': Feature(name='begin', rangeTypeName='uima.cas.Integer', description=None, elementType=None, multipleReferencesAllowed=None, _has_reserved_name=False), 'end': Feature(name='end', rangeTypeName='uima.cas.Integer', description=None, elementType=None, multipleReferencesAllowed=None, _has_reserved_name=False), 'sofa': Feature(name='sofa', rangeTypeName='uima.cas.Sofa', description=None, elementType=None, multipleReferencesAllowed=None, _has_reserved_name=False)})
Type(name='de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Div', supertypeName='uima.tcas.Annotation', description='Document structure element.', _children={'de.tudarmstadt.ukp.dkp

In [3]:
# Text to be tagged, written as one adjacent string literal per sentence.
# Python joins adjacent literals at compile time, so `document` is a single
# line of text with sentences separated by a single space.
document = (
    "Obama was born in Honolulu, Hawaii, making him the first president not born in North America . "
    "After graduating from Columbia University in 1983, he worked as a community organizer in Chicago . "
    "In 1988, he enrolled in Harvard Law School, where he was the first black person to head the Harvard Law Review . "
    "After graduating, he became a civil rights attorney and an academic, teaching constitutional law at the University of Chicago Law School from 1992 to 2004 . "
    "Turning to elective politics, he represented the 13th district from 1997 until 2004 in the Illinois Senate, when he ran for the U.S. Senate. "
    "Obama received national attention in 2004 with his March Senate primary win, his well-received July Democratic National Convention keynote address, and his landslide November election to the Senate . "
    "In 2008, he was nominated for president a year after his presidential campaign began, and after close primary campaigns against Hillary Clinton . "
    "Obama was elected over Republican John McCain and was inaugurated alongside Joe Biden on January 20, 2009 . "
    "Nine months later, he was named the 2009 Nobel Peace Prize laureate ."
)

In [4]:
# Wrap the whole document in a single Flair Sentence object.
sentence = Sentence(document)

# Load the pretrained sequence tagger identified by the 'ner' model name.
model_name = 'ner'
nlp = SequenceTagger.load(model_name)

# Run the tagger over the sentence; the call's return value is the cell output.
nlp.predict(sentence)

2020-06-27 14:31:05,344 loading file /home/allan/.flair/models/en-ner-conll03-v0.4.pt


[Sentence: "Obama was born in Honolulu, Hawaii, making him the first president not born in North America . After graduating from Columbia University in 1983, he worked as a community organizer in Chicago . In 1988, he enrolled in Harvard Law School, where he was the first black person to head the Harvard Law Review . After graduating, he became a civil rights attorney and an academic, teaching constitutional law at the University of Chicago Law School from 1992 to 2004 . Turning to elective politics, he represented the 13th district from 1997 until 2004 in the Illinois Senate, when he ran for the U.S. Senate. Obama received national attention in 2004 with his March Senate primary win, his well-received July Democratic National Convention keynote address, and his landslide November election to the Senate . In 2008, he was nominated for president a year after his presidential campaign began, and after close primary campaigns against Hillary Clinton . Obama was elected over Republican Joh

In [5]:
# Fetch the spans labelled under 'ner', then list them below a header line.
ner_spans = sentence.get_spans('ner')

print('The following NER tags are found:')
for ner_span in ner_spans:
    print(ner_span)

The following NER tags are found:
Span [1]: "Obama"   [− Labels: PER (0.9999)]
Span [5,6]: "Honolulu, Hawaii,"   [− Labels: LOC (0.9625)]
Span [15,16]: "North America"   [− Labels: LOC (0.9967)]
Span [21,22]: "Columbia University"   [− Labels: ORG (0.9744)]
Span [32]: "Chicago"   [− Labels: LOC (1.0)]
Span [39,40,41]: "Harvard Law School,"   [− Labels: LOC (0.8482)]
Span [52,53,54]: "Harvard Law Review"   [− Labels: ORG (0.8027)]
Span [72,73,74,75,76]: "University of Chicago Law School"   [− Labels: ORG (0.8778)]
Span [97,98]: "Illinois Senate,"   [− Labels: LOC (0.6585)]
Span [104]: "U.S."   [− Labels: LOC (0.9955)]
Span [106]: "Obama"   [− Labels: PER (0.9999)]
Span [115]: "Senate"   [− Labels: ORG (1.0)]
Span [121,122,123]: "Democratic National Convention"   [− Labels: MISC (0.8813)]
Span [133]: "Senate"   [− Labels: ORG (1.0)]
Span [155,156]: "Hillary Clinton"   [− Labels: PER (0.9616)]
Span [158]: "Obama"   [− Labels: PER (0.9999)]
Span [162]: "Republican"   [− Labels: MISC (1.0)]

In [12]:
# Look up the DKPro NamedEntity annotation type in the CAS's type system; the
# returned type is called later to create annotation instances.
NERType = cas.typesystem.get_type(
    "de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity"
)

In [13]:
# Mapping from the tags Flair assigns to the value strings stored on the
# NamedEntity annotations.
TAG_TO_VALUE = {
    'PER': 'person',
    'LOC': 'location',
    'ORG': 'organization',
    'MISC': 'miscellaneous',
}

# The span offsets below index into `document`, so the CAS needs that text for
# the annotations to cover anything. Only set it when the CAS has no text yet,
# so an already-populated CAS is left untouched.
if cas.sofa_string is None:
    cas.sofa_string = document

# Add one NamedEntity annotation per Flair span.
for span in sentence.get_spans('ner'):
    ner_annotation = NERType(
        begin=span.start_pos,
        end=span.end_pos,
        # Fall back to the raw tag for anything outside the mapping. The
        # previous if-chain left `val` unbound (NameError on the first span)
        # or silently reused the previous span's value in that case.
        value=TAG_TO_VALUE.get(span.tag, span.tag),
    )
    cas.add_annotation(ner_annotation)

# Serialize the annotated CAS. With a path argument the XMI is written to that
# file, so the return value is not kept.
cas.to_xmi('./flairNLP.xmi', pretty_print=True)

In [14]:
# Read back the XMI file written by the NER step.
# NOTE(review): this reuses the module-level `typesystem` rather than the type
# system of the CAS that was serialized — confirm it declares NamedEntity.
csfile = 'flairNLP.xmi'

with open(csfile, 'rb') as xmi_stream:
    cas = load_cas_from_xmi(xmi_stream, typesystem=typesystem)

AttributeError: 'Cas' object has no attribute 'get_type'

In [11]:
# Gather the NamedEntity annotations from the CAS and the text each one covers.
# The names `tokens` / `words` are reused here although they now hold entities.
tokens = [
    annotation
    for annotation in cas.select("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity")
]
words = list(map(cas.get_covered_text, tokens))

# Show the first four annotations as the cell output.
tokens[:4]

  


[de_tudarmstadt_ukp_dkpro_core_api_ner_type_NamedEntity(xmiID=217, value='person', identifier=None, begin=0, end=5, type='de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity'),
 de_tudarmstadt_ukp_dkpro_core_api_ner_type_NamedEntity(xmiID=219, value='location', identifier=None, begin=18, end=35, type='de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity'),
 de_tudarmstadt_ukp_dkpro_core_api_ner_type_NamedEntity(xmiID=221, value='location', identifier=None, begin=79, end=92, type='de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity'),
 de_tudarmstadt_ukp_dkpro_core_api_ner_type_NamedEntity(xmiID=223, value='organization', identifier=None, begin=117, end=136, type='de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity')]

## Import the new cas

This file was generated with DKPro-Cassis and contains the FlairNLP NER annotations.

In [12]:
# Fully qualified name of the DKPro part-of-speech annotation type.
PosTagger = "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS"

# Load the POS-tagged CAS from its XMI file, reusing the module-level type system.
csfile = 'OpenNlpPosTagger.xmi'

with open(csfile, 'rb') as xmi_stream:
    cas = load_cas_from_xmi(xmi_stream, typesystem=typesystem)

In [13]:
# Gather the annotations selected by the POS type name and the text each covers.
tokens = [annotation for annotation in cas.select(PosTagger)]
words = list(map(cas.get_covered_text, tokens))

# Show the first four annotations as the cell output.
tokens[:4]

  


[de_tudarmstadt_ukp_dkpro_core_api_lexmorph_type_pos_POS_ADP(xmiID=527, begin=15, end=17, PosValue='IN', coarseValue='ADP', type='de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_ADP'),
 de_tudarmstadt_ukp_dkpro_core_api_lexmorph_type_pos_POS_ADP(xmiID=540, begin=76, end=78, PosValue='IN', coarseValue='ADP', type='de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_ADP'),
 de_tudarmstadt_ukp_dkpro_core_api_lexmorph_type_pos_POS_ADP(xmiID=545, begin=95, end=100, PosValue='IN', coarseValue='ADP', type='de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_ADP'),
 de_tudarmstadt_ukp_dkpro_core_api_lexmorph_type_pos_POS_ADP(xmiID=547, begin=112, end=116, PosValue='IN', coarseValue='ADP', type='de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_ADP')]