In [5]:
# Import libraries
from flair.data import Sentence
from flair.models import SequenceTagger
from cassis import *

### FlairNLP test

In [34]:
sentence = Sentence('Coutney de Souza has been living with her daughter Susan in Berlin for about 6 years .')

In [35]:
tagger = SequenceTagger.load('ner')

2020-06-01 14:00:36,774 loading file /home/allan/.flair/models/en-ner-conll03-v0.4.pt


In [36]:
tagger.predict(sentence)

[Sentence: "Coutney de Souza has been living with her daughter Susan in Berlin for about 6 years ."   [− Tokens: 17  − Token-Labels: "Coutney <B-PER> de <I-PER> Souza <E-PER> has been living with her daughter Susan <S-PER> in Berlin <S-LOC> for about 6 years ."]]

In [37]:
print(sentence)
print('The following NER tags are found:')

for entity in sentence.get_spans('ner'):
    print(entity)

Sentence: "Coutney de Souza has been living with her daughter Susan in Berlin for about 6 years ."   [− Tokens: 17  − Token-Labels: "Coutney <B-PER> de <I-PER> Souza <E-PER> has been living with her daughter Susan <S-PER> in Berlin <S-LOC> for about 6 years ."]
The following NER tags are found:
Span [1,2,3]: "Coutney de Souza"   [− Labels: PER (0.984)]
Span [10]: "Susan"   [− Labels: PER (0.9995)]
Span [12]: "Berlin"   [− Labels: LOC (1.0)]


In [38]:
print(sentence.to_tagged_string())

Coutney <B-PER> de <I-PER> Souza <E-PER> has been living with her daughter Susan <S-PER> in Berlin <S-LOC> for about 6 years .


### Cassis flair integration testing

In [1]:
# Import libraries
from flair.data import Sentence
from flair.models import SequenceTagger
from cassis import *

In [2]:
tsfile = 'ts.xml'
csfile = './out.xml/document.txt.xmi'

TOKEN_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"
SENTENCE_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"

In [3]:
with open(tsfile, 'rb') as f:
    typesystem = load_typesystem(f)

In [4]:
with open(csfile, 'rb') as f:
    cas = load_cas_from_xmi(f, typesystem=typesystem)

In [5]:
AnnotationType = typesystem.get_type(TOKEN_TYPE)

In [6]:
tokens = list(cas.select(TOKEN_TYPE))

In [55]:
cas.get_view()

TypeError: get_view() missing 1 required positional argument: 'name'

In [7]:
words = [cas.get_covered_text(token) for token in tokens]

  """Entry point for launching an IPython kernel.


In [8]:
print(words)

['Courtney', 'de', 'Souza', 'has', 'been', 'living', 'with', 'her', 'daughter', 'Susan', 'in', 'Berlin', 'for', 'about', '6', 'years', '.']


In [9]:
for sentence in cas.select(SENTENCE_TYPE):
    for token in cas.select_covered(TOKEN_TYPE, sentence):
        print(token.get_covered_text())

        # Annotation values can be accessed as properties
        print('Token: begin={0}, end={1}'.format(token.begin, token.end))

Courtney
Token: begin=0, end=8
de
Token: begin=9, end=11
Souza
Token: begin=12, end=17
has
Token: begin=18, end=21
been
Token: begin=22, end=26
living
Token: begin=27, end=33
with
Token: begin=34, end=38
her
Token: begin=39, end=42
daughter
Token: begin=43, end=51
Susan
Token: begin=52, end=57
in
Token: begin=58, end=60
Berlin
Token: begin=61, end=67
for
Token: begin=68, end=71
about
Token: begin=72, end=77
6
Token: begin=78, end=79
years
Token: begin=80, end=85
.
Token: begin=86, end=87


### TODO
- Annotate the sentence with NER from FlairNLP

In [10]:
sentence = ''
for word in words:
    sentence += word + ' '

In [11]:
print(sentence)

Courtney de Souza has been living with her daughter Susan in Berlin for about 6 years . 


In [12]:
sentence = Sentence(sentence)

In [13]:
model_name = 'ner'
nlp = SequenceTagger.load(model_name)

2020-06-03 18:06:01,170 loading file /home/allan/.flair/models/en-ner-conll03-v0.4.pt


In [14]:
nlp.predict(sentence)

[Sentence: "Courtney de Souza has been living with her daughter Susan in Berlin for about 6 years ."   [− Tokens: 17  − Token-Labels: "Courtney <B-PER> de <I-PER> Souza <E-PER> has been living with her daughter Susan <S-PER> in Berlin <S-LOC> for about 6 years ."]]

In [15]:
for entity in sentence.get_spans('ner'):
    print(entity)

Span [1,2,3]: "Courtney de Souza"   [− Labels: PER (0.9992)]
Span [10]: "Susan"   [− Labels: PER (0.9996)]
Span [12]: "Berlin"   [− Labels: LOC (1.0)]


In [16]:
print('The following NER tags are found:')

sentence.to_dict(tag_type='ner')

The following NER tags are found:


{'text': 'Courtney de Souza has been living with her daughter Susan in Berlin for about 6 years .',
 'labels': [],
 'entities': [{'text': 'Courtney de Souza',
   'start_pos': 0,
   'end_pos': 17,
   'labels': [PER (0.9992)]},
  {'text': 'Susan', 'start_pos': 52, 'end_pos': 57, 'labels': [PER (0.9996)]},
  {'text': 'Berlin', 'start_pos': 61, 'end_pos': 67, 'labels': [LOC (1.0)]}]}

In [17]:
dict = sentence.to_dict(tag_type='ner')

In [18]:
dict['entities'][0]['end_pos']

17

In [19]:
len(dict['entities'])

3

In [20]:
sentence.to_tagged_string()

'Courtney <B-PER> de <I-PER> Souza <E-PER> has been living with her daughter Susan <S-PER> in Berlin <S-LOC> for about 6 years .'

In [21]:
sentence.to_plain_string()

'Courtney de Souza has been living with her daughter Susan in Berlin for about 6 years .'

In [34]:
token = sentence[4]
tag = token.get_tag('ner')
print(tag.value)
sentence.get_spans('ner')

O


[<PER-span (1,2,3): "Courtney de Souza">,
 <PER-span (10): "Susan">,
 <LOC-span (12): "Berlin">]

In [28]:
for sent in range(len(sentence)):
    token = sentence[sent]
    tag = token.get_tag('ner')
    wort = str(token).split()
    print(f'"{wort[2]}: {tag.value}"')

"Courtney: B-PER"
"de: I-PER"
"Souza: E-PER"
"has: O"
"been: O"
"living: O"
"with: O"
"her: O"
"daughter: O"
"Susan: S-PER"
"in: O"
"Berlin: S-LOC"
"for: O"
"about: O"
"6: O"
"years: O"
".: O"


In [49]:
# Add the new feature NER from FlairNLP
typesystem.add_feature(type_=AnnotationType, name='ner', rangeTypeName='String')

i = 0
length = len(sentence)
for sent in range(len(sentence)):
    token = sentence[sent]
    tag = sentence[i].get_tag('ner')
    fields = {'begin': tokens[sent].begin,
              'end': tokens[sent].end,
              'pos': tag.value }
    annotation = AnnotationType(**fields)
    cas.add_annotation(annotation)
    i=i+1
xmi = cas.to_xmi()

Exception: Type with name [String] not found!