In [7]:
from flair.data import Sentence
from flair.models import SequenceTagger

In [2]:
sentence = Sentence('I love Berlin .')

In [3]:
tagger = SequenceTagger.load('ner')

In [4]:
tagger.predict(sentence)

[Sentence: "I love Berlin ." - 4 Tokens]

In [5]:
print(sentence)

Sentence: "I love Berlin ." - 4 Tokens


In [6]:
for entity in sentence.get_spans('ner'):
    print(entity)

LOC-span [3]: "Berlin"


In [7]:
sentence = Sentence('The grass is green .')
print(sentence)

Sentence: "The grass is green ." - 5 Tokens


In [10]:
print(sentence.get_token(4))

Token: 4 green


In [11]:
print(sentence[3])

Token: 4 green


In [12]:
for token in sentence:
    print(token)

Token: 1 The
Token: 2 grass
Token: 3 is
Token: 4 green
Token: 5 .


In [15]:
sentence = Sentence('The grass is green.', use_tokenizer = True)
print(sentence)

Sentence: "The grass is green ." - 5 Tokens


In [18]:
sentence[3].add_tag('ner', 'color')

In [20]:
print(sentence.to_tagged_string())

The grass is green <color> .


In [22]:
from flair.data import Label

In [23]:
tag: Label = sentence[3].get_tag('ner')

In [26]:
print(f'"{sentence[3]}" is tagged as "{tag.value}" with confidence score "{tag.score}"')

"Token: 4 green" is tagged as "color" with confidence score "1.0"


In [34]:
sentence = Sentence('France is the current World Cup winner.')

In [36]:
sentence.add_label('sports')

In [38]:
sentence.add_labels(['sports', 'world cup'])

In [39]:
print(sentence)

Sentence: "France is the current World Cup winner." - 7 Tokens


In [40]:
for label in sentence.labels:
    print(label)

sports (1.0)
sports (1.0)
world cup (1.0)


In [41]:
sentence = Sentence('France is the current World Cup winner', labels=['sports', 'world cup'])
print(sentence)
for label in sentence.labels:
    print(label)

Sentence: "France is the current World Cup winner" - 7 Tokens
sports (1.0)
world cup (1.0)


In [42]:
from flair.models import SequenceTagger

In [43]:
tagger = SequenceTagger.load('ner')

In [45]:
sentence = Sentence('George Washington went to Washington .')

In [47]:
tagger.predict(sentence)

[Sentence: "George Washington went to Washington ." - 6 Tokens]

In [48]:
print(sentence.to_tagged_string())

George <B-PER> Washington <E-PER> went to Washington <S-LOC> .


In [49]:
for entity in sentence.get_spans('ner'):
    print(entity)

PER-span [1,2]: "George Washington"
LOC-span [5]: "Washington"


In [51]:
print(sentence.to_dict(tag_type='ner'))

{'text': 'George Washington went to Washington .', 'labels': [], 'entities': [{'text': 'George Washington', 'start_pos': 0, 'end_pos': 17, 'type': 'PER', 'confidence': 0.999337375164032}, {'text': 'Washington', 'start_pos': 26, 'end_pos': 36, 'type': 'LOC', 'confidence': 0.9998500347137451}]}


In [53]:
tagger = SequenceTagger.load('frame')

In [57]:
sentence_1 = Sentence('George returned to Berlin to return his hat .')
sentence_2 = Sentence('He had a look at different hats .')

In [61]:
tagger.predict(sentence_1)

[Sentence: "George returned to Berlin to return his hat ." - 9 Tokens]

In [62]:
tagger.predict(sentence_2)

[Sentence: "He had a look at different hats ." - 8 Tokens]

In [63]:
print(sentence_1.to_tagged_string())
print(sentence_2.to_tagged_string())

George returned <return.01> to Berlin to return <return.02> his hat .
He had <have.LV> a look <look.01> at different hats .


In [64]:
text = 'This is a sentence. This is another sentence. I love Berlin.'

In [66]:
from segtok.segmenter import split_single

In [69]:
sentences = [Sentence(sent, use_tokenizer=True) for sent in split_single(text)]

In [70]:
sentences

[Sentence: "This is a sentence ." - 5 Tokens,
 Sentence: "This is another sentence ." - 5 Tokens,
 Sentence: "I love Berlin ." - 4 Tokens]

In [74]:
tagger: SequenceTagger = SequenceTagger.load('ner')
tagger.predict(sentences)

[Sentence: "This is a sentence ." - 5 Tokens,
 Sentence: "This is another sentence ." - 5 Tokens,
 Sentence: "I love Berlin ." - 4 Tokens]

In [5]:
from flair.embeddings import WordEmbeddings

# init embedding
glove_embedding = WordEmbeddings('glove')

In [8]:
sentence = Sentence('The grass is green .')

In [10]:
glove_embedding.embed(sentence)

[Sentence: "The grass is green ." - 5 Tokens]

In [11]:
for token in sentence:
    print(token)
    print(token.embedding)

Token: 1 The
tensor([-0.0382, -0.2449,  0.7281, -0.3996,  0.0832,  0.0440, -0.3914,  0.3344,
        -0.5755,  0.0875,  0.2879, -0.0673,  0.3091, -0.2638, -0.1323, -0.2076,
         0.3340, -0.3385, -0.3174, -0.4834,  0.1464, -0.3730,  0.3458,  0.0520,
         0.4495, -0.4697,  0.0263, -0.5415, -0.1552, -0.1411, -0.0397,  0.2828,
         0.1439,  0.2346, -0.3102,  0.0862,  0.2040,  0.5262,  0.1716, -0.0824,
        -0.7179, -0.4153,  0.2033, -0.1276,  0.4137,  0.5519,  0.5791, -0.3348,
        -0.3656, -0.5486, -0.0629,  0.2658,  0.3020,  0.9977, -0.8048, -3.0243,
         0.0125, -0.3694,  2.2167,  0.7220, -0.2498,  0.9214,  0.0345,  0.4674,
         1.1079, -0.1936, -0.0746,  0.2335, -0.0521, -0.2204,  0.0572, -0.1581,
        -0.3080, -0.4162,  0.3797,  0.1501, -0.5321, -0.2055, -1.2526,  0.0716,
         0.7056,  0.4974, -0.4206,  0.2615, -1.5380, -0.3022, -0.0734, -0.2831,
         0.3710, -0.2522,  0.0162, -0.0171, -0.3898,  0.8742, -0.7257, -0.5106,
        -0.5203, -0.1459,  

In [17]:
from flair.embeddings import CharacterEmbeddings

# init embedding
embedding = CharacterEmbeddings()

# create a sentence
sentence = Sentence('The grass is green .')

# embed words in sentence
embedding.embed(sentence)

[Sentence: "The grass is green ." - 5 Tokens]

In [18]:
from flair.embeddings import WordEmbeddings, CharacterEmbeddings

glove_embedding = WordEmbeddings('glove')
character_embeddings = CharacterEmbeddings()

In [22]:
from flair.embeddings import StackedEmbeddings

stacked_embeddings = StackedEmbeddings(
    embeddings = [glove_embedding, character_embeddings])

In [24]:
sentence = Sentence('The grass is green .')
stacked_embeddings.embed(sentence)

In [25]:
stacked_embeddings

StackedEmbeddings(
  (list_embedding_0): WordEmbeddings()
  (list_embedding_1): CharacterEmbeddings(
    (char_embedding): Embedding(275, 25)
    (char_rnn): LSTM(25, 25, bidirectional=True)
  )
)

In [26]:
for token in sentence:
    print(token)
    print(token.embedding)

Token: 1 The
tensor([-3.8194e-02, -2.4487e-01,  7.2812e-01, -3.9961e-01,  8.3172e-02,
         4.3953e-02, -3.9141e-01,  3.3440e-01, -5.7545e-01,  8.7459e-02,
         2.8787e-01, -6.7310e-02,  3.0906e-01, -2.6384e-01, -1.3231e-01,
        -2.0757e-01,  3.3395e-01, -3.3848e-01, -3.1743e-01, -4.8336e-01,
         1.4640e-01, -3.7304e-01,  3.4577e-01,  5.2041e-02,  4.4946e-01,
        -4.6971e-01,  2.6280e-02, -5.4155e-01, -1.5518e-01, -1.4107e-01,
        -3.9722e-02,  2.8277e-01,  1.4393e-01,  2.3464e-01, -3.1021e-01,
         8.6173e-02,  2.0397e-01,  5.2624e-01,  1.7164e-01, -8.2378e-02,
        -7.1787e-01, -4.1531e-01,  2.0335e-01, -1.2763e-01,  4.1367e-01,
         5.5187e-01,  5.7908e-01, -3.3477e-01, -3.6559e-01, -5.4857e-01,
        -6.2892e-02,  2.6584e-01,  3.0205e-01,  9.9775e-01, -8.0481e-01,
        -3.0243e+00,  1.2540e-02, -3.6942e-01,  2.2167e+00,  7.2201e-01,
        -2.4978e-01,  9.2136e-01,  3.4514e-02,  4.6745e-01,  1.1079e+00,
        -1.9358e-01, -7.4575e-02,  2.3

       grad_fn=<CatBackward>)


In [30]:
from flair.embeddings import WordEmbeddings, CharLMEmbeddings, DocumentPoolEmbeddings, Sentence

glove_embedding = WordEmbeddings('glove')
charlm_embedding_forward = CharLMEmbeddings('news-forward')
charlm_embedding_backward = CharLMEmbeddings('news-backward')
document_embeddings = DocumentPoolEmbeddings([glove_embedding, 
                                              charlm_embedding_forward, 
                                              charlm_embedding_backward])

  after removing the cwd from sys.path.
  """


In [35]:
sentence = Sentence('The grass is green . And the sky is blue .')

In [37]:
document_embeddings.embed(sentence)

In [40]:
print(sentence.get_embedding())

tensor([[-0.3197,  0.2621,  0.4037,  ..., -0.0008, -0.0051, -0.0109]])


In [41]:
document_embeddings = DocumentPoolEmbeddings([glove_embedding, 
                                             charlm_embedding_backward,
                                             charlm_embedding_forward],
                                            mode = 'min')

In [43]:
from flair.embeddings import WordEmbeddings, DocumentLSTMEmbeddings

glove_embedding = WordEmbeddings('glove')
document_embeddings = DocumentLSTMEmbeddings([glove_embedding])

In [45]:
sentence = Sentence('The grass is green . And the sky is blue .')
document_embeddings.embed(sentence)

In [46]:
print(sentence.get_embedding())

tensor([[ 0.0000, -0.2567, -0.3857,  0.0000,  0.0000,  0.4679, -0.0000, -0.0000,
         -0.0000,  0.0413,  0.3378, -0.0000, -0.0000, -0.0000,  0.6527, -0.6511,
          1.0144, -0.1377,  0.5243, -0.5654,  0.0000, -0.0236,  0.1107,  0.0000,
         -0.7132, -0.5130, -0.3489, -0.5734,  0.7072,  0.1158, -0.3548,  0.0000,
          0.0000, -0.1011,  0.0743,  0.5346,  0.2456,  0.3685,  0.0000,  0.1319,
         -0.6749, -0.0000,  0.0000, -0.3798,  0.4302,  0.0000,  0.1881,  0.4432,
         -0.0000,  0.6083, -0.2418,  0.5634, -0.7348,  0.7113, -0.3781, -0.4040,
          0.7722, -0.6238,  0.8772,  0.0000,  0.5456,  0.4980,  0.0000,  0.1653,
         -0.0000,  0.0553, -0.8303,  0.5382, -0.0000,  0.0000,  0.1737, -0.2544,
         -1.0751,  0.0816,  0.0000, -0.6108,  0.0000,  0.7551, -0.0000, -0.0000,
         -0.0000,  0.0000, -0.2756,  0.0173,  0.0000, -0.0000,  0.0904,  0.0000,
          0.3185, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000,  0.1771, -0.4003,
          0.0000,  0.0000, -

In [55]:
from flair.data import TaggedCorpus
from flair.data_fetcher import NLPTaskDataFetcher

corpus = NLPTaskDataFetcher.load_corpus(NLPTask.UD_ENGLISH)

NameError: name 'NLPTask' is not defined

In [56]:
from flair.models import TextClassifier
from flair.data import Sentence
classifier = TextClassifier.load('en-sentiment')
sentence = Sentence('Flair is pretty neat!')
classifier.predict(sentence)
# print sentence with predicted labels
print('Sentence above is: ', sentence.labels)

RuntimeError: storage has wrong size: expected -1862414276 got 22700