In [1]:
# install the requirements
!pip install flair

Collecting flair
[?25l  Downloading https://files.pythonhosted.org/packages/60/8f/51d1f2eb5f9b09beb1e6858b4c174a087bc3b05893438e2cd3ffbf9c6e8b/flair-0.5.1-py3-none-any.whl (201kB)
[K     |████████████████████████████████| 204kB 2.7MB/s 
Collecting deprecated>=1.2.4
  Downloading https://files.pythonhosted.org/packages/76/a1/05d7f62f956d77b23a640efc650f80ce24483aa2f85a09c03fb64f49e879/Deprecated-1.2.10-py2.py3-none-any.whl
Collecting transformers>=3.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl (769kB)
[K     |████████████████████████████████| 778kB 8.3MB/s 
Collecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 11.4MB/s 
[?25hCollecting sqlitedict>=1.

# `flair`: la librería NLP de Zalando Research

La compañía Zalando tiene necesidades de aplicar NLP en distintos ámbitos y su equipo de investigación ha liberado recientemente [`flair`](https://github.com/zalandoresearch/flair), su librería de NLP.

`flair` permite acceder a funcionalidades muy interesantes para procesar lenguaje natural, algunas de ellas muy modernas como:

- [etiquetar morfo-sintácticamente](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_2_TAGGING.md)
- extraer entidades
- clasificar automáticamente texto
- entrenar tus propios modelos para [construir otros clasificadores](https://towardsdatascience.com/text-classification-with-state-of-the-art-nlp-library-flair-b541d7add21f)
- [cargar vectores de palabras en decenas de lenguas](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)
- [usar vectores contextuales como BERT, ELMo](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)

Veamos cómo podemos acceder a algunas de sus funcionalidades.

## Análisis morfo-sintáctico

Para analizar sintácticamente un texto, necesitamos cargar un etiquetador con un modelo concreto de información morfo-sintáctica. Por ejemplo, uno capaz de analizar varias lenguas.


In [2]:
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the multilingual part-of-speech tagger by its model name.
POS_MODEL_NAME = "pos-multi-fast"
tagger = SequenceTagger.load(POS_MODEL_NAME)

2020-08-08 10:55:42,057 https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/models-v0.4/UPOS-multi-fast/pos-multi-fast.pt not found in cache, downloading to /tmp/tmpwdf5elav


100%|██████████| 72105507/72105507 [00:08<00:00, 8037189.29B/s]

2020-08-08 10:55:52,227 copying /tmp/tmpwdf5elav to cache at /root/.flair/models/pos-multi-fast.pt





2020-08-08 10:55:52,329 removing temp file /tmp/tmpwdf5elav
2020-08-08 10:55:52,933 loading file /root/.flair/models/pos-multi-fast.pt


In [3]:
# Example texts in Spanish, French and German, in that order.
multilingual_texts = (
    "Facebook nació hace década y media tras una noche de copas de Mark Zuckerberg. ",
    "Grand débat national: suivez Emmanuel Macron en direct de Bordeaux. ",
    "Hier an der Zufahrt zur Startrampe 39A, wo vor 50 Jahren die gigantischen Saturn-Raketen der Apollo-Mondmissionen im Schneckentempo vorbeigefahren sind, prangen nun die blauen Lettern des Raumfahrtunternehmens von Elon Musk an einem Hangar.",
)

# Keep the original per-sentence names so they remain available after this cell.
sentence1, sentence2, sentence3 = (Sentence(text) for text in multilingual_texts)

# Tag each sentence with the currently loaded tagger and print the tagged string.
for tagged in (sentence1, sentence2, sentence3):
    tagger.predict(tagged)
    print(tagged.to_tagged_string())

Facebook <PROPN> nació <VERB> hace <VERB> década <NOUN> y <CCONJ> media <NOUN> tras <ADP> una <DET> noche <NOUN> de <ADP> copas <NOUN> de <ADP> Mark <PROPN> Zuckerberg. <PROPN>
Grand <ADJ> débat <NOUN> national: <ADJ> suivez <VERB> Emmanuel <PROPN> Macron <PROPN> en <ADP> direct <NOUN> de <ADP> Bordeaux. <PROPN>
Hier <ADV> an <ADP> der <DET> Zufahrt <NOUN> zur <ADP> Startrampe <NOUN> 39A, <PROPN> wo <ADV> vor <ADP> 50 <NUM> Jahren <NOUN> die <DET> gigantischen <ADJ> Saturn-Raketen <NOUN> der <DET> Apollo-Mondmissionen <NOUN> im <ADP> Schneckentempo <NOUN> vorbeigefahren <VERB> sind, <AUX> prangen <VERB> nun <ADV> die <DET> blauen <ADJ> Lettern <NOUN> des <DET> Raumfahrtunternehmens <NOUN> von <ADP> Elon <PROPN> Musk <PROPN> an <ADP> einem <DET> Hangar. <NOUN>


## Reconocimiento de entidades

Para el reconocimiento de entidades existen varios modelos en diferentes lenguas. Aquí probamos con uno entrenado solo para inglés.

In [4]:
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the entity recognizer; note that this rebinds `tagger`,
# replacing the tagger object loaded earlier under the same name.
NER_MODEL_NAME = "ner-fast"
tagger = SequenceTagger.load(NER_MODEL_NAME)

2020-08-08 10:56:39,141 https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/models-v0.4/NER-conll03--h256-l1-b32-p3-0.5-%2Bglove%2Bnews-forward-fast%2Bnews-backward-fast-normal-locked0.5-word0.05--release_4/en-ner-fast-conll03-v0.4.pt not found in cache, downloading to /tmp/tmp39v38ukl


100%|██████████| 256774339/256774339 [00:24<00:00, 10356009.84B/s]

2020-08-08 10:57:05,176 copying /tmp/tmp39v38ukl to cache at /root/.flair/models/en-ner-fast-conll03-v0.4.pt





2020-08-08 10:57:05,851 removing temp file /tmp/tmp39v38ukl
2020-08-08 10:57:06,077 loading file /root/.flair/models/en-ner-fast-conll03-v0.4.pt


In [5]:
# Analyze a single sentence with the currently loaded tagger.
ner_text = "Behind closed doors, freshman Rep. Alexandria Ocasio-Cortez threatened to put those voting with Republicans “on a list” for a primary challenge in the 2020 election."
sentence = Sentence(ner_text)
tagger.predict(sentence)

# Print the sentence with its tags inline.
print(sentence.to_tagged_string())

# Print each span returned for the "ner" tag type, one per line.
ner_spans = sentence.get_spans("ner")
for entity in ner_spans:
    print(entity)

# Or print the dictionary form of the analysis for the "ner" tag type.
ner_analysis = sentence.to_dict(tag_type="ner")
print(ner_analysis)

Behind closed doors, freshman Rep. Alexandria <B-PER> Ocasio-Cortez <E-PER> threatened to put those voting with Republicans <S-MISC> “on a list” for a primary challenge in the 2020 election.
Span [6,7]: "Alexandria Ocasio-Cortez"   [− Labels: PER (0.9574)]
Span [14]: "Republicans"   [− Labels: MISC (0.9999)]
{'text': 'Behind closed doors, freshman Rep. Alexandria Ocasio-Cortez threatened to put those voting with Republicans “on a list” for a primary challenge in the 2020 election.', 'labels': [], 'entities': [{'text': 'Alexandria Ocasio-Cortez', 'start_pos': 35, 'end_pos': 59, 'labels': [PER (0.9574)]}, {'text': 'Republicans', 'start_pos': 96, 'end_pos': 107, 'labels': [MISC (0.9999)]}]}


In [None]:
# `entity` is the loop variable left over from the `for` loop in the previous cell,
# so this evaluates `.tag` on the last span that loop iterated over.
# It is undefined on a fresh kernel or if that loop had nothing to iterate.
entity.tag

## Análisis de Opinión

También podemos utilizar un clasificador de textos y cargar el modelo entrenado con sentimiento, para poder detectar opiniones positivas y negativas.

In [6]:
from flair.models import TextClassifier
from flair.data import Sentence

# Load the text classifier by its model name.
SENTIMENT_MODEL_NAME = "en-sentiment"
classifier = TextClassifier.load(SENTIMENT_MODEL_NAME)

2020-08-08 10:57:28,847 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert.pt not found in cache, downloading to /tmp/tmpkbz632va


100%|██████████| 266170364/266170364 [00:26<00:00, 9873925.56B/s]

2020-08-08 10:57:56,652 copying /tmp/tmpkbz632va to cache at /root/.flair/models/sentiment-en-mix-distillbert.pt





2020-08-08 10:57:57,327 removing temp file /tmp/tmpkbz632va
2020-08-08 10:57:57,566 loading file /root/.flair/models/sentiment-en-mix-distillbert.pt


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




In [None]:
# Two short example texts; the original variable names are kept.
sentence1 = Sentence("I love ice-cream!")
sentence2 = Sentence("Don't ever go to this restaurant. The food was horrible :-(")

# Classify each sentence and print its plain text together with its labels.
for review in (sentence1, sentence2):
    classifier.predict(review)
    print("La frase '{}' es {}".format(review.to_plain_string(), review.labels))