# HUGGING FACE

In [21]:
pip install transformers

Collecting transformersNote: you may need to restart the kernel to use updated packages.


ERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

huggingface-hub 0.0.16 requires packaging>=20.9, but you'll have packaging 20.4 which is incompatible.



  Downloading transformers-4.10.0-py3-none-any.whl (2.8 MB)
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
Collecting huggingface-hub>=0.0.12
  Downloading huggingface_hub-0.0.16-py3-none-any.whl (50 kB)
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp38-cp38-win_amd64.whl (2.0 MB)
Installing collected packages: sacremoses, huggingface-hub, tokenizers, transformers
Successfully installed huggingface-hub-0.0.16 sacremoses-0.0.45 tokenizers-0.10.3 transformers-4.10.0


In [34]:
pip install sentencepiece

Collecting sentencepieceNote: you may need to restart the kernel to use updated packages.
  Downloading sentencepiece-0.1.96-cp38-cp38-win_amd64.whl (1.1 MB)
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.1.96



In [1]:
import sentencepiece
from transformers import pipeline

# SENTIMENT BETO (TWITTER SPANISH STATE OF THE ART)

In [8]:
# Spanish sentiment-analysis pipeline backed by the
# 'finiteautomata/beto-sentiment-analysis' checkpoint.
nlp_esp = pipeline(
    task='sentiment-analysis',
    model='finiteautomata/beto-sentiment-analysis',
)

In [9]:
# Hesitant statement ("I'm not very sure"); print the pipeline's raw
# list of label/score dicts.
print(nlp_esp("No estoy muy seguro"))

[{'label': 'NEU', 'score': 0.9955778121948242}]


In [10]:
# Clearly favourable statement ("It is a very good decision"); print the
# pipeline's raw list of label/score dicts.
print(nlp_esp("Es una muy buena decisión"))

[{'label': 'POS', 'score': 0.9985349178314209}]


# PREGUNTAS-RESPUESTAS DISTILLED

In [6]:
# Import only the name this cell needs instead of `from transformers import *`,
# which floods the notebook namespace and hides where names come from.
from transformers import pipeline

# Checkpoint used for both the model weights and the tokenizer.
qa_checkpoint = 'mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es'

# Important: according to the original note in this notebook, the
# question-answering pipeline was not yet compatible with fast tokenizers when
# this cell was written. The tokenizer is therefore passed as a
# (checkpoint, kwargs) tuple whose kwargs force the slow tokenizer.
# TODO(review): confirm whether this workaround is still required on the
# installed transformers version.
nlp = pipeline(
    'question-answering',
    model=qa_checkpoint,
    tokenizer=(
        qa_checkpoint,
        {"use_fast": False}
    )
)

# Ask "Which language is he working for?" about a short Spanish context.
# The call is the last expression of the cell, so its result is displayed.
nlp(
    {
        'question': '¿Para qué lenguaje está trabajando?',
        'context': (
            'Manuel Romero está colaborando activamente con huggingface/transformers '
            'para traer el poder de las últimas técnicas de procesamiento de lenguaje natural al idioma español'
        )
    }
)

{'score': 0.9469615817070007, 'start': 163, 'end': 169, 'answer': 'español'}

# SENTIMENT MULTILINGUAL (STARS)

In [21]:
# Multilingual sentiment pipeline; the same checkpoint id supplies both the
# model weights and the tokenizer.
stars_checkpoint = 'nlptown/bert-base-multilingual-uncased-sentiment'
sentiment = pipeline(
    task='sentiment-analysis',
    model=stars_checkpoint,
    tokenizer=stars_checkpoint,
)

Downloading:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/669M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [22]:
# Negative Spanish phrase ("horrible results"); the cell displays the
# predicted label and its score.
sentiment("horribles resultados")

[{'label': '1 star', 'score': 0.8418631553649902}]

In [25]:
# Positive Spanish word ("fantastic"); the cell displays the predicted
# label and its score.
sentiment("fantasticos")

[{'label': '5 stars', 'score': 0.8580984473228455}]

In [26]:
# Ambivalent Spanish sentence ("maybe it is a good result but I'm not sure");
# the cell displays the predicted label and its score.
sentiment("quizá es un buen resultado pero no estoy seguro")

[{'label': '3 stars', 'score': 0.5648916959762573}]

# ZERO-SHOT CLASSIFICATION (TEMÁTICAS)

In [11]:
from transformers import pipeline

# Zero-shot classification pipeline backed by the
# "Recognai/bert-base-spanish-wwm-cased-xnli" checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="Recognai/bert-base-spanish-wwm-cased-xnli",
)


In [5]:
# Sentence to classify and the candidate topics (culture, society, economy,
# health, sports) the zero-shot classifier chooses among.
culture_sentence = "El autor se perfila, a los 50 años de su muerte, como uno de los grandes de su siglo"
culture_topics = ["cultura", "sociedad", "economia", "salud", "deportes"]

# Last expression of the cell, so the resulting dict is displayed.
classifier(culture_sentence, candidate_labels=culture_topics)

{'sequence': 'El autor se perfila, a los 50 años de su muerte, como uno de los grandes de su siglo',
 'labels': ['cultura', 'sociedad', 'economia', 'salud', 'deportes'],
 'scores': [0.3889744281768799,
  0.22997383773326874,
  0.16584309935569763,
  0.12057644128799438,
  0.09463217109441757]}

In [7]:
# Sentence to classify and the candidate topics (finance, health, sports)
# the zero-shot classifier chooses among.
finance_sentence = "La transacción es positiva de cara a la banca"
finance_topics = ["finanzas", "salud", "deportes"]

# Last expression of the cell, so the resulting dict is displayed.
classifier(finance_sentence, candidate_labels=finance_topics)

{'sequence': 'La transacción es positiva de cara a la banca',
 'labels': ['finanzas', 'salud', 'deportes'],
 'scores': [0.9643514156341553, 0.02298904024064541, 0.012659518979489803]}

# SHAP EXPLAINER

In [1]:
import shap
import transformers

In [2]:
# Build the Spanish sentiment pipeline that will be explained;
# return_all_scores=True is forwarded to the pipeline unchanged.
model_esp = transformers.pipeline(
    task='sentiment-analysis',
    model='finiteautomata/beto-sentiment-analysis',
    return_all_scores=True,
)

# Wrap the pipeline in a SHAP explainer; no inputs are explained in this cell.
explainer = shap.Explainer(model_esp)

In [3]:
# Two Spanish sample sentences to explain.
sample_texts = [
    "Hoy quizá pueda ser un gran día",
    "nuevos soles en la guía 205. Se acaba de presentar este año.",
]
shap_values = explainer(sample_texts)

# Plot the explanation for the "POS" output of each sample, in input order.
for sample_idx in range(len(sample_texts)):
    shap.plots.text(shap_values[sample_idx, :, "POS"])