# Transformers

In [1]:
import spacy
from spacy import displacy

# Load the "en_core_web_lg" spaCy pipeline.
nlp_en_lg = spacy.load("en_core_web_lg")

# English sample text (two paragraphs) used as input for entity recognition.
text_sample = """As regulators, official bodies, and general users come to depend on AI-based dynamic systems, clearer accountability will be required for automated decision-making processes to ensure trust and transparency. Evidence of this requirement gaining more momentum can be seen with the launch of the first global conference exclusively dedicated to this emerging discipline, the International Joint Conference on Artificial Intelligence: Workshop on Explainable Artificial Intelligence (XAI).[63]

The European Union introduced a right to explanation in the General Data Protection Right (GDPR) as an attempt to deal with the potential problems stemming from the rising importance of algorithms. The implementation of the regulation began in 2018. However, the right to explanation in GDPR covers only the local aspect of interpretability. In the United States, insurance companies are required to be able to explain their rate and coverage decisions.[64]
"""

# Run the pipeline over the sample and show the type of the object it returns.
doc = nlp_en_lg(text_sample)
print(type(doc))

# Visualise the named entities found in the document.
displacy.render(doc, style="ent")


  from .autonotebook import tqdm as notebook_tqdm


<class 'spacy.tokens.doc.Doc'>


In [2]:
# Named Entity Recognition (NER) with the "en_core_web_trf" pipeline.
# NOTE(review): this cell was labelled "BERT"; en_core_web_trf is believed to be
# RoBERTa-based — confirm against spaCy's model documentation.

nlp_en_trf = spacy.load("en_core_web_trf")

# Re-process the same English sample so the entities can be compared with the cell above.
doc = nlp_en_trf(text_sample)
displacy.render(doc, style="ent")



In [4]:
# NER on Swedish text with the "sv_core_news_sm" pipeline.

nlp_swe = spacy.load("sv_core_news_sm")

# Swedish sample paragraph used as input for entity recognition.
text_sample_swe = """
Grannlandet Norge har kommit långt med att elektrifiera sin bilflotta. Om ett år kommer nybilsförsäljningen i Norge vara uppe i 100 procent bilar med sladd. Min kollega , techkorrespondenten Alexander Norén berättar att det som förbluffade honom när han åkte till Norge för att få förklaringen till elbilsboomen där var hur starka de ekonomiska incitamenten är, att det för många är en plånboksfråga att dumpa fossilbilen. 
"""

doc = nlp_swe(text_sample_swe)

# Pass the visualisation style by keyword, matching the other render calls.
displacy.render(doc, style="ent")

In [5]:
# Map each entity's text to its label. Because the text is the dict key,
# an entity string that occurs several times in `doc.ents` ends up as a single entry.
entities = {str(span): span.label_ for span in doc.ents}
entities

{'Norge': 'LOC', 'Alexander Norén': 'PRS'}

## Hugging Face

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# The tokenizer and the sequence-classification model are loaded from the same checkpoint id.
SENTIMENT_CHECKPOINT = "marma/bert-base-swedish-cased-sentiment"

tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)

# Last expression of the cell: displays the model's module structure.
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(50325, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [7]:
from transformers import pipeline

# Build a sentiment-analysis pipeline from the named checkpoint and try it on one phrase.
sentiment = pipeline(
    task="sentiment-analysis",
    model="marma/bert-base-swedish-cased-sentiment",
)
sentiment("bättre än kattskit")

[{'label': 'POSITIVE', 'score': 0.9939852356910706}]

In [8]:
# Short Swedish phrases used to probe the sentiment pipeline.
sentences = [
    "Jag älskar dig sådär mycket",
    "Du är helt okej",
    "Matematik",
    "Statistik",
    "Glaset är halvfullt",
    "Glaset är halvtomt",
    "Jag har ätit pannkaka",
    "När du tar av dig skorna blir allt skönt",
    "Gillar du pannkaka?",
]

for sentence in sentences:
    # Call the pipeline once per sentence and read both fields from that single
    # result; calling it separately for "label" and "score" would run inference twice.
    prediction = sentiment(sentence)[0]
    label, score = prediction["label"], prediction["score"]
    print(f"{sentence}: {label}, {score:.3f} ")

Jag älskar dig sådär mycket: POSITIVE, 0.999 
Du är helt okej: POSITIVE, 0.999 
Matematik: POSITIVE, 0.987 
Statistik: POSITIVE, 0.984 
Glaset är halvfullt: NEGATIVE, 0.997 
Glaset är halvtomt: NEGATIVE, 0.998 
Jag har ätit pannkaka: NEGATIVE, 0.998 
När du tar av dig skorna blir allt skönt: POSITIVE, 0.998 
Gillar du pannkaka?: NEGATIVE, 0.997 


## GPT-2

In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# The tokenizer and the causal language model are loaded from the same checkpoint id.
# Note: this rebinds `tokenizer` and `model`.
GPT2_CHECKPOINT = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(GPT2_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(GPT2_CHECKPOINT)