In [1]:
import pandas as pd
from transformers import pipeline

# Sentiment Classification

In [3]:
# Name the checkpoint explicitly instead of relying on the task default:
# with no `model` argument the library chooses one itself and emits a
# "No model was supplied" warning, so the notebook would not record which
# weights produced the scores below.
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
sents = ["I love you", "I hate you",
         "I loved the music, but the story is poor."]
# The pipeline accepts a list of strings and returns one prediction
# (a dict with 'label' and 'score') per sentence.
results = classifier(sents)

# Place each sentence next to its prediction for a side-by-side table.
df = pd.concat([
    pd.DataFrame(sents, columns=['text']),
    pd.DataFrame(results)
], axis=1)
df


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


Unnamed: 0,text,label,score
0,I love you,POSITIVE,0.999866
1,I hate you,NEGATIVE,0.999113
2,"I loved the music, but the story is poor.",NEGATIVE,0.999332


In [4]:
# German sentiment: same task, but with a checkpoint trained on German text.
# Note: this cell rebinds `classifier` and `sents` from the English example.
modelname = 'oliverguhr/german-sentiment-bert'
classifier = pipeline(task="sentiment-analysis", model=modelname)
sents = [
    "Liebe Freude und Glück",
    "Ich hasse dich",
    "Ich liebe die Musik, aber der Plot ist eher dürftig.",
]
# Last expression of the cell: displays the list of label/score dicts.
classifier(sents)



[{'label': 'positive', 'score': 0.9685909748077393},
 {'label': 'negative', 'score': 0.9926037192344666},
 {'label': 'negative', 'score': 0.8034197092056274}]

# Token classification (>1GB)

In [6]:
# Name the checkpoint explicitly; without `model` the library picks a default
# and prints a "No model was supplied" warning, leaving the notebook without
# a record of which weights produced the entities below.
token_classifier = pipeline(
    'token-classification',
    model='dbmdz/bert-large-cased-finetuned-conll03-english',
)
# Adjacent string literals are concatenated by Python, so `text` is one
# single-line string; it is reused by the summarization cell further down.
text = (
    "European Union countries finally reached a deal to wean off Russian oil, "
    "their most significant effort yet to hit the Russian economy over the war "
    "in Ukraine, though the impact will be softened by an exemption for "
    "pipeline oil, a concession to landlocked holdouts, most notably Hungary."
)
# Returns one dict per tagged token (entity tag, score, token index, word,
# and start/end character offsets into `text`).
token_classifier(text)

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english)


[{'entity': 'I-ORG',
  'score': 0.9974419,
  'index': 1,
  'word': 'European',
  'start': 0,
  'end': 8},
 {'entity': 'I-ORG',
  'score': 0.9983657,
  'index': 2,
  'word': 'Union',
  'start': 9,
  'end': 14},
 {'entity': 'I-MISC',
  'score': 0.9993268,
  'index': 12,
  'word': 'Russian',
  'start': 60,
  'end': 67},
 {'entity': 'I-MISC',
  'score': 0.99934334,
  'index': 23,
  'word': 'Russian',
  'start': 118,
  'end': 125},
 {'entity': 'I-LOC',
  'score': 0.9996989,
  'index': 29,
  'word': 'Ukraine',
  'start': 150,
  'end': 157},
 {'entity': 'I-LOC',
  'score': 0.9997489,
  'index': 55,
  'word': 'Hungary',
  'start': 278,
  'end': 285}]

# Summarization (>1GB)

In [7]:
# Name the checkpoint explicitly; without `model` the library picks a default
# and prints a "No model was supplied" warning.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
# `text` is the news sentence defined in the token-classification cell.
# min_length/max_length bound the generated summary length; with
# max_length=20 the summary can be cut off mid-sentence.
summarizer(text, min_length=5, max_length=20)

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 (https://huggingface.co/sshleifer/distilbart-cnn-12-6)


[{'summary_text': ' EU countries reach deal to wean off Russian oil, most significant effort yet to hit'}]

# Text-text generation

In [8]:
# Name the checkpoint explicitly; without `model` the library picks a default
# and prints a "No model was supplied" warning.
text2text_generator = pipeline("text2text-generation", model="t5-base")
# The prompt packs the question and its supporting context into one string
# using "question: ... context: ..." markers.
text2text_generator(
    "question: What is 42 ? context: 42 is the answer to life, the universe and everything")

No model was supplied, defaulted to t5-base (https://huggingface.co/t5-base)


[{'generated_text': 'the answer to life, the universe and everything'}]

# Fill-Mask

In [9]:
# Masked-language-model demo: the pipeline proposes candidate tokens for the
# [MASK] placeholder and returns them with their scores.
unmasker = pipeline(
    task='fill-mask',
    model='bert-base-uncased',
)
# Last expression of the cell: displays the list of candidate completions.
unmasker("Hello I'm a [MASK] model.")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'score': 0.10731089860200882,
  'token': 4827,
  'token_str': 'fashion',
  'sequence': "hello i'm a fashion model."},
 {'score': 0.08774491399526596,
  'token': 2535,
  'token_str': 'role',
  'sequence': "hello i'm a role model."},
 {'score': 0.05338399112224579,
  'token': 2047,
  'token_str': 'new',
  'sequence': "hello i'm a new model."},
 {'score': 0.046672288328409195,
  'token': 3565,
  'token_str': 'super',
  'sequence': "hello i'm a super model."},
 {'score': 0.027095867320895195,
  'token': 2986,
  'token_str': 'fine',
  'sequence': "hello i'm a fine model."}]