In [7]:
#!pip install transformers, datasets

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Quicktour: https://huggingface.co/docs/transformers/quicktour
Pipeline method lets us use any huggingface model with a few lines of code.


# Sentiment Classification

In [44]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# find models for tasks at: https://huggingface.co/models
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [45]:
from transformers import pipeline
classifier = pipeline("sentiment-analysis", model=model_name, tokenizer=model_name)

## Example usage for inference

In [46]:
results = classifier(["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."])
for result in results:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

label: POSITIVE, with score: 0.9998
label: NEGATIVE, with score: 0.5309


## Example usage over entire dataset

In [52]:
# find datasets for tasks at: https://huggingface.co/datasets
from datasets import load_dataset
dataset = load_dataset("sst2", split="test")
text, labels = dataset["sentence"], dataset["label"]



In [53]:
results = classifier(text)

In [54]:
results

[{'label': 'NEGATIVE', 'score': 0.9996720552444458},
 {'label': 'NEGATIVE', 'score': 0.999679684638977},
 {'label': 'POSITIVE', 'score': 0.9094692468643188},
 {'label': 'POSITIVE', 'score': 0.9996354579925537},
 {'label': 'POSITIVE', 'score': 0.9994922876358032},
 {'label': 'POSITIVE', 'score': 0.9998762607574463},
 {'label': 'NEGATIVE', 'score': 0.9989668130874634},
 {'label': 'POSITIVE', 'score': 0.9998730421066284},
 {'label': 'POSITIVE', 'score': 0.8087006211280823},
 {'label': 'NEGATIVE', 'score': 0.9996316432952881},
 {'label': 'NEGATIVE', 'score': 0.9996846914291382},
 {'label': 'POSITIVE', 'score': 0.9589256644248962},
 {'label': 'POSITIVE', 'score': 0.9670690298080444},
 {'label': 'POSITIVE', 'score': 0.9996347427368164},
 {'label': 'POSITIVE', 'score': 0.9987275004386902},
 {'label': 'POSITIVE', 'score': 0.999619722366333},
 {'label': 'POSITIVE', 'score': 0.9993390440940857},
 {'label': 'POSITIVE', 'score': 0.9998441934585571},
 {'label': 'POSITIVE', 'score': 0.99934142827987

In [4]:
# Sentiment analysis pipeline
analyzer = pipeline("sentiment-analysis")

# Question answering pipeline, specifying the checkpoint identifier
oracle = pipeline("question-answering",\
                  model="distilbert-base-cased-distilled-squad",\
                  tokenizer="bert-base-cased")

# Named entity recognition pipeline, passing in a specific model and tokenizer
model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
recognizer = pipeline("ner", model=model, tokenizer=tokenizer)