### Simple pipeline

In [1]:
from transformers import pipeline

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [2]:
# No model argument is passed, so the library picks the checkpoint for this task.
classifier = pipeline(task="sentiment-analysis")
res = classifier("I've been waiting for a HuggingFace course my whole life")
print(res)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


[{'label': 'POSITIVE', 'score': 0.9516069293022156}]


### Defining a model in the pipeline

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [4]:
# Name the checkpoint once so the tokenizer and the model are loaded from the same source.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [5]:
# Build the pipeline from the explicitly loaded model and tokenizer
# instead of relying on a library default.
classifier = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
)
res = classifier("I've been waiting for a HuggingFace course my whole life")
print(res)

[{'label': 'POSITIVE', 'score': 0.9516069293022156}]


### Tokenizer

In [6]:
sequence = "Using a Transformer network is simple"

# Calling the tokenizer directly yields a mapping holding `input_ids`
# and `attention_mask` for the sentence.
res = tokenizer(text=sequence)
print(res)

{'input_ids': [101, 2478, 1037, 10938, 2121, 2897, 2003, 3722, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [7]:
# Step 1 of doing it by hand: split the sentence into (sub)word tokens, no ids yet.
tokens = tokenizer.tokenize(text=sequence)
print(tokens)

['using', 'a', 'transform', '##er', 'network', 'is', 'simple']


In [8]:
# Step 2: look up the vocabulary id of every token produced above.
ids = tokenizer.convert_tokens_to_ids(tokens=tokens)
print(ids)

[2478, 1037, 10938, 2121, 2897, 2003, 3722]


In [9]:
# Step 3: map the ids back to text.
decoded_string = tokenizer.decode(token_ids=ids)
print(decoded_string)

using a transformer network is simple


### Using PyTorch

In [10]:
import torch
import torch.nn.functional as F

In [11]:
# Two example sentences of different lengths, processed together as one batch.
X_train = [
    "I've been waiting for a HuggingFace course my whole life",
    "Python is great!",
]

In [12]:
# Encode both sentences in one call:
#   padding=True        -> the shorter sentence is padded to the longer one's length
#   truncation/max_length -> inputs are capped at 512 tokens
#   return_tensors="pt" -> the result holds PyTorch tensors
batch = tokenizer(
    X_train,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
print(batch)

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,   102],
        [  101, 18750,  2003,  2307,   999,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}


In [13]:
# Inference only: run the forward pass and post-processing without gradient tracking.
with torch.no_grad():
    outputs = model(**batch)
    # Softmax across the class dimension turns the logits into probabilities.
    predictions = F.softmax(outputs.logits, dim=1)
    # The index of the largest probability is the predicted class id.
    labels = predictions.argmax(dim=1)

# Show the raw model output, the probabilities, and the predicted class ids, in that order.
print(outputs, predictions, labels, sep="\n")


SequenceClassifierOutput(loss=None, logits=tensor([[-1.4683,  1.5105],
        [-4.2745,  4.6111]]), hidden_states=None, attentions=None)
tensor([[4.8393e-02, 9.5161e-01],
        [1.3835e-04, 9.9986e-01]])
tensor([1, 1])


### Save a model

In [14]:
# Write the tokenizer first and then the model into the same directory.
save_directory = "saved"
for artifact in (tokenizer, model):
    artifact.save_pretrained(save_directory)

### Load a model

In [15]:
# Reload both pieces from the local directory written by the save step.
tok = AutoTokenizer.from_pretrained(
    save_directory
)
mod = AutoModelForSequenceClassification.from_pretrained(
    save_directory
)