<a href="https://colab.research.google.com/github/rexian/ML/blob/main/pytorch_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
#from torch import nn
#import torch.optim as optim
#import numpy as np
#import pandas as pd

model_name = "distilbert-base-uncased-finetuned-sst-2-english"
classifier = pipeline("sentiment-analysis", model=model_name)
#classifier = pipeline("sentiment-analysis")
res = classifier(["I've been waiting for a HuggingFace course my whole life.", "Today we have less traffic on the road"])

for result in res:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")



Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


label: POSITIVE, with score: 0.9598
label: NEGATIVE, with score: 0.9983


In [11]:
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
classifier = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokens = tokenizer.tokenize("I've been waiting for a HuggingFace course my whole life.")
ids = tokenizer.convert_tokens_to_ids(tokens)
inputs = tokenizer("I've been waiting for a HuggingFace course my whole life.")
print(f' Tokens: {tokens}')
print(f' Token Ids: {ids}')
print(f' Input Ids: {inputs}')

 Tokens: ['i', "'", 've', 'been', 'waiting', 'for', 'a', 'hugging', '##face', 'course', 'my', 'whole', 'life', '.']
 Token Ids: [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
 Input Ids: {'input_ids': [101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [16]:
X_train = ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!"]
batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
print(batch)
#
with torch.no_grad():
    outputs = classifier(**batch, labels=torch.tensor([1, 0]))
    print(outputs)
    predictions = F.softmax(outputs.logits, dim=1)
    print(predictions)
    labels = torch.argmax(predictions, dim=1)
    print(labels)
    labels = [classifier.config.id2label[label_id] for label_id in labels.tolist()]
    print(labels)

# Save pre-trained model
save_directory = "saved"
tokenizer.save_pretrained(save_directory)
classifier.save_pretrained(save_directory)

tokenizer = AutoTokenizer.from_pretrained(save_directory)
classifier = AutoModelForSequenceClassification.from_pretrained(save_directory)


{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}
SequenceClassifierOutput(loss=tensor(0.0208), logits=tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464]]), hidden_states=None, attentions=None)
tensor([[4.0195e-02, 9.5980e-01],
        [9.9946e-01, 5.4418e-04]])
tensor([1, 0])
['POSITIVE', 'NEGATIVE']
