In [1]:
DATA_PATH = "cars_data/Scraped_Car_Review_dodge.csv"

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import polars as pl
import torch

  from .autonotebook import tqdm as notebook_tqdm


### Running Pipelines

In [4]:
from transformers import pipeline

#### Sentiment Analysis

In [9]:
sentiment_classifier = pipeline(task="text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

text_input = "I really loved this product!"
sentiment_classifier(text_input)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'label': 'positive', 'score': 0.9838449954986572}]

In [13]:
text_input = "Joe went to the park."
sentiment_classifier(text_input)

[{'label': 'neutral', 'score': 0.837684690952301}]

In [14]:
text_input = "This product was terrible and it didn't work at all!"
sentiment_classifier(text_input)

[{'label': 'negative', 'score': 0.9467262625694275}]

#### Zero-shot text classification

In [16]:
zs_text_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

In [34]:
reviews = pl.read_csv(DATA_PATH)["Review"].to_list()
reviews[1000]

" I have had multiple problems with this car ever since I bought it. A few months after I got it, we had heavy rains and I discovered it leaks around the tires, the belts squeal when they get wet (going through a puddle on the road), and it smelled (probably from previous owner). The CD player is temperamental, the automatic starter rarely works, and I have had multiple transmission problems.  Had to completely replace it this past spring, and now the gears don't line up quite right. Not to mention the alloy wheel rims will dent on the slightest hole, and rims were impossible to find because of the size. Cute car. Fun to drive. Not worth the headache!"

In [45]:
candidate_labels = [
    "Quality",     
    "Value for Money",
    "Durability",  
    "Ease of Use", 
    "Customer Service",
    "Delivery",    
    "Packaging",   
    "Product Features", 
    "Comparison",  
    "Repeat Purchase", 
    "Recommendation", 
    "Safety",      
    "Environmental Impact"
]

review = "Highly recommend! This product was easy to use and well worth the price."

zs_text_classifier(review, candidate_labels, multilabel=True)

{'sequence': 'Highly recommend! This product was easy to use and well worth the price.',
 'labels': ['Value for Money',
  'Recommendation',
  'Quality',
  'Ease of Use',
  'Repeat Purchase',
  'Comparison',
  'Product Features',
  'Safety',
  'Delivery',
  'Durability',
  'Environmental Impact',
  'Customer Service',
  'Packaging'],
 'scores': [0.3853776454925537,
  0.2707996964454651,
  0.16785353422164917,
  0.11737918108701706,
  0.01666966639459133,
  0.01307186484336853,
  0.009786058217287064,
  0.004795122891664505,
  0.003954175394028425,
  0.0038748993538320065,
  0.002544761635363102,
  0.0024244911037385464,
  0.0014688721857964993]}

### Customizing Pipelines With Auto Classes

In [42]:
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

text = "I love using the Transformers library!"
encoded_input = tokenizer(text, return_tensors='pt')

with torch.no_grad():
    output = model(**encoded_input)

scores = output.logits[0]
probabilities = torch.softmax(scores, dim=0)
predicted_class = probabilities.argmax().item()

print(f"Predicted class: {predicted_class}")
print(f"Probabilities: {probabilities.tolist()}")

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Predicted class: 2
Probabilities: [0.0026470276061445475, 0.010737836360931396, 0.9866151213645935]


In [43]:
full_pipeline = pipeline(model=model_name)
full_pipeline(text)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'label': 'positive', 'score': 0.9866151213645935}]

In [None]:
reviews_list = pl.read_csv(DATA_PATH)["Review"].to_list()