# CS 39AA - Notebook 14: Transformers for Airline Tweets w/ WandB

We'll now revisit the Airline Tweet dataset and try using one of the large pre-trained models that is available on huggingface.co. 

Note that this is roughly the same as Assign 5 Starter on Kaggle except that now we are using [Weights and Biases](https://wandb.ai) to track the training and performance of the model, and to automatically experiment with various hyperparameter values. 

In [1]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import os

from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, TrainingArguments, Trainer
from datasets import Dataset, load_metric

In [2]:
# install wandb
#!pip install -q wandb

In [3]:
import wandb
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mgeinitz[0m ([33mmsudenver[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
df = pd.read_csv("./data/trainA.csv")
df.head()

Unnamed: 0,sentiment,text
0,positive,@JetBlue @JayVig I like the inflight snacks! I...
1,positive,@VirginAmerica thanks guys! Sweet route over t...
2,negative,@USAirways Your exchange/credit policies are w...
3,negative,@USAirways but in the meantime I'll be sleepin...
4,negative,@VirginAmerica hold times at call center are a...


In [5]:
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
print(f"device: {device}")

device: mps


In [6]:
MODEL_NAME = "bert-base-cased"
#MODEL_NAME = "bert-base-uncased"
#MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
MAX_LENGTH=50

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
#model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3, max_length=MAX_LENGTH, output_attentions=False, output_hidden_states=False)

In [7]:
classes = df.sentiment.unique().tolist()
class_tok2idx = dict((v, k) for k, v in enumerate(classes))
class_idx2tok = dict((k, v) for k, v in enumerate(classes))
print(class_tok2idx)
print(class_idx2tok)

{'positive': 0, 'negative': 1, 'neutral': 2}
{0: 'positive', 1: 'negative', 2: 'neutral'}


In [8]:
def get_model():
    return AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        config=AutoConfig.from_pretrained(MODEL_NAME, num_labels=3, max_length=MAX_LENGTH, output_attentions=False, output_hidden_states=False),
    )

In [9]:
df['label'] = df['sentiment'].apply(lambda x: class_tok2idx[x])
df.head()

Unnamed: 0,sentiment,text,label
0,positive,@JetBlue @JayVig I like the inflight snacks! I...,0
1,positive,@VirginAmerica thanks guys! Sweet route over t...,0
2,negative,@USAirways Your exchange/credit policies are w...,1
3,negative,@USAirways but in the meantime I'll be sleepin...,1
4,negative,@VirginAmerica hold times at call center are a...,1


In [10]:
model = get_model()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
sequence_0 = "@united I will never fly with you again. Period."
seq0_tokens = tokenizer(sequence_0, return_tensors="pt")
print(f"number of tokens in seq0 is {len(seq0_tokens['input_ids'].flatten())}")
print(seq0_tokens)
torch.round(F.softmax(model(**seq0_tokens).logits, -1), decimals=3)

number of tokens in seq0 is 14
{'input_ids': tensor([[  101,   137, 10280,   146,  1209,  1309,  4689,  1114,  1128,  1254,
           119, 16477,   119,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


tensor([[0.3210, 0.4050, 0.2740]], grad_fn=<RoundBackward1>)

In [12]:
sequence_1 = "@united Delayed flight, but you did the best you could. Thank you united crew."
seq1_tokens = tokenizer(sequence_1, return_tensors="pt")
print(f"number of tokens in seq1 is {len(seq1_tokens['input_ids'].flatten())}")
torch.round(F.softmax(model(**seq1_tokens).logits, -1), decimals=3)

number of tokens in seq1 is 22


tensor([[0.3140, 0.3950, 0.2910]], grad_fn=<RoundBackward1>)

In [13]:
ds_raw = Dataset.from_pandas(df[['label','text']])
ds_raw[0]

{'label': 0,
 'text': "@JetBlue @JayVig I like the inflight snacks! I'm flying with you guys on 2/28! #JVMChat"}

In [14]:
os.environ["TOKENIZERS_PARALLELISM"] = "true"

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=MAX_LENGTH)

ds = ds_raw.map(tokenize_function, batched=True)

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [15]:
ds[0]['text']

"@JetBlue @JayVig I like the inflight snacks! I'm flying with you guys on 2/28! #JVMChat"

In [16]:
ds = ds.shuffle(seed=42)
ds[0]['text']

'@AmericanAir 11 out of 11 delayed flights, you suck and getting worse'

In [17]:
train_prop = 0.85
ds_train = ds.select(range(int(len(ds)*train_prop)))
ds_eval = ds.select(range(int(len(ds)*train_prop), len(ds)))
print(f"len(ds_train) = {len(ds_train)}")
print(f"len(ds_eval) = {len(ds_eval)}")

len(ds_train) = 8500
len(ds_eval) = 1500


In [18]:
os.environ["WANDB_DISABLED"] = "false"
os.environ["WANDB_LOG_MODEL"] = "true"
os.environ["WANDB_PROJECT"] = "airline_tweets_w_bert"
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
os.environ["WANDB_NOTEBOOK_NAME"] = "nb14_airline_tweets_w_wandb.ipynb"


In [19]:
# define sweep config: 

# method
sweep_config = {
    'method': 'random'
}


# hyperparameters
parameters_dict = {
    'epochs': {
        'value': 3
        },
    'batch_size': {
        'values': [8, 32, 128]
        },
    'learning_rate': {
        'distribution': 'log_uniform_values',
        'min': 1e-5,
        'max': 1e-3
    },
    'weight_decay': {
        'values': [0.0, 0.2, 0.4, 0.6]
    },
}


sweep_config['parameters'] = parameters_dict

In [20]:
sweep_id = wandb.sweep(sweep_config, project=os.environ["WANDB_PROJECT"])

Create sweep with ID: lkfspp35
Sweep URL: https://wandb.ai/msudenver/airline_tweets_w_bert/sweeps/lkfspp35


In [21]:
#model.to(device)

In [22]:
def compute_metrics(eval_pred):
    metrics = dict()

    accuracy_metric = load_metric('accuracy')
    precision_metric = load_metric('precision')
    recall_metric = load_metric('recall')
    f1_metric = load_metric('f1')

    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    
    metrics.update(accuracy_metric.compute(predictions=preds, references=labels))
    metrics.update(precision_metric.compute(predictions=preds, references=labels, average='weighted'))
    metrics.update(recall_metric.compute(predictions=preds, references=labels, average='weighted'))
    metrics.update(f1_metric.compute(predictions=preds, references=labels, average='weighted'))
    
    return metrics


In [23]:
def train(config=None):
    with wandb.init(config=config):
        # set sweep configuration
        config = wandb.config

        # set training arguments
        training_args = TrainingArguments(
            output_dir='/Users/steve/models/airline_tweets_w_bert',
            report_to='wandb',  # Turn on Weights & Biases logging
            num_train_epochs=config.epochs,
            learning_rate=config.learning_rate,
            weight_decay=config.weight_decay,
            per_device_train_batch_size=config.batch_size,
            per_device_eval_batch_size=16,
            save_strategy='epoch', #'steps',
            evaluation_strategy='epoch', #'steps',
            eval_steps=200,
            logging_strategy='epoch',
            load_best_model_at_end=True,
            remove_unused_columns=True,
            use_mps_device=True
        )


        # define training loop
        trainer = Trainer(
            model_init=get_model,
            args=training_args,
            train_dataset=ds_train,
            eval_dataset=ds_eval,
            compute_metrics=compute_metrics
        )


        # start training loop
        trainer.train()

In [24]:
wandb.agent(sweep_id, train, count=4)

[34m[1mwandb[0m: Agent Starting Run: avxkfndb with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 5.0808901654664586e-05
[34m[1mwandb[0m: 	weight_decay: 0.2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/201 [00:00<?, ?it/s]

{'loss': 0.5099, 'learning_rate': 3.387260110310972e-05, 'epoch': 1.0}


  0%|          | 0/94 [00:00<?, ?it/s]

  accuracy_metric = load_metric('accuracy')


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.58k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.52k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

{'eval_loss': 0.37446337938308716, 'eval_accuracy': 0.8613333333333333, 'eval_precision': 0.8632445419980804, 'eval_recall': 0.8613333333333333, 'eval_f1': 0.8614066949988384, 'eval_runtime': 19.6969, 'eval_samples_per_second': 76.154, 'eval_steps_per_second': 4.772, 'epoch': 1.0}
{'loss': 0.267, 'learning_rate': 1.693630055155486e-05, 'epoch': 2.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.3593176305294037, 'eval_accuracy': 0.8713333333333333, 'eval_precision': 0.8682639606971388, 'eval_recall': 0.8713333333333333, 'eval_f1': 0.8689023419673294, 'eval_runtime': 5.8801, 'eval_samples_per_second': 255.097, 'eval_steps_per_second': 15.986, 'epoch': 2.0}
{'loss': 0.1573, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.38947680592536926, 'eval_accuracy': 0.874, 'eval_precision': 0.8730120536849371, 'eval_recall': 0.874, 'eval_f1': 0.8734648284015577, 'eval_runtime': 5.7628, 'eval_samples_per_second': 260.289, 'eval_steps_per_second': 16.311, 'epoch': 3.0}
{'train_runtime': 258.4537, 'train_samples_per_second': 98.664, 'train_steps_per_second': 0.778, 'train_loss': 0.3114297686524652, 'epoch': 3.0}


0,1
eval/accuracy,▁▇█
eval/f1,▁▅█
eval/loss,▅▁█
eval/precision,▁▅█
eval/recall,▁▇█
eval/runtime,█▁▁
eval/samples_per_second,▁██
eval/steps_per_second,▁██
train/epoch,▁▁▅▅███
train/global_step,▁▁▅▅███

0,1
eval/accuracy,0.874
eval/f1,0.87346
eval/loss,0.38948
eval/precision,0.87301
eval/recall,0.874
eval/runtime,5.7628
eval/samples_per_second,260.289
eval/steps_per_second,16.311
train/epoch,3.0
train/global_step,201.0


[34m[1mwandb[0m: Agent Starting Run: br2qwu33 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 3.8700801861063394e-05
[34m[1mwandb[0m: 	weight_decay: 0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/798 [00:00<?, ?it/s]

{'loss': 0.4466, 'learning_rate': 2.5800534574042263e-05, 'epoch': 1.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.37492772936820984, 'eval_accuracy': 0.8633333333333333, 'eval_precision': 0.8637180702076916, 'eval_recall': 0.8633333333333333, 'eval_f1': 0.8622190140441168, 'eval_runtime': 6.579, 'eval_samples_per_second': 227.999, 'eval_steps_per_second': 14.288, 'epoch': 1.0}
{'loss': 0.2262, 'learning_rate': 1.2900267287021131e-05, 'epoch': 2.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.38521337509155273, 'eval_accuracy': 0.8733333333333333, 'eval_precision': 0.8711926646205331, 'eval_recall': 0.8733333333333333, 'eval_f1': 0.8713723457168263, 'eval_runtime': 6.1682, 'eval_samples_per_second': 243.182, 'eval_steps_per_second': 15.239, 'epoch': 2.0}
{'loss': 0.1082, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.48106861114501953, 'eval_accuracy': 0.88, 'eval_precision': 0.8778881704271996, 'eval_recall': 0.88, 'eval_f1': 0.8784918275683194, 'eval_runtime': 5.901, 'eval_samples_per_second': 254.192, 'eval_steps_per_second': 15.929, 'epoch': 3.0}
{'train_runtime': 351.416, 'train_samples_per_second': 72.564, 'train_steps_per_second': 2.271, 'train_loss': 0.260318834978835, 'epoch': 3.0}


0,1
eval/accuracy,▁▅█
eval/f1,▁▅█
eval/loss,▁▂█
eval/precision,▁▅█
eval/recall,▁▅█
eval/runtime,█▄▁
eval/samples_per_second,▁▅█
eval/steps_per_second,▁▅█
train/epoch,▁▁▅▅███
train/global_step,▁▁▅▅███

0,1
eval/accuracy,0.88
eval/f1,0.87849
eval/loss,0.48107
eval/precision,0.87789
eval/recall,0.88
eval/runtime,5.901
eval/samples_per_second,254.192
eval/steps_per_second,15.929
train/epoch,3.0
train/global_step,798.0


[34m[1mwandb[0m: Agent Starting Run: 3pf7rykn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001812091702484652
[34m[1mwandb[0m: 	weight_decay: 0.4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/798 [00:00<?, ?it/s]

{'loss': 0.6932, 'learning_rate': 0.00012080611349897679, 'epoch': 1.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.6167246103286743, 'eval_accuracy': 0.76, 'eval_precision': 0.7661409209791649, 'eval_recall': 0.76, 'eval_f1': 0.7525490080606401, 'eval_runtime': 6.43, 'eval_samples_per_second': 233.282, 'eval_steps_per_second': 14.619, 'epoch': 1.0}
{'loss': 0.7149, 'learning_rate': 6.040305674948839e-05, 'epoch': 2.0}


  0%|          | 0/94 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8123822212219238, 'eval_accuracy': 0.6646666666666666, 'eval_precision': 0.44251990215699355, 'eval_recall': 0.6646666666666666, 'eval_f1': 0.5313074355893738, 'eval_runtime': 6.157, 'eval_samples_per_second': 243.626, 'eval_steps_per_second': 15.267, 'epoch': 2.0}
{'loss': 0.7898, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/94 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7550939917564392, 'eval_accuracy': 0.664, 'eval_precision': 0.44237116154873163, 'eval_recall': 0.664, 'eval_f1': 0.5309871794871796, 'eval_runtime': 6.053, 'eval_samples_per_second': 247.81, 'eval_steps_per_second': 15.529, 'epoch': 3.0}
{'train_runtime': 347.3659, 'train_samples_per_second': 73.41, 'train_steps_per_second': 2.297, 'train_loss': 0.732642286104666, 'epoch': 3.0}


0,1
eval/accuracy,█▁▁
eval/f1,█▁▁
eval/loss,▁█▆
eval/precision,█▁▁
eval/recall,█▁▁
eval/runtime,█▃▁
eval/samples_per_second,▁▆█
eval/steps_per_second,▁▆█
train/epoch,▁▁▅▅███
train/global_step,▁▁▅▅███

0,1
eval/accuracy,0.664
eval/f1,0.53099
eval/loss,0.75509
eval/precision,0.44237
eval/recall,0.664
eval/runtime,6.053
eval/samples_per_second,247.81
eval/steps_per_second,15.529
train/epoch,3.0
train/global_step,798.0


[34m[1mwandb[0m: Agent Starting Run: pkwstgc8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 9.03424054167964e-05
[34m[1mwandb[0m: 	weight_decay: 0.4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/798 [00:00<?, ?it/s]

{'loss': 0.4656, 'learning_rate': 6.0228270277864264e-05, 'epoch': 1.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.3800070881843567, 'eval_accuracy': 0.8586666666666667, 'eval_precision': 0.8565652003455206, 'eval_recall': 0.8586666666666667, 'eval_f1': 0.8572427378836052, 'eval_runtime': 6.5873, 'eval_samples_per_second': 227.709, 'eval_steps_per_second': 14.27, 'epoch': 1.0}
{'loss': 0.2357, 'learning_rate': 3.0114135138932132e-05, 'epoch': 2.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.3922038674354553, 'eval_accuracy': 0.8533333333333334, 'eval_precision': 0.8486748438625915, 'eval_recall': 0.8533333333333334, 'eval_f1': 0.8490752404352403, 'eval_runtime': 6.2513, 'eval_samples_per_second': 239.951, 'eval_steps_per_second': 15.037, 'epoch': 2.0}
{'loss': 0.085, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.598406195640564, 'eval_accuracy': 0.862, 'eval_precision': 0.8587029949007171, 'eval_recall': 0.862, 'eval_f1': 0.8595683172403535, 'eval_runtime': 6.0685, 'eval_samples_per_second': 247.178, 'eval_steps_per_second': 15.49, 'epoch': 3.0}
{'train_runtime': 342.7683, 'train_samples_per_second': 74.394, 'train_steps_per_second': 2.328, 'train_loss': 0.2621106958030758, 'epoch': 3.0}


0,1
eval/accuracy,▅▁█
eval/f1,▆▁█
eval/loss,▁▁█
eval/precision,▇▁█
eval/recall,▅▁█
eval/runtime,█▃▁
eval/samples_per_second,▁▅█
eval/steps_per_second,▁▅█
train/epoch,▁▁▅▅███
train/global_step,▁▁▅▅███

0,1
eval/accuracy,0.862
eval/f1,0.85957
eval/loss,0.59841
eval/precision,0.8587
eval/recall,0.862
eval/runtime,6.0685
eval/samples_per_second,247.178
eval/steps_per_second,15.49
train/epoch,3.0
train/global_step,798.0


In [41]:
api = wandb.Api()
sweep = api.sweep(f"msudenver/airline_tweets_w_bert/sweeps/{sweep_id}")

In [47]:
best_run = sweep.best_run(order='eval/accuracy')
best_parameters = best_run.config
print(best_parameters)
print(f"\n    batch size: {best_parameters['batch_size']}")
print(f"   learning rate: {best_parameters['learning_rate']}")
print(f"   weight decay: {best_parameters['weight_decay']}")

[34m[1mwandb[0m: Sorting runs by -summary_metrics.eval/accuracy



    batch size: 32
   learning rate: 3.8700801861063394e-05
   weight decay: 0


Next we'll load one of the model checkpoints from the runs above and see how the results compare to those at the start of the notebook (where we added the untrained classification layer to BERT). Note that in practice we would want to take the best hyperparameter values that we saw above to train one final model (which we'd train longer, or with even more data, etc.).

In [53]:
model = AutoModelForSequenceClassification.from_pretrained('/Users/steve/models/airline_tweets_w_bert/checkpoint-67')

In [54]:
model.to('cpu')

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [55]:
# recall the label encodings:
class_tok2idx

{'positive': 0, 'negative': 1, 'neutral': 2}

In [56]:
print(f"going to classify the (negative) tweet: '{sequence_0}'")
torch.round(F.softmax(model(**seq0_tokens).logits, -1), decimals=3)

going to classify the (negative) tweet: '@united I will never fly with you again. Period.'


tensor([[0.0290, 0.8410, 0.1300]], grad_fn=<RoundBackward1>)

In [57]:
print(f"going to classify the (positive) tweet: '{sequence_1}'")
torch.round(F.softmax(model(**seq1_tokens).logits, -1), decimals=3)

going to classify the (positive) tweet: '@united Delayed flight, but you did the best you could. Thank you united crew.'


tensor([[0.8320, 0.1420, 0.0260]], grad_fn=<RoundBackward1>)