In [None]:
from datasets import load_dataset

# Load the "sentiment" configuration of the cardiffnlp/tweet_eval dataset and
# keep direct handles to its training and validation splits.
dataset = load_dataset("cardiffnlp/tweet_eval", name="sentiment")
dataset_train, dataset_eval = dataset["train"], dataset["validation"]

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Tokenizer and classifier are built from the same pretrained checkpoint;
# the classification head is sized for three output labels.
checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=3,
)

In [None]:
def tokenize_texts(texts, max_length=512):
    """Encode raw strings into fixed-length inputs with the notebook's tokenizer.

    Args:
        texts: Iterable of strings to encode.
        max_length: Length that every encoded sequence is padded or truncated to.

    Returns:
        The tokenizer output for the whole batch; later cells read its
        "input_ids" and "attention_mask" entries.
    """
    return tokenizer(
        # Materialise the column as a plain list of strings.
        # NOTE(review): newer `datasets` releases may return a lazy column
        # object here instead of a list -- confirm against the installed version.
        list(texts),
        padding="max_length",
        # Without truncation, max_length only controls padding, so an over-long
        # text would keep its full length and exceed the 512 positions that
        # distilbert-base-uncased supports.
        truncation=True,
        max_length=max_length,
    )


# Same preprocessing for both splits so training and evaluation inputs match.
dataset_tokenized = tokenize_texts(dataset["train"]["text"])
dataset_tokenized_validation = tokenize_texts(dataset["validation"]["text"])

In [None]:
from datasets import Dataset

# Re-pack the tokenizer output together with the reference labels into
# Dataset objects, using the column names "input_ids", "attention_mask"
# and "labels".
Dataset_train = Dataset.from_dict(
    dict(
        input_ids=dataset_tokenized["input_ids"],
        attention_mask=dataset_tokenized["attention_mask"],
        labels=dataset["train"]["label"],
    )
)

# Validation split, assembled the same way.
Dataset_validation = Dataset.from_dict(
    dict(
        input_ids=dataset_tokenized_validation["input_ids"],
        attention_mask=dataset_tokenized_validation["attention_mask"],
        labels=dataset["validation"]["label"],
    )
)

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

In [None]:
import torch

def compute_metrics(eval_pred):
    """Compute classification accuracy from a (logits, labels) pair.

    Args:
        eval_pred: Two-item iterable ``(logits, labels)``. The predicted class
            for each example is the argmax over the last axis of ``logits``.

    Returns:
        dict: ``{"accuracy": <score>}`` where the score is the value returned
        by ``accuracy_score(labels, preds)``.
    """
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    # The key must be a clean metric name: the previous "accuracy:" (with a
    # trailing colon) leaked the colon into the reported metric name.
    return {"accuracy": accuracy_score(labels, preds)}

def predict_sentiment_batch(texts):
    """Predict a sentiment label for each input text using the notebook's model.

    Args:
        texts: A single string or an iterable of strings.

    Returns:
        list[str]: One of "negative", "neutral" or "positive" per input text,
        in input order. An empty input yields an empty list.
    """
    # Accept a bare string for convenience and normalise to a list.
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)
    if not texts:
        return []

    labels = ["negative", "neutral", "positive"]

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    # The model may have been moved to an accelerator (e.g. during training);
    # the inputs have to live on the same device or the forward pass fails.
    inputs = inputs.to(model.device)

    # Inference only: turn off dropout and skip gradient tracking.
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predictions = torch.argmax(probs, dim=-1)
    # Convert to plain Python ints before indexing the label list.
    return [labels[i] for i in predictions.tolist()]

In [None]:
from transformers import Trainer, TrainingArguments

# Hyper-parameters for a short fine-tuning run: one epoch, batches of 8,
# with an evaluation pass at the end of every epoch.
training_args = TrainingArguments(
    output_dir="./model",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    eval_strategy="epoch",
    # Turn off external experiment trackers (Weights & Biases, etc.) here,
    # where the reporting integrations are configured, instead of relying on
    # an environment variable that is only set in a later cell. Note that
    # "none" disables every reporting integration, not only W&B.
    report_to="none",
)

# NOTE(review): newer transformers releases rename the `tokenizer` argument to
# `processing_class` -- confirm against the installed version before changing.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=Dataset_train,
    eval_dataset=Dataset_validation,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
import os
# Set the WANDB_DISABLED flag in the process environment, then run training.
# NOTE(review): the Trainer was already constructed in an earlier cell --
# confirm that this variable is still honoured when set at this point.
os.environ.update({"WANDB_DISABLED": "true"})
trainer.train()

In [None]:
# Write the model and its tokenizer to the same directory.
save_dir = "./model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
# Quick smoke test of the prediction helper on a single sentence.
sample_texts = ["I love you"]
sample_predictions = predict_sentiment_batch(sample_texts)
print(sample_predictions)