<a href="https://colab.research.google.com/github/raphael-saltiel/AI-Project/blob/main/SentimentalAnalysisWithDistilbert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Step 1. Import and Load Data

In [None]:
!pip install -q transformers
!pip install -q datasets

In [None]:
from datasets import load_dataset
# Load the dataset registered under the name "emotion"; later cells index its
# "train" and "validation" splits.
emotions = load_dataset("emotion")

In [None]:
import torch
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Step 2. Preprocess Data

In [None]:
from transformers import AutoTokenizer

# Checkpoint name shared by the tokenizer here and the classification model created later.
model_name = "bert-base-uncased"
# Tokenizer matching that checkpoint, so inputs are encoded the way the model expects.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Encode the ``"text"`` entry of a batch with the module-level ``tokenizer``.

    Args:
        batch: Mapping whose ``"text"`` entry holds the raw strings to encode.

    Returns:
        The tokenizer's output for those texts, produced with padding and
        truncation both enabled.
    """
    texts = batch["text"]
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded

In [None]:
# Apply `tokenize` to every split in batched mode.
# NOTE(review): batch_size=None is expected to pass each split to `tokenize` as a
# single batch, so padding=True pads to the longest sequence of that split —
# confirm against the `datasets` map documentation.
emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)

In [None]:
from transformers import AutoModelForSequenceClassification
# Size of the classification head; matches the six emotion names listed for the
# confusion matrix further down.
num_labels = 6
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)
# Place the model on the device selected earlier (GPU when available).
model = model.to(device)

In [None]:
# Show the column schema of the encoded training split before its output format is changed.
emotions_encoded["train"].features

In [None]:
# Restrict row access to the three listed columns and request the "torch" output format.
emotions_encoded.set_format("torch", columns=["input_ids", "attention_mask", "label"])
# Show the training split's feature schema again after the format change.
emotions_encoded["train"].features

In [None]:
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    """Score a prediction object with accuracy and weighted F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    y_true = pred.label_ids
    y_hat = pred.predictions.argmax(-1)
    return {
        "accuracy": accuracy_score(y_true, y_hat),
        "f1": f1_score(y_true, y_hat, average="weighted"),
    }

In [None]:
from transformers import Trainer, TrainingArguments

batch_size = 64
# Number of optimizer steps in one pass over the training split, so that the
# training loss is logged about once per epoch.
logging_steps = len(emotions_encoded["train"]) // batch_size
# load_best_model_at_end needs checkpoints to reload from, and the Trainer
# requires the save strategy to match the evaluation strategy; with
# save_strategy="no" the arguments are rejected. Checkpoints are therefore
# written every epoch, and save_total_limit keeps disk usage bounded.
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(output_dir="results",
                                  num_train_epochs=8,
                                  learning_rate=2e-5,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  load_best_model_at_end=True,
                                  metric_for_best_model="f1",
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  save_strategy="epoch",
                                  save_total_limit=2,
                                  logging_steps=logging_steps,
                                  disable_tqdm=False)

In [None]:
from transformers import Trainer

# Bundle the model, training arguments, metric function and data splits into a
# Trainer, then run fine-tuning. The trailing ';' hides the train() return value.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=emotions_encoded["train"],
    eval_dataset=emotions_encoded["validation"],
    compute_metrics=compute_metrics,
)
trainer.train();

In [None]:
# Evaluate with the Trainer's configured eval_dataset and display the returned metrics.
results = trainer.evaluate()
results

In [None]:
# Run prediction over the validation split; the raw predictions are reused
# further down for the confusion matrix.
preds_output = trainer.predict(emotions_encoded["validation"])
# Display the metrics attached to the prediction output.
preds_output.metrics

In [None]:
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# sklearn's old `plot_confusion_matrix` expected (estimator, X, y_true) and has
# been removed from scikit-learn, so it cannot be called with predictions and
# labels. Build the matrix from the label arrays and render it explicitly.
y_valid = np.array(emotions_encoded["validation"]["label"])
y_preds = np.argmax(preds_output.predictions, axis=1)
labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

# Fix the class order to 0..len(labels)-1 so the matrix always has one row and
# column per emotion, even if a class is absent from this split.
# normalize="true" scales each row by its true-label count.
cm = confusion_matrix(y_valid, y_preds,
                      labels=np.arange(len(labels)),
                      normalize="true")
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap="Blues", values_format=".2f");

In [None]:
# Write the trained model and its tokenizer into the same local directory, ./model.
model.save_pretrained('./model')
tokenizer.save_pretrained('./model')

In [None]:
!transformers-cli login

In [None]:
!sudo apt-get install git-lfs

In [None]:
!git config --global user.email "bhadreshpsavani@gmail.com"
!git config --global user.name "*****"
!git config --global user.password "****"

In [None]:
# Upload the model to the Hub under the repository name 'bert-base-uncased-emotion'.
# NOTE(review): presumably requires a prior Hub login in this runtime — confirm.
model.push_to_hub('bert-base-uncased-emotion')

In [None]:
# Upload the tokenizer to the Hub under the repository name 'bert-base-uncased-emotion'.
# NOTE(review): presumably requires a prior Hub login in this runtime — confirm.
tokenizer.push_to_hub('bert-base-uncased-emotion')