In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch.utils.data import DataLoader, Dataset
from torch import nn, optim
from sklearn.model_selection import train_test_split
from pathlib import Path
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

import optuna
import wandb
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

wandb.login(key="d12da696b882ebdf6b786d182d46febc1a77dcdb")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib  # לשמירת ה-LabelEncoder (אופציונלי)

# --- טעינת הדאטה ---
df = pd.read_csv("/kaggle/input/traindataset/processed_train.csv")

# שינוי שם עמודת הטקסט
df = df.rename(columns={'fully_clean_text': 'Tweet'})

# קידוד התוויות ממחרוזות למספרים
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['Sentiment'])

# שמירת המיפוי לשימוש עתידי (אופציונלי)
joblib.dump(label_encoder, "label_encoder.pkl")

# הצגת המיפוי
for i, label in enumerate(label_encoder.classes_):
    print(f"{i} → {label}")

# --- חלוקה ל-Train / Eval 
train_df, eval_df = train_test_split(
    df[['Tweet', 'label']],          # keep only what the model needs
    test_size=0.2,                   # 80/20 split; change if you like
    random_state=42,
    stratify=df['label']             # keep class balance
)

# שמירה של רק העמודות הדרושות למודל
train_df = train_df[['Tweet', 'label']]
eval_df = eval_df[['Tweet', 'label']]


# שמירת קבצים
train_df.to_csv("train_data.csv", index=False)
eval_df.to_csv("eval_data.csv", index=False)



0 → Extremely Negative
1 → Extremely Positive
2 → Negative
3 → Neutral
4 → Positive


In [4]:
#לשקול להחזיר את אמוג'ים, hashtags, mentions############
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# נשתמש במודל BERTweet-Large של VinAI
model_name = "vinai/bertweet-large"

# חשוב! BERTweet לא תומך בטוקניזר המהיר (Fast tokenizer)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# טען את המודל למטרת Classification עם 5 תוויות
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5).to(device)

model  # הצגת מבנה המודל


config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

2025-08-11 15:33:53.126895: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754926433.318829      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754926433.380453      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=Tru

In [5]:
class TweetDataset(Dataset):
    def __init__(self, dataframe, tokenizer):
        self.texts = dataframe['Tweet'].tolist()
        self.labels = dataframe['label'].tolist() 
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=110,  
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [6]:
def early_stop_check(patience, best_acc, best_acc_epoch, current_acc, current_epoch):
    """
    עצירה מוקדמת לפי Val Accuracy בלבד.
    מחזיר: best_acc, best_acc_epoch, early_stop_flag
    """
    early_stop_flag = False
    if current_acc > best_acc:
        best_acc = current_acc
        best_acc_epoch = current_epoch
    elif current_epoch - best_acc_epoch > patience:
        early_stop_flag = True
    return best_acc, best_acc_epoch, early_stop_flag


In [7]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch
from torch import nn

# חישוב משקלי תוויות לפי הופעה בפועל — על ה-TRAIN בלבד
train_labels = train_df['label'].values
classes = np.unique(train_labels)

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=train_labels
)

# הפיכה לטנסור לשימוש בתוך CrossEntropyLoss
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float, device=device)

# פונקציית הפסד עם משקלים
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

# אופציונלי: להדפיס כדי לדעת מה קיבלת
print("Classes:", classes)
print("Class weights:", class_weights)


Classes: [0 1 2 3 4]
Class weights: [1.49848801 1.2401726  0.82850966 1.07523382 0.71978462]


In [8]:
def train_model_with_hyperparams(model, train_loader, val_loader, optimizer, criterion, epochs, patience, trial):
    best_val_accuracy = 0.0
    best_val_accuracy_epoch = 0
    early_stop_flag = False
    best_model_state = None

    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = 0.0
        total_train_samples = 0
        correct_train_predictions = 0

        for batch in train_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = criterion(logits, labels)

            loss.backward()
            optimizer.step()

            train_loss += loss.item() * input_ids.size(0)
            total_train_samples += input_ids.size(0)
            correct_train_predictions += (logits.argmax(dim=1) == labels).sum().item()

        train_loss /= total_train_samples
        train_accuracy = correct_train_predictions / total_train_samples

        model.eval()
        val_loss = 0.0
        total_val_samples = 0
        correct_val_predictions = 0
        all_val_labels = []
        all_val_preds = []

        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                loss = criterion(logits, labels)

                val_loss += loss.item() * input_ids.size(0)
                total_val_samples += input_ids.size(0)
                correct_val_predictions += (logits.argmax(dim=1) == labels).sum().item()

                all_val_labels.extend(labels.cpu().numpy())
                all_val_preds.extend(logits.argmax(dim=1).cpu().numpy())

        val_loss /= total_val_samples
        val_accuracy = correct_val_predictions / total_val_samples

        val_precision = precision_score(all_val_labels, all_val_preds, average='weighted', zero_division=0)
        val_recall = recall_score(all_val_labels, all_val_preds, average='weighted', zero_division=0)
        val_f1 = f1_score(all_val_labels, all_val_preds, average='weighted', zero_division=0)

        best_val_accuracy, best_val_accuracy_epoch, early_stop_flag = early_stop_check(
            patience, best_val_accuracy, best_val_accuracy_epoch, val_accuracy, epoch)

        if val_accuracy == best_val_accuracy:
            best_model_state = model.state_dict()

        wandb.log({
            "Epoch": epoch,
            "Train Loss": train_loss,
            "Train Accuracy": train_accuracy,
            "Validation Loss": val_loss,
            "Validation Accuracy": val_accuracy,
            "Validation Precision": val_precision,
            "Validation Recall": val_recall,
            "Validation F1": val_f1
        })

        if early_stop_flag:
            break

    if best_model_state is not None:
        torch.save(best_model_state, f"best_model_trial_{trial.number}.pt")

    return best_val_accuracy


In [9]:
# Objective Function for Optuna
def objective(trial):
    # === Hyperparameter suggestions ===
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-4, log=True)
    patience = trial.suggest_int("patience", 2, 5)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    num_layers = trial.suggest_int("num_layers", 1, 3)

    # === Tokenizer and Dataset ===
    tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-large", use_fast=False)

    train_dataset = TweetDataset(train_df, tokenizer)
    val_dataset = TweetDataset(eval_df, tokenizer)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # === Load BERTweet Model ===
    model = AutoModelForSequenceClassification.from_pretrained(
        "vinai/bertweet-large", num_labels=5
    ).to(device)

    # === Freeze all layers first ===
    for param in model.roberta.parameters():
        param.requires_grad = False

    # === Unfreeze the last `num_layers` of encoder ===
    for param in model.roberta.encoder.layer[-num_layers:].parameters():
        param.requires_grad = True

    # === Unfreeze the classification head ===
    for param in model.classifier.parameters():
        param.requires_grad = True

    # === Define loss with class weights ===
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

    # === Optimizer ===
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # === Initialize W&B for tracking ===
    wandb.init(
        project="bertweet-sentiment",
        config={
            "learning_rate": learning_rate,
            "weight_decay": weight_decay,
            "patience": patience,
            "batch_size": batch_size,
            "num_layers": num_layers,
            "architecture": "BERTweet-Large",
            "dataset": "covid-tweets"
        },
        name=f"trial_{trial.number}"
    )

    # === Train and evaluate ===
    best_val_accuracy = train_model_with_hyperparams(
        model, train_loader, val_loader, optimizer, criterion, epochs=10,
        patience=patience, trial=trial
    )

    wandb.finish()
    return best_val_accuracy

In [10]:
# יצירת Study של Optuna - למקסם Validation Accuracy
study = optuna.create_study(
    study_name="BERT_Accuracy_Study",
    direction="maximize"
)

# הרצה של 5 ניסויים
study.optimize(objective, n_trials=5)

# הדפסת התוצאה הטובה ביותר
print(f"\nBest Validation Accuracy: {study.best_value:.4f}")
print("Best hyperparameters:", study.best_params)

# שמירת התוצאות (אופציונלי)
joblib.dump(study, "optuna_berttweet_accuracy_study.pkl")


[I 2025-08-11 15:34:11,078] A new study created in memory with name: BERT_Accuracy_Study
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▆▅▄▃▃▂▂▁▁
Validation Accuracy,▁▄▅▅▄█▇██▇
Validation F1,▁▅▅▅▄█▇██▇
Validation Loss,▆▃▂▂▅▁▄▄▆█
Validation Precision,▁▄▆▅▅█▇███
Validation Recall,▁▄▅▅▄█▇██▇

0,1
Epoch,10.0
Train Accuracy,0.87829
Train Loss,0.28987
Validation Accuracy,0.67949
Validation F1,0.67449
Validation Loss,1.01193
Validation Precision,0.68735
Validation Recall,0.67949


[I 2025-08-11 16:45:03,664] Trial 0 finished with value: 0.6950803701899659 and parameters: {'learning_rate': 4.468780967980255e-05, 'weight_decay': 2.7116136370918185e-05, 'patience': 2, 'batch_size': 128, 'num_layers': 3}. Best is trial 0 with value: 0.6950803701899659.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▄▅▆▆▇▇███
Train Loss,█▅▄▃▃▂▂▂▁▁
Validation Accuracy,▁▁▄▅▆▆██▇▇
Validation F1,▁▂▄▅▆▅██▇▇
Validation Loss,█▆▄▃▂▂▂▁▁▁
Validation Precision,▁▃▄▅▆▆█▇█▇
Validation Recall,▁▁▄▅▆▆██▇▇

0,1
Epoch,10.0
Train Accuracy,0.65053
Train Loss,0.77415
Validation Accuracy,0.61617
Validation F1,0.60519
Validation Loss,0.8399
Validation Precision,0.62621
Validation Recall,0.61617


[I 2025-08-11 17:47:36,750] Trial 1 finished with value: 0.6295664880662445 and parameters: {'learning_rate': 4.9786157368274445e-05, 'weight_decay': 2.230465513571302e-05, 'patience': 2, 'batch_size': 64, 'num_layers': 1}. Best is trial 0 with value: 0.6950803701899659.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▆▅▄▃▃▂▂▁▁
Validation Accuracy,▁▂▇▆▆▇█▇██
Validation F1,▁▂▇▆▆▇▇▇██
Validation Loss,█▆▁▁▁▂▃▆▄▇
Validation Precision,▁▂▆▆▆▇▇▇▇█
Validation Recall,▁▂▇▆▆▇█▇██

0,1
Epoch,10.0
Train Accuracy,0.83334
Train Loss,0.39355
Validation Accuracy,0.67414
Validation F1,0.67313
Validation Loss,0.9757
Validation Precision,0.6813
Validation Recall,0.67414


[I 2025-08-11 19:00:33,255] Trial 2 finished with value: 0.674135411592791 and parameters: {'learning_rate': 1.5072481269449607e-05, 'weight_decay': 7.010303283144758e-05, 'patience': 5, 'batch_size': 32, 'num_layers': 3}. Best is trial 0 with value: 0.6950803701899659.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
Epoch,▁▂▄▅▇█
Train Accuracy,▂▅▆▁▆█
Train Loss,█▄▃▂▁▁
Validation Accuracy,██▂▁█▂
Validation F1,██▂▁█▂
Validation Loss,▇█▁▁▁▃
Validation Precision,██▂▁█▂
Validation Recall,██▂▁█▂

0,1
Epoch,6.0
Train Accuracy,0.21054
Train Loss,1.61375
Validation Accuracy,0.18607
Validation F1,0.05838
Validation Loss,1.62073
Validation Precision,0.03462
Validation Recall,0.18607


[I 2025-08-11 19:44:24,560] Trial 3 finished with value: 0.27788602045786653 and parameters: {'learning_rate': 0.0005056541174507694, 'weight_decay': 5.831294042677587e-06, 'patience': 4, 'batch_size': 32, 'num_layers': 3}. Best is trial 0 with value: 0.6950803701899659.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
Epoch,▁▂▃▄▅▆▇█
Train Accuracy,▂▂▁▇▄▇▅█
Train Loss,█▅▃▃▂▂▁▁
Validation Accuracy,▆▁▂█▁▂▂█
Validation F1,▆▁▂█▁▂▂█
Validation Loss,▅█▄▂▂▁▁▂
Validation Precision,▆▁▂█▁▂▂█
Validation Recall,▆▁▂█▁▂▂█

0,1
Epoch,8.0
Train Accuracy,0.20984
Train Loss,1.61217
Validation Accuracy,0.27789
Validation F1,0.12086
Validation Loss,1.61374
Validation Precision,0.07722
Validation Recall,0.27789


[I 2025-08-11 20:42:52,801] Trial 4 finished with value: 0.27788602045786653 and parameters: {'learning_rate': 0.00037662452485454344, 'weight_decay': 1.141876508702439e-06, 'patience': 3, 'batch_size': 32, 'num_layers': 3}. Best is trial 0 with value: 0.6950803701899659.



Best Validation Accuracy: 0.6951
Best hyperparameters: {'learning_rate': 4.468780967980255e-05, 'weight_decay': 2.7116136370918185e-05, 'patience': 2, 'batch_size': 128, 'num_layers': 3}


['optuna_berttweet_accuracy_study.pkl']