In [1]:
# Mount Google Drive at /content/drive; later cells read the dataset CSV from
# it and write the fine-tuned model back to it.

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Fine-Tune LLaMA 2 (7B) on Your Sentiment Dataset in Colab

# 1. Install dependencies

!pip install -q --upgrade transformers datasets accelerate peft bitsandbytes trl

# 2. Imports
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from datasets import load_dataset, Dataset
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    IntervalStrategy
)
from huggingface_hub import notebook_login

# Ensure PEFT imports
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
# Authenticate to the Hugging Face Hub interactively (renders a login widget).
# NOTE(review): presumably needed because the Llama 2 checkpoint loaded later
# is access-gated — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
# Fixed seed so the train/test split below is reproducible.
SEED = 42

# Load the balanced tweet corpus from the mounted Drive.
balanced_tweets = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Sentiment_tweeter/Balanced_Tweets.csv')

# Stratified hold-out split: 80% of the rows for training, 20% for testing,
# with the sentiment distribution preserved in both parts.
tweet_column = balanced_tweets['CleanTweet']
sentiment_column = balanced_tweets['Sentiment']
train_texts, test_texts, train_labels, test_labels = train_test_split(
    tweet_column,
    sentiment_column,
    test_size=0.2,
    random_state=SEED,
    stratify=sentiment_column,
)

# Wrap each split in a Hugging Face Dataset, keeping the original column names.
train_dataset = Dataset.from_dict(dict(CleanTweet=train_texts, Sentiment=train_labels))
test_dataset = Dataset.from_dict(dict(CleanTweet=test_texts, Sentiment=test_labels))

In [10]:
# 5. Load the tokenizer that belongs to the base checkpoint
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token so padded batches can be built.
# NOTE(review): presumably this tokenizer ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token

In [11]:
from transformers import BitsAndBytesConfig

# 4-bit NF4 quantization with double quantization; computation runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

# Load the quantized base checkpoint with a 3-class sequence-classification head.
# `trust_remote_code` is deliberately not passed: the checkpoint is served by the
# library's own LlamaForSequenceClassification class, so there is no reason to
# opt in to executing code shipped inside a model repository.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    num_labels=3
)
# Reduce activation memory by checkpointing
model.gradient_checkpointing_enable()
# The model config needs a pad token id for padded batches; reuse the EOS id.
model.config.pad_token_id = tokenizer.eos_token_id



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-chat-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:

# 6. Prepare model for k-bit training and apply LoRA
# The preparation step is applied exactly once; the original cell invoked it a
# second time right before wrapping the model, which was redundant.
model = prepare_model_for_kbit_training(model)

# LoRA adapters on the attention query/value projections, configured for a
# sequence-classification task.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="SEQ_CLS"
)

# Wrap the prepared model with the PEFT adapters.
model = get_peft_model(model, lora_config)

In [13]:
def tokenize_function(examples, max_length=512):
    """
    Tokenize a batch of tweets with the notebook-level ``tokenizer``.

    Every text is padded to exactly ``max_length`` tokens and truncated when it
    is longer, so all resulting sequences have the same length.

    :param examples: Mapping holding the raw texts under the key 'CleanTweet'.
    :param max_length: Target sequence length used for both padding and
        truncation. Defaults to 512, the value the notebook originally
        hard-coded; a smaller value reduces memory use and run time for
        short texts.
    :return: Whatever the tokenizer returns for the batch.
    """
    return tokenizer(
        examples['CleanTweet'],
        padding="max_length",   # Pad every sequence up to max_length.
        truncation=True,        # Cut sequences that exceed max_length.
        max_length=max_length,  # Shared length limit for padding and truncation.
    )

# Tokenize each split in batches, then rename the "Sentiment" target column to
# "labels" so it is passed to the model as the label input.
train_dataset = train_dataset.map(tokenize_function, batched=True).rename_column("Sentiment", "labels")
test_dataset = test_dataset.map(tokenize_function, batched=True).rename_column("Sentiment", "labels")

# Have both datasets hand out PyTorch tensors.
train_dataset.set_format("torch")
test_dataset.set_format("torch")

Map:   0%|          | 0/7200 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

In [14]:
# Define compute_metrics function

def compute_metrics(eval_pred):
    """
    Compute accuracy and macro-averaged precision/recall/F1 for an evaluation pass.

    :param eval_pred: Pair ``(logits, labels)``. ``logits`` is an array of
        per-class scores; if a tuple of arrays is supplied instead, its first
        element is taken as the logits.
    :return: Dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    logits, labels = eval_pred
    # Guard against a tuple of outputs; the class scores are expected first.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = logits.argmax(axis=-1)
    # zero_division=0 yields the same values as the default setting but without
    # emitting a warning when a class is never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="macro", zero_division=0
    )
    acc = accuracy_score(labels, predictions)
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

In [12]:
# Early-stopping callback: ends training when the monitored metric stops improving.
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=2,             # Number of evaluations without improvement tolerated before training is stopped.
    early_stopping_threshold=0.001         # Minimum improvement that must be reached for an evaluation to count as progress.
)

In [15]:

# 10. Training arguments
training_args = TrainingArguments(
    output_dir="./lora_results",
    # Batch size 1 with 16 accumulation steps: 16 samples per optimizer step.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=5,
    learning_rate=1e-4,
    # Evaluate and checkpoint once per epoch; the two strategies must agree for
    # load_best_model_at_end to work.
    eval_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    save_strategy="epoch",
    # Restore the checkpoint with the highest evaluation accuracy after training.
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    fp16=True,
    # The model is wrapped by PEFT, so the Trainer cannot infer the label column
    # on its own (it warns and falls back to an empty list). Name it explicitly.
    label_names=["labels"],
)

In [16]:
# 11. Trainer setup
# Pass the `early_stopping` callback configured earlier in the notebook
# (patience 2, improvement threshold 0.001) instead of building a second,
# differently configured instance that silently ignored that threshold.
# NOTE(review): eval_dataset is the held-out test split, so early stopping and
# best-checkpoint selection are driven by the same data the final report uses;
# consider carving out a separate validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)



No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [19]:
import os
# Debugging aid: request synchronous CUDA kernel launches.
# NOTE(review): presumably this only takes effect when set before CUDA is
# initialised, and it slows execution — confirm, and remove it or move it to
# the top of the notebook.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [17]:
# Run the LoRA fine-tuning with the Trainer built earlier in the notebook.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msavsko08[0m ([33msavsko08-university-of-patras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7797,0.744764,0.685556,0.698072,0.685556,0.684613
2,0.6077,0.594036,0.758889,0.760501,0.758889,0.758809
3,0.3782,0.631465,0.764444,0.766664,0.764444,0.764088
4,0.213,0.83359,0.761111,0.763983,0.761111,0.760592
5,0.0955,1.12794,0.755,0.75496,0.755,0.754941


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=2250, training_loss=0.4732389710744222, metrics={'train_runtime': 16169.9587, 'train_samples_per_second': 2.226, 'train_steps_per_second': 0.139, 'total_flos': 7.171542614016e+17, 'train_loss': 0.4732389710744222, 'epoch': 5.0})

In [19]:
# Evaluate on the Trainer's eval_dataset and print the resulting metrics dict.
results = trainer.evaluate()
print(results)

{'eval_loss': 0.6314646601676941, 'eval_accuracy': 0.7644444444444445, 'eval_precision': 0.7666638694079801, 'eval_recall': 0.7644444444444445, 'eval_f1': 0.7640882708345714, 'eval_runtime': 274.3357, 'eval_samples_per_second': 6.561, 'eval_steps_per_second': 6.561, 'epoch': 5.0}


In [18]:

# Persist the trained model and its tokenizer side by side on Drive.
save_dir = '/content/drive/MyDrive/llama2-sentiment-finetuned'
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('/content/drive/MyDrive/llama2-sentiment-finetuned/tokenizer_config.json',
 '/content/drive/MyDrive/llama2-sentiment-finetuned/special_tokens_map.json',
 '/content/drive/MyDrive/llama2-sentiment-finetuned/tokenizer.model',
 '/content/drive/MyDrive/llama2-sentiment-finetuned/added_tokens.json',
 '/content/drive/MyDrive/llama2-sentiment-finetuned/tokenizer.json')

In [20]:

from sklearn.metrics import classification_report
import numpy as np

# Run inference on the test split with the trained Trainer.
predictions = trainer.predict(test_dataset)

# Ground-truth labels: reuse the ones collected during prediction instead of
# iterating every tokenized row a second time. Fall back to reading only the
# "labels" column when the Trainer did not return any.
y_true = predictions.label_ids
if y_true is None:
    y_true = [int(label) for label in test_dataset["labels"]]
y_true = np.asarray(y_true)

# Predicted labels: the class with the highest score for each sample.
logits = predictions.predictions
if isinstance(logits, tuple):
    # Guard against a tuple of outputs; the class scores are expected first.
    logits = logits[0]
y_pred = np.argmax(logits, axis=1)


# Print the per-class report.
# NOTE(review): target_names assumes the label encoding 0=Negative, 1=Neutral,
# 2=Positive — confirm against how the dataset was built.
print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=['Negative', 'Neutral', 'Positive']))

Classification Report:
              precision    recall  f1-score   support

    Negative       0.76      0.76      0.76       600
     Neutral       0.74      0.82      0.78       600
    Positive       0.80      0.71      0.76       600

    accuracy                           0.76      1800
   macro avg       0.77      0.76      0.76      1800
weighted avg       0.77      0.76      0.76      1800

