In [None]:
import os
import re

import gradio as gr
import numpy as np
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

In [None]:
# Text cleaning(Remove non-ASCII characters, keep x20~x7E)
def clean_text(text):
    return re.sub(r"[^\x20-\x7E]", "", text)

# Load CSV
file_path = "/Users/tim/Desktop/self-learning/LLM_tune/all-data.csv"

dataset = load_dataset(
    "csv",
    data_files={"full": file_path},
    encoding="ISO-8859-1",
    column_names=["sentiment", "text"]
)["full"]

print("Dataset loaded. First example:")
print(dataset[0])

# Label mapping & cleaning
def map_label_and_clean(example):
    mapping = {"negative": 0, "neutral": 1, "positive": 2}
    example["text"] = clean_text(example["text"])
    example["labels"] = mapping[example["sentiment"]]
    return example

dataset = dataset.map(map_label_and_clean)

# Split data (80% train, 10% val, 10% test)
split_dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split_dataset["train"]
temp_dataset = split_dataset["test"]

temp_split = temp_dataset.train_test_split(test_size=0.5, seed=42)
validation_dataset = temp_split["train"]
test_dataset = temp_split["test"]

print("\nDataset split:")
print("Train rows:", len(train_dataset))
print("Validation rows:", len(validation_dataset))
print("Test rows:", len(test_dataset))


Dataset loaded. First example:
{'sentiment': 'neutral', 'text': 'According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .'}

Dataset split:
Train rows: 3876
Validation rows: 485
Test rows: 485


In [None]:
# Load model & tokenizer
model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer.pad_token = tokenizer.eos_token  # Avoid padding error

# Tokenization
def tokenize_function(examples):
    tokenized = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=256  # Reduce memory usage
    )
    tokenized["labels"] = examples["labels"]
    return tokenized

train_tokenized = train_dataset.map(tokenize_function, batched=True)
validation_tokenized = validation_dataset.map(tokenize_function, batched=True)
test_tokenized = test_dataset.map(tokenize_function, batched=True)

print("\nTokenized train first example:")
print(train_tokenized[0])


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Map:   0%|          | 0/3876 [00:00<?, ? examples/s]

Map:   0%|          | 0/485 [00:00<?, ? examples/s]

Map:   0%|          | 0/485 [00:00<?, ? examples/s]


Tokenized train first example:
{'sentiment': 'neutral', 'text': "Under the terms of the agreement , Bunge will acquire Raisio 's Keiju , Makuisa and Pyszny Duet brands and manufacturing plants in Finland and Poland .", 'labels': 1, 'input_ids': [128000, 16648, 279, 3878, 315, 279, 9306, 1174, 426, 14208, 690, 21953, 432, 2852, 822, 364, 82, 6706, 64274, 1174, 40424, 9425, 64, 323, 393, 73445, 3919, 423, 14127, 16097, 323, 15266, 11012, 304, 37355, 323, 28702, 662, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 12

In [None]:
# Load Llama 3.2-1B Instruct
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    trust_remote_code=True,
)

# LoRA Configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_CLS  # Sequence classification
)

model = get_peft_model(base_model, lora_config)
print("LoRA model created for Llama 3.2-1B-Instruct.")


config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
LoRA model created for Llama 3.2-1B-Instruct.


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [None]:
# Define evaluation metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted")
    return {"accuracy": acc, "f1": f1}

# Training parameters
training_args = TrainingArguments(
    output_dir="./llama3-lora-finetuned",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    per_device_train_batch_size=1,  # Recommended for M2/M1
    per_device_eval_batch_size=1,
    num_train_epochs=10,  # EarlyStoppingCallback will stop early if needed
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=False,  # False for Apple MPS
    push_to_hub=False,
    logging_steps=50,
)

# Early Stopping (stops if no improvement for 2 evaluations)
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=2,
    early_stopping_threshold=0.0
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=validation_tokenized,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping]
)

# Start fine-tuning
train_result = trainer.train()
trainer.save_model("./llama3-lora-finetuned")
print("Training complete. Best model saved.")


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7093,0.822303,0.851546,0.846872
2,0.5809,0.681426,0.874227,0.873473
3,0.6919,0.807535,0.874227,0.874901
4,0.4164,1.165405,0.865979,0.861126
5,0.0085,0.982706,0.872165,0.869927


Training complete. Best model saved.


In [None]:
# Evaluate on test set
test_results = trainer.evaluate(eval_dataset=test_tokenized)
print("\nTest set evaluation results:")
print(test_results)

# Confusion matrix
predictions = trainer.predict(test_tokenized)
pred_labels = np.argmax(predictions.predictions, axis=-1)
true_labels = predictions.label_ids

cm = confusion_matrix(true_labels, pred_labels)
print("\nConfusion Matrix:")
print(cm)



Test set evaluation results:
{'eval_loss': 0.9143152832984924, 'eval_accuracy': 0.8494845360824742, 'eval_f1': 0.8505455053859209, 'eval_runtime': 65.5536, 'eval_samples_per_second': 7.399, 'eval_steps_per_second': 7.399, 'epoch': 5.0}

Confusion Matrix:
[[ 48   9   3]
 [  7 239  36]
 [  0  18 125]]


In [None]:
# Gradio Interface

device = torch.device("mps")  
model.to(device)

# Labels & inference function
label_names = ["negative", "neutral", "positive"]

def predict_sentiment(text):
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model(**inputs)
        pred_label = outputs.logits.argmax(dim=-1).item()
    
    return label_names[pred_label]

# Create Gradio interface
demo = gr.Interface(
    fn=predict_sentiment,
    inputs="text",
    outputs="text",
    title="Llama 3.2-1B-Instruct + LoRA + EarlyStopping"
)

# Launch interface
demo.launch()


* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


