<a href="https://colab.research.google.com/github/teresaliau/DSA4213Assignement3/blob/main/Experiments%2Binteractve.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
# Standard library
import json

# Third-party
import torch
from google.colab import drive
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Both checkpoints are read from paths under /content/drive, so mount Drive
# before anything tries to touch them.
drive.mount('/content/drive')

print("Loading tokenizer")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Checkpoint directories on the mounted Drive.
model_path_full = '/content/drive/MyDrive/DSA4213_Models/model_full_finetuned'
model_path_lora = '/content/drive/MyDrive/DSA4213_Models/model_lora_adapter'

# Load the two six-label classifiers; the generator is consumed left to right,
# so the fully fine-tuned model is loaded first and the LoRA one second.
# NOTE(review): the recorded output of this cell warns about newly initialised
# classifier weights for distilbert-base-uncased; presumably that comes from
# resolving the adapter's base model -- confirm the trained head is restored.
model_full, model_lora = (
    AutoModelForSequenceClassification.from_pretrained(checkpoint_dir, num_labels=6)
    for checkpoint_dir in (model_path_full, model_path_lora)
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading tokenizer


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:

# Prefer the GPU whenever torch reports one; fall back to the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Move both classifiers onto the selected device.
model_full.to(device)
model_lora.to(device)

# Human-readable class names; a predicted class index is used directly as an
# index into this list.
label_names = [
    'sadness',
    'joy',
    'love',
    'anger',
    'fear',
    'surprise',
]

# Tokenization function
def tokenize_function(examples):
    """Encode ``examples["text"]`` with the module-level ``tokenizer``.

    The text is padded to ``max_length`` and truncated, with the length
    fixed at 128.

    Args:
        examples: mapping with a ``"text"`` entry; the value is handed to
            the tokenizer unchanged.

    Returns:
        Whatever the tokenizer call returns.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(examples["text"], **encode_options)

# Tiny hand-written sample that stands in for a real dataset
# (replace it with your actual data).
dataset = [
    {"text": sentence}
    for sentence in (
        "I'm so excited about this!",
        "I feel really sad and alone",
        "I love you so much",
        "This makes me so angry!",
    )
]

# Encode every record (one dict per example) with tokenize_function.
print("\nTokenizing datasets...")
tokenized_datasets = list(map(tokenize_function, dataset))

# Prediction helpers
def _classify(model, inputs):
    """Run one classifier on already-tokenized inputs.

    Args:
        model: object exposing ``eval()`` that, when called with
            ``**inputs``, returns something with a ``.logits`` tensor.
        inputs: dict of keyword tensors to pass to the model.

    Returns:
        ``(pred_idx, probs)``: the argmax class index (a Python int) for the
        first item of the batch, and that item's softmax probabilities.
    """
    model.eval()  # put the model in evaluation mode before inference
    with torch.no_grad():  # no gradients are needed for prediction
        logits = model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    return torch.argmax(probs).item(), probs


def predict_emotion(text):
    """Predict emotion for your text using both Full FT and LoRA models.

    Tokenizes ``text`` once, runs it through ``model_full`` and then
    ``model_lora``, prints a side-by-side comparison and reports whether the
    two models agree.

    Args:
        text: the sentence to classify.

    Returns:
        dict with keys ``'text'``, ``'full_ft'``, ``'full_conf'``, ``'lora'``,
        ``'lora_conf'`` and ``'agree'``: the label name and probability of
        each model's top class (as plain floats) plus a bool that is True
        when both models picked the same class.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128, padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Same inference routine for both models; full fine-tune first, then LoRA.
    pred_full, probs_full = _classify(model_full, inputs)
    pred_lora, probs_lora = _classify(model_lora, inputs)

    # Display results and compare predictions
    print(f"\n \"{text}\"")
    print(f"{'─'*60}")
    print(f"Full FT → {label_names[pred_full].upper():10s} ({probs_full[pred_full]*100:.1f}%)")
    print(f"LoRA    → {label_names[pred_lora].upper():10s} ({probs_lora[pred_lora]*100:.1f}%)")

    if pred_full == pred_lora:
        print(f":D Agree: {label_names[pred_full].upper()}")
    else:
        print(f":( Disagree! {label_names[pred_full].upper()} vs {label_names[pred_lora].upper()}")

    return {
        'text': text,
        'full_ft': label_names[pred_full],
        'full_conf': float(probs_full[pred_full]),
        'lora': label_names[pred_lora],
        'lora_conf': float(probs_lora[pred_lora]),
        'agree': pred_full == pred_lora
    }

# Fixed sentences used to smoke-test both models.
# NOTE(review): the last sentence is presumably meant as a sarcasm probe -- confirm.
examples = [
    "I'm so excited about this!",
    "I feel really sad and alone",
    "I love you so much",
    "This makes me so angry!",
    "Oh, great. Another day of happiness, just wonderful!",
]

print("\nTesting examples:")
# predict_emotion prints its own report; the returned dicts are not kept here.
for sentence in examples:
    predict_emotion(sentence)




Tokenizing datasets...

Testing examples:

 "I'm so excited about this!"
────────────────────────────────────────────────────────────
Full FT → JOY        (99.8%)
LoRA    → JOY        (99.8%)
:D Agree: JOY

 "I feel really sad and alone"
────────────────────────────────────────────────────────────
Full FT → SADNESS    (99.9%)
LoRA    → SADNESS    (100.0%)
:D Agree: SADNESS

 "I love you so much"
────────────────────────────────────────────────────────────
Full FT → LOVE       (92.1%)
LoRA    → LOVE       (97.8%)
:D Agree: LOVE

 "This makes me so angry!"
────────────────────────────────────────────────────────────
Full FT → ANGER      (99.8%)
LoRA    → ANGER      (100.0%)
:D Agree: ANGER

 "Oh, great. Another day of happiness, just wonderful!"
────────────────────────────────────────────────────────────
Full FT → JOY        (99.8%)
LoRA    → JOY        (98.7%)
:D Agree: JOY


In [None]:
# Interactive input: classify sentences typed by the user until a blank line.
print("\n" + "="*70)
print("Type sentences (Enter to stop):")
print("="*70)

user_tests = []
while True:
    try:
        text = input("\n  Sentence: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted cell ends the session just like a
        # blank line, so the predictions gathered so far still reach the
        # save step below instead of being lost to a traceback.
        break
    if not text:
        break
    user_tests.append(predict_emotion(text))

# Save user tests to a file (only when at least one sentence was entered)
if user_tests:
    with open('user_predictions.json', 'w') as f:
        json.dump(user_tests, f, indent=2)
    print(f"\nSaved {len(user_tests)} predictions to user_predictions.json")

print("\nDemo complete!")