# 🤖 Sarcasm + Sentiment Classification
This notebook trains a binary classifier to distinguish sarcastic or negative feedback from genuine positive feedback.

It combines:
- Sarcasm-labeled tweets (`tweet_eval`)
- Sentiment-labeled product reviews (`amazon_polarity`)

And uses a memory-optimized setup for Apple M1 8GB machines.

In [None]:
# 📦 Install dependencies (if needed)
# !pip install transformers datasets scikit-learn

In [None]:
# 📚 Import libraries
import random
import logging
import os
from typing import List, Dict
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
)
import torch
from sklearn.metrics import accuracy_score, f1_score

# Module-level logger for this notebook; the root logger is configured to emit
# INFO-level records so the progress messages below are visible.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


In [None]:
# 🧠 Device configuration: prefer Apple-Silicon MPS, then CUDA, then CPU.
# NOTE(review): torch is already imported in an earlier cell; PyTorch may read
# these variables at import time, in which case setting them here is too late
# to take effect — confirm, and move them above `import torch` if so.
os.environ.update(
    {
        "PYTORCH_ENABLE_MPS_FALLBACK": "1",
        # NOTE(review): 0.0 appears to disable the MPS allocator's upper
        # memory limit — confirm against the PyTorch MPS documentation.
        "PYTORCH_MPS_HIGH_WATERMARK_RATIO": "0.0",
    }
)

# Backends are probed in priority order; CUDA is only queried when MPS is absent.
torch_device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)
logger.info(
    "Using device: "
    + {"mps": "MPS (Apple Silicon)", "cuda": "CUDA", "cpu": "CPU"}[torch_device.type]
)

In [None]:
# 📥 Load and preprocess the dataset (reduced size for M1 memory)
def _collect_labeled_texts(rows, text_keys, label_if_one: str, label_otherwise: str, max_chars: int) -> List[Dict]:
    """Convert raw dataset rows into ``{"text": ..., "label": ...}`` records.

    Args:
        rows: Iterable of dict-like rows, each carrying an integer ``"label"``.
        text_keys: Column names to try, in order, when looking for the text.
        label_if_one: Label string assigned when ``row["label"] == 1``.
        label_otherwise: Label string assigned for any other label value.
        max_chars: Rows whose text is longer than this are dropped.

    Returns:
        The kept records, in the same order as ``rows``.
    """
    records = []
    for row in rows:
        # Only accept a non-empty *string*: falling back to an arbitrary column
        # could pick up the integer label and crash on ``len()``.
        text = next(
            (row[key] for key in text_keys if isinstance(row.get(key), str) and row[key]),
            None,
        )
        if text is None or len(text) > max_chars:
            continue
        label = label_if_one if row["label"] == 1 else label_otherwise
        records.append({"text": text, "label": label})
    return records


def pre_labeled_datasets(max_per_source: int = 2000, max_chars: int = 200) -> List[Dict]:
    """Build a shuffled list of ``{"text", "label"}`` examples from two corpora.

    The first ``max_per_source`` training rows are taken from each of
    ``tweet_eval``/``irony`` and ``amazon_polarity``. Tweets with label 1
    (presumably the irony class) become ``"Dislike"`` and all others ``"Like"``;
    reviews with label 1 become ``"Like"`` and all others ``"Dislike"``.
    Rows with empty/missing text or text longer than ``max_chars`` are skipped.

    Args:
        max_per_source: Maximum number of rows read from each dataset; clamped
            to the split size so a smaller split does not raise.
        max_chars: Maximum text length (in characters) to keep.

    Returns:
        A list of dicts with string ``"text"`` and ``"label"`` values, shuffled
        in place with the global ``random`` generator.
    """
    logger.info("Loading datasets with 8GB M1 optimizations...")
    sarcasm_train = load_dataset("tweet_eval", "irony")["train"]
    sentiment_train = load_dataset("amazon_polarity")["train"]

    # Clamp the slice so a split smaller than the requested size does not raise.
    sarcasm_subset = sarcasm_train.select(range(min(max_per_source, len(sarcasm_train))))
    sentiment_subset = sentiment_train.select(range(min(max_per_source, len(sentiment_train))))

    data = _collect_labeled_texts(sarcasm_subset, ("text",), "Dislike", "Like", max_chars)
    data += _collect_labeled_texts(
        sentiment_subset, ("content", "text", "review"), "Like", "Dislike", max_chars
    )

    random.shuffle(data)
    logger.info("Total dataset size after filtering: %d examples", len(data))
    return data

In [None]:
# 🏷️ String label -> integer class id used as the classification target.
LABEL_MAP = dict(Dislike=0, Like=1)

In [None]:
# ✂️ Tokenization function (optimized with max_length=64 for memory)
def tokenize(example, tokenizer):
    """Tokenize one example and attach its integer class id.

    Args:
        example: Mapping with a ``"text"`` entry and a ``"label"`` entry whose
            value is a key of ``LABEL_MAP``.
        tokenizer: Callable applied to the text with truncation enabled,
            padding disabled and ``max_length=64``.

    Returns:
        The tokenizer's output with an added ``"label"`` entry holding the
        integer looked up in ``LABEL_MAP``.

    Raises:
        KeyError: If the example's label is not present in ``LABEL_MAP``.
    """
    # Padding is left off here; sequences are only truncated to 64 tokens.
    features = tokenizer(example["text"], max_length=64, truncation=True, padding=False)
    features["label"] = LABEL_MAP[example["label"]]
    return features

In [None]:
# ✅ Define evaluation metrics (Accuracy & F1 Score)
def compute_metrics(eval_pred):
    """Compute accuracy and F1 for one evaluation pass.

    Args:
        eval_pred: A pair ``(logits, labels)``; the predicted class for each
            row is the argmax of ``logits`` along the last axis.

    Returns:
        A dict with ``"accuracy"`` and ``"f1"`` entries.
    """
    logits, labels = eval_pred
    predicted = torch.tensor(logits).argmax(dim=-1)
    return {
        "accuracy": accuracy_score(labels, predicted),
        "f1": f1_score(labels, predicted),
    }

In [None]:
# 🚀 Main training function
def train_model(model_name: str = "distilbert-base-uncased"):
    """Fine-tune a two-class sequence classifier and save it to disk.

    Builds the dataset with ``pre_labeled_datasets``, tokenizes it, holds out
    20% for evaluation, trains for two epochs with ``Trainer``, prints the
    evaluation metrics and writes the model and tokenizer to
    ``./sarcasm_sentiment_model``.

    If training or evaluation raises a ``RuntimeError`` whose message contains
    "out of memory", the failed model and trainer are released and
    ``train_model_cpu_fallback`` is run instead.

    Args:
        model_name: Hugging Face model identifier to load the tokenizer and
            classifier weights from.

    Raises:
        RuntimeError: Any runtime error that is not an out-of-memory error.
    """
    import gc  # local import: only needed to reclaim memory before the fallback

    mps_available = torch.backends.mps.is_available()
    if mps_available:
        torch.mps.empty_cache()

    raw_data = pre_labeled_datasets()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    dataset = Dataset.from_list(raw_data)
    dataset = dataset.map(lambda x: tokenize(x, tokenizer), batched=False, remove_columns=['text'])

    dataset = dataset.train_test_split(test_size=0.2)
    train_dataset = dataset["train"]
    eval_dataset = dataset["test"]

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, torch_dtype=torch.float32)

    # Batch size 1 with 8 accumulation steps keeps per-step memory small while
    # still applying updates over 8 examples at a time.
    training_args = TrainingArguments(
        output_dir="./results",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=8,
        num_train_epochs=2,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=50,
        report_to="none",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        dataloader_pin_memory=False,
        dataloader_num_workers=0,
        remove_unused_columns=True,
        save_total_limit=1,
        fp16=False,
        eval_accumulation_steps=1,
        prediction_loss_only=False,
    )

    trainer_obj = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        processing_class=tokenizer,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer, pad_to_multiple_of=None)
    )

    needs_cpu_fallback = False
    try:
        if mps_available:
            torch.mps.empty_cache()
        trainer_obj.train()
        print("\nüìà Evaluating model...")
        metrics = trainer_obj.evaluate()
        print("\n‚úÖ Evaluation Results:", metrics)

        print("\nüíæ Saving final model to ./sarcasm_sentiment_model")
        model.save_pretrained("./sarcasm_sentiment_model")
        tokenizer.save_pretrained("./sarcasm_sentiment_model")
    except RuntimeError as e:
        if "out of memory" not in str(e).lower():
            raise
        print("\n‚ö†Ô∏è MPS memory error: retrying on CPU")
        # Only record the failure here: retrying inside the handler would keep
        # the exception's traceback (and the tensors it references) alive.
        needs_cpu_fallback = True

    if needs_cpu_fallback:
        # Drop the failed run's model/optimizer state before starting over so
        # the retry does not begin with that memory still held.
        del trainer_obj, model
        gc.collect()
        if mps_available:
            torch.mps.empty_cache()
        train_model_cpu_fallback()

In [None]:
# 🧠 CPU fallback training (for extreme low-memory cases)
def train_model_cpu_fallback():
    """Train a tiny BERT classifier on the CPU as a low-memory fallback.

    Rebuilds the dataset with ``pre_labeled_datasets``, keeps at most 2000
    randomly sampled examples, fine-tunes ``prajjwal1/bert-tiny`` for two
    epochs, prints the evaluation metrics and writes the model and tokenizer
    to ``./sarcasm_sentiment_model``.

    ``use_cpu=True`` is passed to ``TrainingArguments`` because ``Trainer``
    places the model on the device chosen by its arguments; moving the model
    to the CPU by hand is not enough to keep training off MPS/CUDA.
    """
    model_name = "prajjwal1/bert-tiny"
    raw_data = pre_labeled_datasets()
    raw_data = random.sample(raw_data, min(len(raw_data), 2000))

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    dataset = Dataset.from_list(raw_data)
    dataset = dataset.map(lambda x: tokenize(x, tokenizer), remove_columns=['text'])
    dataset = dataset.train_test_split(test_size=0.2)

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    model.to(torch.device("cpu"))

    training_args = TrainingArguments(
        output_dir="./results",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=5e-5,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=2,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=50,
        report_to="none",
        load_best_model_at_end=True,
        save_total_limit=1,
        # Force the Trainer onto the CPU even when an accelerator is available.
        use_cpu=True,
    )

    trainer_obj = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        processing_class=tokenizer,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer)
    )

    trainer_obj.train()
    metrics = trainer_obj.evaluate()
    print("\n‚úÖ Evaluation Results (CPU):", metrics)

    model.save_pretrained("./sarcasm_sentiment_model")
    tokenizer.save_pretrained("./sarcasm_sentiment_model")

In [None]:
# ✅ Run training with the default model ("distilbert-base-uncased").
# This downloads the datasets and model weights and writes checkpoints to
# ./results and the final model to ./sarcasm_sentiment_model.
train_model()