In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    TrainingArguments, Trainer, DataCollatorWithPadding,
    AutoModel, AutoConfig
)
from datasets import load_dataset, Dataset, DatasetDict
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns

2025-10-17 23:09:47.354827: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-10-17 23:09:47.371561: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-10-17 23:09:47.377169: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-10-17 23:09:47.390568: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Using device: {device}")
torch.manual_seed(42)

PyTorch version: 2.9.0+cu128
CUDA available: True
Using device: cuda


<torch._C.Generator at 0x7f80fc013430>

In [5]:
def tokenization_example():
    """Demonstrate tokenization process"""
    print("\n=== TOKENIZATION EXAMPLE ===")
        
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        
    sample_texts = [
        "Hello, how are you today?",
        "The quick brown fox jumps over the lazy dog.",
        "Tokenization is the process of breaking text into tokens."
    ]
        
    for text in sample_texts:
        # Tokenize
        tokens = tokenizer.tokenize(text)
        token_ids = tokenizer.encode(text)
            
        print(f"Original text: {text}")
        print(f"Tokens: {tokens}")
        print(f"Token IDs: {token_ids}")
        print(f"Decoded: {tokenizer.decode(token_ids)}")
        print("-" * 50)
        
    # Batch tokenization
    print("\nBatch tokenization:")
    batch_encoding = tokenizer(sample_texts,padding=True,truncation=True,return_tensors="pt",max_length=128)
        
    print(f"Input IDs shape: {batch_encoding['input_ids'].shape}")
    print(f"Attention mask shape: {batch_encoding['attention_mask'].shape}")

tokenization_example()


=== TOKENIZATION EXAMPLE ===
Original text: Hello, how are you today?
Tokens: ['hello', ',', 'how', 'are', 'you', 'today', '?']
Token IDs: [101, 7592, 1010, 2129, 2024, 2017, 2651, 1029, 102]
Decoded: [CLS] hello, how are you today? [SEP]
--------------------------------------------------
Original text: The quick brown fox jumps over the lazy dog.
Tokens: ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog', '.']
Token IDs: [101, 1996, 4248, 2829, 4419, 14523, 2058, 1996, 13971, 3899, 1012, 102]
Decoded: [CLS] the quick brown fox jumps over the lazy dog. [SEP]
--------------------------------------------------
Original text: Tokenization is the process of breaking text into tokens.
Tokens: ['token', '##ization', 'is', 'the', 'process', 'of', 'breaking', 'text', 'into', 'token', '##s', '.']
Token IDs: [101, 19204, 3989, 2003, 1996, 2832, 1997, 4911, 3793, 2046, 19204, 2015, 1012, 102]
Decoded: [CLS] tokenization is the process of breaking text into tokens. [SEP]
-----

In [7]:
def model_comparison_example():
    """Compare different models on the same task"""
    print("\n=== MODEL COMPARISON EXAMPLE ===")
        
    models_to_compare = [
            "distilbert-base-uncased-finetuned-sst-2-english",
            "cardiffnlp/twitter-roberta-base-sentiment-latest",
            "nlptown/bert-base-multilingual-uncased-sentiment"
    ]
        
    test_texts = [
        "I absolutely love this product!",
        "This is the worst thing I've ever bought.",
        "It's okay, nothing special.",
        "Amazing quality and fast delivery!",
        "Terrible customer service experience."
    ]
        
    results = {}
        
    for model_name in models_to_compare:
        print(f"\nTesting model: {model_name}")
        try:
            from transformers import pipeline
            classifier = pipeline("sentiment-analysis", model=model_name)
                
            model_results = []
            for text in test_texts:
                result = classifier(text)
                model_results.append({
                    'text': text,
                    'label': result[0]['label'],
                    'score': result[0]['score']
                })
                
            results[model_name] = model_results
                
            # Show results for this model
            for result in model_results:
                print(f"  {result['text'][:30]}... -> {result['label']} ({result['score']:.3f})")
                    
        except Exception as e:
            print(f"  Error with {model_name}: {str(e)}")
        
    return results
model_comparison_example()


=== MODEL COMPARISON EXAMPLE ===

Testing model: distilbert-base-uncased-finetuned-sst-2-english


Device set to use cuda:0


  I absolutely love this product... -> POSITIVE (1.000)
  This is the worst thing I've e... -> NEGATIVE (1.000)
  It's okay, nothing special.... -> NEGATIVE (0.819)
  Amazing quality and fast deliv... -> POSITIVE (1.000)
  Terrible customer service expe... -> NEGATIVE (1.000)

Testing model: cardiffnlp/twitter-roberta-base-sentiment-latest


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


  I absolutely love this product... -> positive (0.984)
  This is the worst thing I've e... -> negative (0.953)
  It's okay, nothing special.... -> neutral (0.599)
  Amazing quality and fast deliv... -> positive (0.971)
  Terrible customer service expe... -> negative (0.933)

Testing model: nlptown/bert-base-multilingual-uncased-sentiment


Device set to use cuda:0


  I absolutely love this product... -> 5 stars (0.963)
  This is the worst thing I've e... -> 1 star (0.962)
  It's okay, nothing special.... -> 3 stars (0.847)
  Amazing quality and fast deliv... -> 5 stars (0.920)
  Terrible customer service expe... -> 1 star (0.759)


{'distilbert-base-uncased-finetuned-sst-2-english': [{'text': 'I absolutely love this product!',
   'label': 'POSITIVE',
   'score': 0.9998854398727417},
  {'text': "This is the worst thing I've ever bought.",
   'label': 'NEGATIVE',
   'score': 0.9997859597206116},
  {'text': "It's okay, nothing special.",
   'label': 'NEGATIVE',
   'score': 0.818959653377533},
  {'text': 'Amazing quality and fast delivery!',
   'label': 'POSITIVE',
   'score': 0.9998842477798462},
  {'text': 'Terrible customer service experience.',
   'label': 'NEGATIVE',
   'score': 0.9997796416282654}],
 'cardiffnlp/twitter-roberta-base-sentiment-latest': [{'text': 'I absolutely love this product!',
   'label': 'positive',
   'score': 0.9843719601631165},
  {'text': "This is the worst thing I've ever bought.",
   'label': 'negative',
   'score': 0.9526245594024658},
  {'text': "It's okay, nothing special.",
   'label': 'neutral',
   'score': 0.598635733127594},
  {'text': 'Amazing quality and fast delivery!',
   'l