In [3]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.

In [4]:
import copy
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForMaskedLM,
)
from datasets import load_dataset
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.stats import entropy

In [5]:
def tokenize_batch(tokenizer, texts, device, max_len=512):
    enc = tokenizer(
        texts,
        truncation=True,
        padding='max_length',
        max_length=max_len,
        return_tensors='pt'
    )
    return {k: v.to(device) for k,v in enc.items()}

In [6]:

def preprocess(text):
    toks, out = text.split(), []
    for t in toks:
        if t.startswith('@') and len(t)>1:
            out.append('@user')
        elif t.startswith('http'):
            out.append('http')
        else:
            out.append(t)
    return " ".join(out)

In [7]:
def evaluate(model, tokenizer, texts, labels, device, max_len=512):
    model.eval()
    preds = []
    with torch.no_grad():
        for t in texts:
            enc = tokenize_batch(tokenizer, [preprocess(t)], device, max_len)
            logits = model(**enc).logits
            preds.append(int(logits.argmax(dim=-1).cpu()))
    acc = sum(int(p==g) for p,g in zip(preds, labels)) / len(labels)
    return acc, preds

In [12]:
import copy
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForMaskedLM,
)
from datasets import load_dataset
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.stats import entropy

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import zipfile

# Unzip model


# Then load model and tokenizer from the extracted directory
model = AutoModelForSequenceClassification.from_pretrained("/content/model_sa_BEST/kaggle/working/cat_tweeteval_model")
tokenizer = AutoTokenizer.from_pretrained("/content/model_sa_BEST/kaggle/working/cat_tweeteval_model")

model.to(device)

# 1) Load CardiffNLP's pre-trained sentiment model & tokenizer
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer1  = AutoTokenizer.from_pretrained(MODEL_NAME)
model1      = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
model1.to(device)
ood_specs = {
      "FinancialPhraseBank":("takala/financial_phrasebank","sentences_allagree","sentence","label"),
      "IMDB":("imdb",None,"text","label"),
      "FiQA":("TheFinAI/fiqa-sentiment-classification",None,"sentence","score"),
      "StockTweet":("kekunh/stock-related-tweets-vol1",None,"text","label"),
      "Amazon":("amazon_polarity",None,"content","label"),
      "Yelp":("yelp_review_full",None,"text","label"),
    }
for name,(path,cfg,tc,lc) in ood_specs.items():
    ds = load_dataset(path, cfg, split=("test" if name=="IMDB" else "train"))
    txts = ds[tc]; lbs = ds[lc]
    if name=="FiQA":
        lbs = [0 if s<0 else 1 if s==0 else 2 for s in lbs]
    acc,_ = evaluate(model,tokenizer, txts, lbs, device)
    print(f"Finetuned Model:{name:20s}: {acc*100:.2f}%")
    acc,_ = evaluate(model1,tokenizer1, txts, lbs, device)
    print(f"base Model : {name:20s}: {acc*100:.2f}%")


Finetuned Model:FinancialPhraseBank : 66.30%
base Model : FinancialPhraseBank : 70.58%


README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Finetuned Model:IMDB                : 46.46%
base Model : IMDB                : 50.52%


README.md:   0%|          | 0.00/1.67k [00:00<?, ?B/s]

(…)-00000-of-00001-aeefa1eadf5be10b.parquet:   0%|          | 0.00/61.8k [00:00<?, ?B/s]

(…)-00000-of-00001-0fb9f3a47c7d0fce.parquet:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

(…)-00000-of-00001-51867fe1ac59af78.parquet:   0%|          | 0.00/13.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/822 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/234 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/117 [00:00<?, ? examples/s]

Finetuned Model:FiQA                : 22.51%
base Model : FiQA                : 35.04%


README.md:   0%|          | 0.00/309 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.96M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/53683 [00:00<?, ? examples/s]

Finetuned Model:StockTweet          : 0.00%


KeyboardInterrupt: 