In [None]:
import requests

# Quick connectivity probe against the Hugging Face Hub.
# A timeout is passed explicitly because requests waits indefinitely by default,
# which would leave this cell hanging on a runtime without network access.
print(requests.get("https://huggingface.co", timeout=5).status_code)


200


In [None]:
# !pip install -U \
#   transformers==4.28.1 \
#   huggingface_hub==0.15.1 \
#   tokenizers==0.12.1 \
#   requests==2.31.0 \
#   --no-deps


In [None]:
import pandas as pd
import numpy as np
import ast
import random
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW  # Imported from torch instead of transformers
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup
)
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import os

# NOTE: TRANSFORMERS_OFFLINE is deliberately NOT set here.
# It used to be assigned at this point, i.e. after `transformers` had already been
# imported, and in the recorded run it had no effect: the tokenizer and the model
# weights were still downloaded from the Hub. Presumably the library reads the flag
# at import time (TODO confirm for the installed version). If offline mode is really
# wanted, export the variable before the first `import transformers` and make sure
# the ProsusAI/finbert files are already in the local cache.

# Strict Reproducibility: seed every RNG this notebook can touch.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    # Ask cuDNN for deterministic kernels and disable its autotuner, which may
    # otherwise pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

Using device: cuda


In [None]:
import requests

# Report whether the Hugging Face Hub is reachable from this runtime.
# Only network-level failures raised by requests are treated as "offline";
# any other exception is a genuine bug and is allowed to surface.
try:
    r = requests.get("https://huggingface.co", timeout=5)
    print("Internet ON:", r.status_code)
except requests.exceptions.RequestException as e:
    print("Internet OFF:", e)


Internet ON: 200


In [None]:
# Load Dataset
data_path = '/content/SEntFiN-v1.1_with_split.csv'
df_raw = pd.read_csv(data_path)

# Fail fast with a readable message if the CSV lacks a column that the
# preprocessing below reads ('Title', 'Decisions', 'split').
_missing_cols = {'Title', 'Decisions', 'split'} - set(df_raw.columns)
if _missing_cols:
    raise KeyError(f"{data_path} is missing required column(s): {sorted(_missing_cols)}")

def preprocess_target_other(df):
    """
    Transforms headlines into 'Target'/'Other' masked instances strictly
    following SEntFiN methodology (Sources 649, 979).

    One output row is produced per (headline, entity) pair. In each row the
    entity being classified is replaced by the word "Target" and every other
    annotated entity of the same headline by "Other".

    Args:
        df: DataFrame with the columns 'Title' (headline text), 'Decisions'
            (a dict mapping entity -> sentiment, or its string literal) and
            'split'. The frame is not modified.

    Returns:
        DataFrame with the columns 'sentence' (masked text with every
        non-alphanumeric, non-whitespace character removed), 'label'
        (0 = positive, 1 = negative, 2 = neutral), 'original_entity' and
        'split'. Entities whose sentiment is none of those three strings
        are skipped.
    """
    import re  # local import: used only for the masking below

    label_ids = {"positive": 0, "negative": 1, "neutral": 2}
    processed_rows = []

    # Parse the dictionary string in 'Decisions' into a separate Series so the
    # caller's DataFrame is left untouched (re-running the cell stays idempotent).
    parsed_decisions = df['Decisions'].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )

    for (idx, row), decisions in zip(df.iterrows(), parsed_decisions):
        original_sentence = row['Title']

        # One pattern per headline covering all annotated entities. Longer names
        # come first so that an entity which is a prefix/substring of another
        # (e.g. "Tata" vs "Tata Motors") cannot split the longer mention.
        # Empty names are ignored: they would match at every position.
        entities = sorted((e for e in decisions if e), key=len, reverse=True)
        entity_pattern = (
            re.compile("|".join(re.escape(e) for e in entities)) if entities else None
        )

        # Create one instance per entity
        for target_entity, sentiment in decisions.items():
            # 1. Label Encoding
            label = label_ids.get(sentiment) if isinstance(sentiment, str) else None
            if label is None:
                continue  # Skip invalid labels

            # 2. Masking Logic (Source 649)
            # All mentions are substituted in a single left-to-right pass, so text
            # that has already been replaced is never scanned again.
            # Note: matching is by exact string; the dataset provides no character
            # offsets for the entity mentions.
            if entity_pattern is not None:
                masked_sentence = entity_pattern.sub(
                    lambda m: "Target" if m.group(0) == target_entity else "Other",
                    original_sentence,
                )
            else:
                masked_sentence = original_sentence

            # 3. Clean Punctuation (Source 648)
            # Simple removal of special chars, keeping basic structure
            masked_sentence = "".join(
                c for c in masked_sentence if c.isalnum() or c.isspace()
            )

            processed_rows.append({
                "sentence": masked_sentence,
                "label": label,
                "original_entity": target_entity,
                "split": row['split']
            })

    return pd.DataFrame(processed_rows)

# Turn the raw headlines into masked per-entity instances
df_processed = preprocess_target_other(df_raw)

# Honour the partition already stored in the dataset's 'split' column
train_df = df_processed.loc[df_processed['split'].eq('train')].reset_index(drop=True)
test_df = df_processed.loc[df_processed['split'].eq('test')].reset_index(drop=True)

# Hold out a label-stratified 10% of the training instances for validation (Source 873)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.1,
    stratify=train_df['label'],
    random_state=SEED,
)

print(f"Train Size: {train_df.shape[0]}")
print(f"Val Size: {val_df.shape[0]}")
print(f"Test Size: {test_df.shape[0]}")
print("Sample Instance:", train_df['sentence'].iloc[0])

Train Size: 9255
Val Size: 1029
Test Size: 3000
Sample Instance: FII buying in Target crosses limit no further purchase


In [None]:
class FinBERTDataset(Dataset):
    """Map-style dataset that tokenizes one masked headline per item.

    Args:
        df: DataFrame with a 'sentence' column (text) and a 'label' column
            (integer class id). Rows are accessed by position.
        tokenizer: callable tokenizer; it is invoked with the text plus the
            padding/truncation keyword arguments used in ``__getitem__``.
        max_len: every sequence is padded or truncated to this many tokens.
    """

    def __init__(self, df, tokenizer, max_len=30): # Max len 30 from Source 880
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the tensors for the row at position ``idx``.

        Returns:
            dict with 'input_ids' and 'attention_mask' (the tokenizer output
            flattened to 1-D) and 'labels' (0-d ``torch.long`` tensor).
        """
        row = self.df.iloc[idx]
        text = row['sentence']
        label = row['label']

        # Tokenize (Source 852 - standard classification encoding).
        # The tokenizer is called directly: `encode_plus` is deprecated in
        # favour of `__call__`, which accepts the same arguments.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            # return_tensors='pt' yields a leading batch axis; drop it so the
            # DataLoader can stack items into (batch, max_len).
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Initialize Tokenizer
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

train_dataset = FinBERTDataset(train_df, tokenizer)
val_dataset = FinBERTDataset(val_df, tokenizer)
test_dataset = FinBERTDataset(test_df, tokenizer)

# Batch Size 64 (Source 880)
# Note: If you hit OOM on Kaggle T4, reduce to 32, but 64 is the paper spec.
BATCH_SIZE = 64

# Load batches in the main process. With worker subprocesses (num_workers=2) the
# recorded training run was littered with
# "AssertionError: can only test a child process" tracebacks raised while the
# DataLoader iterators were being torn down. Each item is a single short headline,
# so in-process loading is not expected to be the bottleneck.
NUM_WORKERS = 0

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
# FinBERT checkpoint configured for 3 labels; both dropout probabilities are
# overridden to 0.2 (Source 880).
model = AutoModelForSequenceClassification.from_pretrained(
    "ProsusAI/finbert",
    num_labels=3,
    attention_probs_dropout_prob=0.2,
    hidden_dropout_prob=0.2,
)
model.to(DEVICE)

# Hyperparameters
EPOCHS, LR = 10, 2e-5

# The scheduler is stepped once per batch, so the schedule length is
# (batches per epoch) x (epochs).
total_steps = EPOCHS * len(train_loader)
warmup_steps = int(total_steps * 0.2)  # Source 880: warm-up proportion 0.2

optimizer = AdamW(model.parameters(), lr=LR)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
)

# Training Function
def train_epoch(model, data_loader, optimizer, scheduler, device):
    """Run one optimisation pass over ``data_loader``.

    For every batch the model is called with labels, the loss is recorded,
    correct argmax predictions are counted, and then backward pass,
    optimizer step, scheduler step and gradient reset are executed in that
    order.

    Returns:
        (accuracy, mean_loss): the number of correct predictions divided by
        ``len(data_loader.dataset)`` (as a double tensor) and the NumPy mean
        of the per-batch losses.
    """
    model.train()
    batch_losses = []
    n_correct = 0

    for batch in tqdm(data_loader, desc="Training"):
        ids = batch['input_ids'].to(device)
        mask = batch['attention_mask'].to(device)
        targets = batch['labels'].to(device)

        result = model(input_ids=ids, attention_mask=mask, labels=targets)

        batch_loss = result.loss
        batch_losses.append(batch_loss.item())

        # Predicted class = index of the largest logit per row
        n_correct += (result.logits.argmax(dim=1) == targets).sum()

        batch_loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    accuracy = n_correct.double() / len(data_loader.dataset)
    return accuracy, np.mean(batch_losses)

def eval_model(model, data_loader, device):
    """Score ``model`` on ``data_loader`` without computing gradients.

    The model is put in eval mode and called with labels for every batch so
    that it also returns a loss.

    Returns:
        (accuracy, mean_loss): the number of correct argmax predictions
        divided by ``len(data_loader.dataset)`` (as a double tensor) and the
        NumPy mean of the per-batch losses.
    """
    model.eval()
    batch_losses = []
    n_correct = 0

    with torch.no_grad():
        for batch in data_loader:
            ids = batch['input_ids'].to(device)
            mask = batch['attention_mask'].to(device)
            targets = batch['labels'].to(device)

            result = model(input_ids=ids, attention_mask=mask, labels=targets)

            batch_losses.append(result.loss.item())

            # Predicted class = index of the largest logit per row
            n_correct += (result.logits.argmax(dim=1) == targets).sum()

    accuracy = n_correct.double() / len(data_loader.dataset)
    return accuracy, np.mean(batch_losses)

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [None]:
# Fine-tuning driver: trains for EPOCHS epochs, evaluates on the validation
# loader after each one, and checkpoints the weights whenever validation
# accuracy improves.

# Best validation accuracy seen so far. Starting at 0 means any positive
# validation accuracy in the first epoch triggers a checkpoint.
best_accuracy = 0

for epoch in range(EPOCHS):
    # `epoch` is 0-based; the banner shows it 1-based.
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    print("-" * 10)

    # One full pass over the training loader (updates weights and the LR schedule).
    train_acc, train_loss = train_epoch(model, train_loader, optimizer, scheduler, DEVICE)
    print(f"Train loss {train_loss} accuracy {train_acc}")

    # Gradient-free pass over the held-out validation loader.
    val_acc, val_loss = eval_model(model, val_loader, DEVICE)
    print(f"Val   loss {val_loss} accuracy {val_acc}")

    # Save Best Model
    # Strict '>' keeps the earliest checkpoint when a later epoch only ties it.
    if val_acc > best_accuracy:
        # Only the state_dict is written; the file is overwritten on each improvement.
        torch.save(model.state_dict(), 'finbert_sentfin_best.pt')
        best_accuracy = val_acc
        print("=> Saved Best Model")

Epoch 1/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.8102547004305083 accuracy 0.6538087520259319
Val   loss 0.6320383355898016 accuracy 0.7444120505344994
=> Saved Best Model
Epoch 2/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d4f047fbd80>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d4f047fbd80>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Train loss 0.5872860583765753 accuracy 0.7587250135062129
Val   loss 0.5265926894019631 accuracy 0.8134110787172011
=> Saved Best Model
Epoch 3/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.456789971014549 accuracy 0.8232306861156132
Val   loss 0.46877950079300823 accuracy 0.8357628765792031
=> Saved Best Model
Epoch 4/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.3705157913010696 accuracy 0.8586709886547812
Val   loss 0.4005771445877412 accuracy 0.8629737609329445
=> Saved Best Model
Epoch 5/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.30743365524144006 accuracy 0.8858995137763371
Val   loss 0.37338754271759705 accuracy 0.8726919339164236
=> Saved Best Model
Epoch 6/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.26432085468851285 accuracy 0.9044840626688277


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d4f047fbd80>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d4f047fbd80>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Val   loss 0.38187581037773805 accuracy 0.8736637512147716
=> Saved Best Model
Epoch 7/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.22312296279545488 accuracy 0.9189627228525122
Val   loss 0.3948177695274353 accuracy 0.8843537414965986
=> Saved Best Model
Epoch 8/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.19928784742951394 accuracy 0.9308481901674771
Val   loss 0.39246098302743015 accuracy 0.880466472303207
Epoch 9/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d4f047fbd80>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d4f047fbd80>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Train loss 0.18329684043238903 accuracy 0.9375472717450027
Val   loss 0.39863657250123863 accuracy 0.8824101068999027
Epoch 10/10
----------


Training:   0%|          | 0/145 [00:00<?, ?it/s]

Train loss 0.16693402153664622 accuracy 0.9415451107509455
Val   loss 0.3989934233181617 accuracy 0.8824101068999027


In [None]:
from sklearn.metrics import classification_report, f1_score, accuracy_score

# 1. Load the Best Model Weights
# map_location makes the checkpoint loadable on the current DEVICE even if it was
# written on a different one (e.g. saved on GPU, evaluated on a CPU-only runtime).
model.load_state_dict(torch.load('finbert_sentfin_best.pt', map_location=DEVICE))
model.to(DEVICE)
model.eval()

test_preds = []
test_labels = []

# 2. Inference on Test Set
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        # Labels are only compared on the host, so they are not moved to DEVICE.
        labels = batch['labels']

        # No labels are passed: only the logits are needed here.
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        preds = torch.argmax(outputs.logits, dim=1)

        test_preds.extend(preds.cpu().tolist())
        test_labels.extend(labels.tolist())

# 3. Calculate Metrics
# Mapping: 0 -> Positive, 1 -> Negative, 2 -> Neutral (as defined in Preprocessing)
target_names = ['Positive', 'Negative', 'Neutral']
class_ids = [0, 1, 2]

print("\n" + "="*60)
print("FINAL RESULTS: FinBERT on SEntFiN Test Set")
print("="*60)

# Overall Metrics
overall_acc = accuracy_score(test_labels, test_preds)
overall_f1 = f1_score(test_labels, test_preds, average='weighted')
print(f"Overall Accuracy: {overall_acc*100:.2f}%")
print(f"Overall F1-Score: {overall_f1*100:.2f}%")
print("-" * 60)

# Per-Class Metrics (Matching Table 5 in Paper)
# `labels` pins the id -> name mapping explicitly. Without it the report infers the
# classes from the data, so a class missing from both labels and predictions would
# shift or break the pairing with target_names. zero_division=0 reports 0 instead of
# warning for a class that has no predictions or no true samples.
report = classification_report(
    test_labels,
    test_preds,
    labels=class_ids,
    target_names=target_names,
    digits=4,
    output_dict=True,
    zero_division=0,
)

print(f"{'Class':<12} | {'Accuracy (%)':<15} | {'F1-Score (%)':<15}")
print("-" * 48)

for label in target_names:
    # Note: In per-class context, Recall is the standard proxy for "Accuracy on class X"
    # (i.e. What % of actual Positives were predicted as Positive)
    acc = report[label]['recall'] * 100
    f1 = report[label]['f1-score'] * 100
    print(f"{label:<12} | {acc:<15.2f} | {f1:<15.2f}")

print("="*60)

Testing:   0%|          | 0/47 [00:00<?, ?it/s]


FINAL RESULTS: FinBERT on SEntFiN Test Set
Overall Accuracy: 86.77%
Overall F1-Score: 86.72%
------------------------------------------------------------
Class        | Accuracy (%)    | F1-Score (%)   
------------------------------------------------
Positive     | 89.77           | 88.11          
Negative     | 89.88           | 88.63          
Neutral      | 81.38           | 83.86          
