# **1. Dataset Glance**

In [7]:
import pandas as pd
import numpy as np
import ast
import re
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW  # Imported from torch instead of transformers
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification, 
    get_linear_schedule_with_warmup
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score
from tqdm.auto import tqdm
import os

# Strict Reproducibility
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

Using device: cuda


In [8]:
def preprocess_sentfin_methodology(df):
    processed_rows = []
    
    # Parse the dictionary string in 'Decisions' column
    df['Decisions'] = df['Decisions'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    
    for idx, row in df.iterrows():
        original_headline = row['Title']
        decisions = row['Decisions']
        all_entities = list(decisions.keys())
        
        for target_entity, sentiment in decisions.items():
            # Label mapping: Positive: 0, Negative: 1, Neutral: 2
            label = {"positive": 0, "negative": 1, "neutral": 2}[sentiment]
            
            # Masking Logic
            masked_text = original_headline
            
            # 1. Replace Other entities first
            for ent in all_entities:
                if ent != target_entity:
                    masked_text = re.sub(rf'\b{re.escape(ent)}\b', 'Other', masked_text)
            
            # 2. Replace the Target entity
            masked_text = re.sub(rf'\b{re.escape(target_entity)}\b', 'Target', masked_text)
            
            # 3. Clean special characters (Paper section 3.2.1)
            masked_text = re.sub(r'[^\w\s]', '', masked_text)
            
            processed_rows.append({
                'sentence': masked_text,
                'label': label,
                'split': row['split']
            })
            
    return pd.DataFrame(processed_rows)

# Load raw data
raw_df = pd.read_csv('/kaggle/input/aspect-based-sentiment-analysis-for-financial-news/SEntFiN-v1.1_with_split.csv')
df_processed = preprocess_sentfin_methodology(raw_df)

# Splits
train_full = df_processed[df_processed['split'] == 'train'].reset_index(drop=True)
test_df = df_processed[df_processed['split'] == 'test'].reset_index(drop=True)

# 10% Validation split from train
train_df, val_df = train_test_split(train_full, test_size=0.1, random_state=SEED, stratify=train_full['label'])

print(f"Train: {len(train_df)} | Val: {len(val_df)} | Test: {len(test_df)}")

Train: 9255 | Val: 1029 | Test: 3000


In [9]:
class SEntFiNDataset(Dataset):
    def __init__(self, df, tokenizer, max_len=30):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        encoding = self.tokenizer.encode_plus(
            row['sentence'],
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(row['label'], dtype=torch.long)
        }

# THE PAPER'S SPECIFIC CHECKPOINT FOR ROBERTA (B)
MODEL_NAME = "openai-community/roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

train_loader = DataLoader(SEntFiNDataset(train_df, tokenizer), batch_size=64, shuffle=True)
val_loader = DataLoader(SEntFiNDataset(val_df, tokenizer), batch_size=64)
test_loader = DataLoader(SEntFiNDataset(test_df, tokenizer), batch_size=64)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/624 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [10]:
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, 
    num_labels=3,
    hidden_dropout_prob=0.2,
    attention_probs_dropout_prob=0.2,
    ignore_mismatched_sizes=True 
)
model.to(DEVICE)

# Weight Decay 0.01 per RoBERTa (B) configuration
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
total_steps = len(train_loader) * 10
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

best_acc = 0
for epoch in range(10):
    model.train()
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        optimizer.zero_grad()
        input_ids, mask, labels = batch['input_ids'].to(DEVICE), batch['attention_mask'].to(DEVICE), batch['labels'].to(DEVICE)
        outputs = model(input_ids, attention_mask=mask, labels=labels)
        outputs.loss.backward()
        optimizer.step()
        scheduler.step()
    
    # Validation
    model.eval()
    correct = 0
    with torch.no_grad():
        for batch in val_loader:
            input_ids, mask, labels = batch['input_ids'].to(DEVICE), batch['attention_mask'].to(DEVICE), batch['labels'].to(DEVICE)
            preds = torch.argmax(model(input_ids, attention_mask=mask).logits, dim=1)
            correct += (preds == labels).sum().item()
    
    val_acc = correct / len(val_df)
    print(f"Val Accuracy: {val_acc:.4f}")
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'roberta_sentfin_best.pt')
        print("Model Saved.")

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at openai-community/roberta-base-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at openai-community/roberta-base-openai-detector and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([2]) in the checkpoint and torch

Epoch 1:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.7862
Model Saved.


Epoch 2:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8056
Model Saved.


Epoch 3:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8309
Model Saved.


Epoch 4:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8552
Model Saved.


Epoch 5:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8698
Model Saved.


Epoch 6:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8698


Epoch 7:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8814
Model Saved.


Epoch 8:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8795


Epoch 9:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8776


Epoch 10:   0%|          | 0/145 [00:00<?, ?it/s]

Val Accuracy: 0.8785


In [15]:
# Load the Best Model Weights
model.load_state_dict(torch.load('roberta_sentfin_best.pt'))
model.to(DEVICE)
model.eval()

test_preds, test_labels = [], []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        input_ids, mask, labels = batch['input_ids'].to(DEVICE), batch['attention_mask'].to(DEVICE), batch['labels'].to(DEVICE)
        outputs = model(input_ids=input_ids, attention_mask=mask)
        preds = torch.argmax(outputs.logits, dim=1)
        test_preds.extend(preds.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

target_names = ['Positive', 'Negative', 'Neutral']
print("\n" + "="*60)
print("FINAL RESULTS: RoBERTa (B) on SEntFiN Test Set")
print("="*60)

print(f"Overall Accuracy: {accuracy_score(test_labels, test_preds)*100:.2f}%")
print(f"Overall F1-Score: {f1_score(test_labels, test_preds, average='weighted')*100:.2f}%")
print("-" * 60)

report = classification_report(test_labels, test_preds, target_names=target_names, digits=4, output_dict=True)
print(f"{'Class':<12} | {'Accuracy (%)':<15} | {'F1-Score (%)':<15}")
print("-" * 48)

for label in target_names:
    acc = report[label]['recall'] * 100
    f1 = report[label]['f1-score'] * 100
    print(f"{label:<12} | {acc:<15.2f} | {f1:<15.2f}")
print("="*60)

Testing:   0%|          | 0/47 [00:00<?, ?it/s]


FINAL RESULTS: RoBERTa (B) on SEntFiN Test Set
Overall Accuracy: 86.43%
Overall F1-Score: 86.38%
------------------------------------------------------------
Class        | Accuracy (%)    | F1-Score (%)   
------------------------------------------------
Positive     | 88.92           | 88.22          
Negative     | 90.71           | 88.01          
Neutral      | 80.65           | 83.29          
