In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModel, TrainingArguments, Trainer, DataCollatorWithPadding

# 1. Create a custom dataset class that returns tokenized text,
#    numerical features, and labels.
class FinancialNewsDataset(Dataset):
    """Map-style dataset pairing a news text with numerical features and a label.

    Each item is a dict containing the tokenizer outputs for one text (with
    the leading batch dimension removed) plus two extra entries:
    ``"numerical"`` (float tensor) and ``"labels"`` (long tensor).

    Args:
        texts: Indexable, sized collection of raw text strings.
        numerical_features: Indexable, sized collection of per-sample
            feature vectors.
        labels: Indexable, sized collection of integer class ids.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding='max_length', max_length=..., return_tensors="pt")``.
            Its result must expose ``.items()`` yielding tensors whose first
            dimension is a batch dimension of size 1.
        max_length: Length every text is padded or truncated to.

    Raises:
        ValueError: If the three input collections differ in length.
    """

    def __init__(self, texts, numerical_features, labels, tokenizer, max_length=128):
        # Fail fast: a length mismatch would otherwise surface late as an
        # IndexError during batching, or silently ignore trailing rows.
        n_texts, n_numerical, n_labels = len(texts), len(numerical_features), len(labels)
        if not (n_texts == n_numerical == n_labels):
            raise ValueError(
                "texts, numerical_features and labels must have the same length, "
                f"got {n_texts}, {n_numerical} and {n_labels}"
            )

        self.texts = texts
        self.numerical_features = numerical_features
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and attach its numerical features and label."""
        text = self.texts[idx]
        numerical = self.numerical_features[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors="pt"
        )
        # return_tensors="pt" yields a leading batch dimension of 1; drop it so
        # that batching downstream stacks samples along a fresh first axis.
        encoding = {k: v.squeeze(0) for k, v in encoding.items()}
        encoding["numerical"] = torch.tensor(numerical, dtype=torch.float)
        encoding["labels"] = torch.tensor(label, dtype=torch.long)
        return encoding

# 2. Define a custom multimodal model that fuses DeBERTa text representations
#    with an MLP processing the numerical features.
class MultimodalClassifier(nn.Module):
    """Classifier that fuses a pre-trained text encoder with numerical features.

    The first-token hidden state of the text encoder is concatenated with a
    32-dimensional embedding of the numerical features produced by a small
    MLP; the concatenation is fed to a two-layer classification head.

    Args:
        text_model_name: Checkpoint name or path passed to
            ``AutoModel.from_pretrained``.
        num_numerical_features: Width of the numerical feature vector.
        num_labels: Number of output classes.
    """

    def __init__(self, text_model_name, num_numerical_features, num_labels):
        super().__init__()
        # Load the pre-trained text encoder (e.g. DeBERTa).
        self.text_model = AutoModel.from_pretrained(text_model_name)
        self.hidden_size = self.text_model.config.hidden_size

        # MLP that embeds the numerical features into 32 dimensions.
        self.numerical_mlp = nn.Sequential(
            nn.Linear(num_numerical_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )

        # Fusion head operating on [text representation ; numerical embedding].
        self.classifier = nn.Sequential(
            nn.Linear(self.hidden_size + 32, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, num_labels)
        )

    def forward(self, input_ids, attention_mask, numerical, labels=None):
        """Compute class logits and, when ``labels`` is given, the loss.

        Args:
            input_ids: Token ids forwarded to the text encoder.
            attention_mask: Attention mask forwarded to the text encoder.
            numerical: Tensor of numerical features, one row per sample. Any
                numeric dtype is accepted; it is cast to the MLP's dtype.
            labels: Optional tensor of target class ids.

        Returns:
            Dict with ``"loss"`` (cross-entropy, or ``None`` when ``labels``
            is ``None``) and ``"logits"``.
        """
        # Use the first token's hidden state as the sequence representation.
        outputs = self.text_model(input_ids=input_ids, attention_mask=attention_mask)
        text_repr = outputs.last_hidden_state[:, 0, :]  # (batch_size, hidden_size)

        # nn.Linear rejects inputs whose dtype differs from its weights, so
        # integer or float64 features are coerced to the MLP's dtype first.
        mlp_dtype = next(self.numerical_mlp.parameters()).dtype
        num_repr = self.numerical_mlp(numerical.to(dtype=mlp_dtype))  # (batch_size, 32)

        # Fuse both modalities along the feature axis and classify.
        combined = torch.cat((text_repr, num_repr), dim=1)
        logits = self.classifier(combined)

        loss = None
        if labels is not None:
            loss = nn.functional.cross_entropy(logits, labels)

        return {"loss": loss, "logits": logits}

# 3. Prepare the tokenizer, dummy data, and dataset.
# A single checkpoint name is shared by the tokenizer and the model so the two
# cannot drift apart when the checkpoint is changed.
MODEL_NAME = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Dummy data – replace these with your actual news articles, numerical features, and labels.
texts = [
    "Financial markets rally as economic indicators improve.",
    "Stock prices decline amid economic uncertainty."
]
# Example numerical features (e.g., technical indicators); adjust dimension as needed.
numerical_features = [
    [0.5, 0.3, 0.1],
    [0.2, 0.4, 0.6]
]
# Binary labels: 1 for up, 0 for down.
labels = [1, 0]

# Create the dataset.
dataset = FinancialNewsDataset(texts, numerical_features, labels, tokenizer, max_length=128)

# 4. Instantiate the multimodal model.
# Derive the feature width from the data so it cannot get out of sync with it.
num_numerical_features = len(numerical_features[0])
num_labels = 2              # For binary classification
model = MultimodalClassifier(MODEL_NAME, num_numerical_features, num_labels)

# 5. Define training arguments.
# Newer transformers releases name this argument `eval_strategy`; older ones
# only know `evaluation_strategy`. Pick whichever the installed version declares.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
# NOTE: with the two dummy samples above (one batch per epoch, three epochs)
# training ends long before step 10, so the step-based logging, evaluation and
# checkpointing configured here only take effect on a real dataset.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=10,
    save_steps=10,
    load_best_model_at_end=True,
    **{_eval_strategy_kwarg: "steps"},
)

# Data collator that assembles dataset items into batches. The dataset already
# pads every item to `max_length`, so no further padding is left to do here.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 6. Instantiate the Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,  # Replace with your train dataset
    eval_dataset=dataset,   # Replace with your validation dataset
    data_collator=data_collator,
)

# 7. Fine-tune the model.
trainer.train()



Step,Training Loss,Validation Loss


TrainOutput(global_step=3, training_loss=0.6832706928253174, metrics={'train_runtime': 7.6976, 'train_samples_per_second': 0.779, 'train_steps_per_second': 0.39, 'total_flos': 0.0, 'train_loss': 0.6832706928253174, 'epoch': 3.0})