In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("nojedag/xlm-roberta-finetuned-financial-news-sentiment-analysis-european")
model = AutoModelForSequenceClassification.from_pretrained("nojedag/xlm-roberta-finetuned-financial-news-sentiment-analysis-european")

In [2]:
import shap
import torch

# Define a prediction function
def f(x):
    # Tokenize input texts with attention mask and return as PyTorch tensor
    encoded = [tokenizer.encode_plus(
        v,
        add_special_tokens=True,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    ) for v in x]
    
    # Stack all tensors into batches
    input_ids = torch.cat([e['input_ids'] for e in encoded])
    attention_mask = torch.cat([e['attention_mask'] for e in encoded])
    
    # Get model outputs with proper attention masking
    outputs = model(input_ids, attention_mask=attention_mask)[0]
    
    # Convert to probabilities
    probs = torch.nn.functional.softmax(outputs, dim=1)
    return probs

# Create an explainer object with a larger masker output
explainer = shap.Explainer(f, tokenizer, output_names=['Negative', 'Neutral', 'Positive'])

# Create more diverse example sentences for better explanation
example_texts = [
    "Les marchés financiers montrent des signes de reprise robuste, c'est très encourageant pour les investisseurs.",
    "Die wirtschaftlichen Aussichten sind düster, mit steigender Inflation und sinkenden Aktienkursen.",
    "Los resultados trimestrales de la empresa cumplen con las expectativas, sin grandes sorpresas."
]

# Generate and visualize SHAP values with more detailed plotting
shap_values = explainer(example_texts)
shap.plots.text(shap_values, display=True)  # Set display=False for better notebook integration




  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  33%|███▎      | 1/3 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 100%|██████████| 3/3 [05:13<00:00, 75.53s/it]

  0%|          | 0/380 [00:00<?, ?it/s]

PartitionExplainer explainer: 4it [07:22, 147.41s/it]                      
