In [1]:
import pandas as pd

In [2]:
# Load the data correctly
financial_headlines = pd.read_csv(
    "all_financial_sentiment_data.csv",
    names=["sentiment", "text"],  # Ensure column names match your CSV
    encoding="utf-8",
    encoding_errors="replace",
    header=None  # Add this if your CSV has no header row
)

# Now, set Y to the entire DataFrame (not a list of subsets)
Y = financial_headlines.copy()

# Verify the structure
print(Y.head())  # Should show columns 'sentiment' and 'text'

  sentiment                                               text
0   neutral  According to Gran , the company has no plans t...
1   neutral  Technopolis plans to develop in stages an area...
2  negative  The international electronic industry company ...
3  positive  With the new production plant the company woul...
4  positive  According to the company 's updated strategy f...


In [3]:
Y.head(1)

Unnamed: 0,sentiment,text
0,neutral,"According to Gran , the company has no plans t..."


In [4]:
type(Y)

pandas.core.frame.DataFrame

In [5]:
Y = Y.head(50)

In [6]:
import warnings
warnings.simplefilter("ignore")

In [7]:
import pandas as pd
import openai
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge
from nltk.translate.meteor_score import meteor_score
from bert_score import score
from torch.nn import CrossEntropyLoss
import torch

# Step 1: Set up OpenAI API key
openai.api_key = "key"

# Step 2: Use Y as input and convert it to a DataFrame if necessary
if not isinstance(Y, pd.DataFrame):
    if isinstance(Y, list):
        # Assuming Y is a list of texts and sentiments
        if len(Y) > 0 and isinstance(Y[0], str):
            # Create a DataFrame with text and sentiment columns
            texts = [y for y in Y]
            sentiments = [None]*len(Y)  # You need to provide sentiments for all texts
            Y = pd.DataFrame({
                'text': texts,
                'sentiment': sentiments
            })
        elif len(Y) > 0 and isinstance(Y[0], list) and len(Y[0]) == 2:
            # Create a DataFrame with text and sentiment columns
            texts = [y[0] for y in Y]
            sentiments = [y[1] for y in Y]
            Y = pd.DataFrame({
                'text': texts,
                'sentiment': sentiments
            })
        else:
            raise ValueError("Invalid input format")
    else:
        raise ValueError("Invalid input type")

# Step 3: Define sentiment classification function
def classify_sentiment_gpt4(text):
    prompt = f"""
    Analyze the sentiment of the following financial news text and classify it as one of the following:
    - Positive
    - Negative
    - Neutral

    Text: "{text}"
    Sentiment:
    """
    try:
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error during sentiment classification: {e}")
        return None

# Step 4: Generate predictions
subset_Y = Y.sample(n=min(100, len(Y)), random_state=42)  # Ensure n is not larger than the length of Y
subset_Y['predicted_sentiment'] = subset_Y['text'].apply(classify_sentiment_gpt4)
subset_Y = subset_Y.dropna(subset=['predicted_sentiment'])

true_labels = subset_Y['sentiment'].str.strip().str.lower()
predicted_labels = subset_Y['predicted_sentiment'].str.strip().str.lower()

# Step 5: Calculate metrics
accuracy = accuracy_score(true_labels, predicted_labels)

unique_labels = list(set(true_labels))
report = classification_report(true_labels, predicted_labels, labels=unique_labels)
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Perplexity from googlespaper
def calculate_perplexity(true_labels, predicted_labels):
    label_to_id = {label: idx for idx, label in enumerate(set(true_labels))}
    true_ids = torch.tensor([label_to_id[label] for label in true_labels])
    pred_ids = torch.tensor([label_to_id[label] for label in predicted_labels])
    logits = torch.randn(len(true_labels), len(label_to_id))
    loss_fn = CrossEntropyLoss()
    loss = loss_fn(logits, true_ids)
    return torch.exp(loss).item()

perplexity = calculate_perplexity(true_labels, predicted_labels)

# BLEU Score
def calculate_bleu(true_labels, predicted_labels):
    bleu_scores = []
    for true, pred in zip(true_labels, predicted_labels):
        reference = [true.split()]
        candidate = pred.split()
        bleu_scores.append(sentence_bleu(reference, candidate))
    return sum(bleu_scores) / len(bleu_scores)

bleu_score = calculate_bleu(true_labels, predicted_labels)

# ROUGE Score
def calculate_rouge(true_labels, predicted_labels):
    rouge = Rouge()
    return rouge.get_scores(predicted_labels.tolist(), true_labels.tolist(), avg=True)

rouge_scores = calculate_rouge(true_labels, predicted_labels)


# BERTScore
def calculate_bertscore(true_labels, predicted_labels):
    P, R, F1 = score(predicted_labels.tolist(), true_labels.tolist(), lang="en")
    return F1.mean().item()

bertscore_f1 = calculate_bertscore(true_labels, predicted_labels)

# Faithfulness fromdeepeval
def check_faithfulness(text, predicted_sentiment):
    prompt = f"""
    Does the following sentiment prediction align with the tone of the input text?
    Text: "{text}"
    Predicted Sentiment: "{predicted_sentiment}"
    Answer:
    """
    try:
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1
        )
        return response.choices[0].message.content.strip().lower() == "yes"
    except Exception as e:
        print(f"Error during faithfulness check: {e}")
        return False

faithfulness_scores = subset_Y.apply(
    lambda row: check_faithfulness(row['text'], row['predicted_sentiment']), axis=1
)
faithfulness_accuracy = faithfulness_scores.mean()

# Print results
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)
print("Confusion Matrix:")
print(conf_matrix)
print(f"Perplexity: {perplexity:.2f}")
print(f"BLEU Score: {bleu_score:.2f}")
print("ROUGE Scores:", rouge_scores)
#print(f"METEOR Score: {meteor_score_value:.2f}")
print(f"BERTScore F1: {bertscore_f1:.2f}")
print(f"Faithfulness Accuracy: {faithfulness_accuracy:.2f}")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Accuracy: 0.86
Classification Report:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         1
    positive       1.00      0.85      0.92        47
     neutral       0.22      1.00      0.36         2

    accuracy                           0.86        50
   macro avg       0.74      0.95      0.76        50
weighted avg       0.97      0.86      0.90        50

Confusion Matrix:
[[ 1  0  0]
 [ 0  2  0]
 [ 0  7 40]]
Perplexity: 4.09
BLEU Score: 0.00
ROUGE Scores: {'rouge-1': {'r': 0.86, 'p': 0.86, 'f': 0.8599999957000004}, 'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0}, 'rouge-l': {'r': 0.86, 'p': 0.86, 'f': 0.8599999957000004}}
BERTScore F1: 1.00
Faithfulness Accuracy: 0.90


In [None]:
## othersmetrics toadd asneecesary

In [None]:
# METEOR Score
def calculate_meteor(true_labels, predicted_labels):
    meteor_scores = []
    for true, pred in zip(true_labels, predicted_labels):
        reference = [true.split()]
        candidate = pred.split()
        meteor_scores.append(meteor_score(reference, candidate))
    return sum(meteor_scores) / len(meteor_scores)

meteor_score_value = calculate_meteor(true_labels, predicted_labels)


In [8]:
# Export Y to a CSV file
Y.to_csv('SentimentAnalysis.csv', index=False)