In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from datasets import load_dataset, DatasetDict
import os

In [2]:
# Show the kernel's working directory; the relative paths used in the cells
# below are resolved against it.
os.getcwd()

'/atlas/data19/guhitj/Erdos_DL'

In [3]:
# Move into the project folder; the CSV and the Sentiment* modules used by the
# cells below are loaded relative to the working directory.
# The target is a relative path, so the call is guarded: re-running this cell
# after the first chdir would otherwise look for the folder inside itself and
# raise FileNotFoundError.
project_dir = 'Erdos_v2/Erdos-2024-DL-Newsworthy/finetune_roberta'
if not os.getcwd().endswith(os.sep + os.path.normpath(project_dir)):
    os.chdir(project_dir)

In [None]:
#gpu_index = 1  # Change to 1 if you want to use the second GPU
#device = torch.device(f"cuda:{gpu_index}" if torch.cuda.is_available() else "cpu")
#print(f"Using device: {device}")

In [4]:
def prepare_data(seed=42, data_files='news_openai_final.csv'):
    """Load the labelled news CSV and split it into train/validation/test sets.

    Args:
        seed: Seed passed to every split and shuffle so the partition is
            reproducible.
        data_files: CSV location handed to ``load_dataset('csv', ...)``.
            Defaults to the file this notebook has always used.

    Returns:
        DatasetDict with the keys 'train', 'validation' and 'test', holding
        60% / 20% / 20% of the rows respectively.
    """
    # The CSV loader exposes all rows under the 'train' key.
    all_rows = load_dataset('csv', data_files=data_files)['train']

    # Hold out 20% for testing, then carve 25% of the remaining 80%
    # (i.e. 20% of the original data) off as the validation set.
    train_val_test_split = all_rows.train_test_split(test_size=0.2, seed=seed)
    train_val_split = train_val_test_split['train'].train_test_split(test_size=0.25, seed=seed)

    return DatasetDict({
        'train': train_val_split['train'].shuffle(seed=seed),  # 60% of the original data
        'validation': train_val_split['test'].shuffle(seed=seed),  # 20% of the original data
        'test': train_val_test_split['test'].shuffle(seed=seed),  # 20% of the original data
    })

In [5]:
# Build the train/validation/test splits with the default seed.
dataset = prepare_data()
# Only the last expression of a cell is rendered, so the validation lookup on
# the next line produces no output; the train split is what gets displayed.
dataset['validation']
dataset['train']

Dataset({
    features: ['Publishing Time', 'Ticker', 'Sector', 'Source', 'Headline', 'Text', 'openai_sentiment', 'openai_score'],
    num_rows: 38221
})

In [6]:
from Sentiment_model import SentimentModel
from SentimentDataModule import SentimentDataModule

In [7]:
# Ask the tokenizers library not to use its own parallelism.
# NOTE(review): presumably set to avoid the fork warning when the data loaders
# spawn workers — confirm it is still needed for SentimentDataModule.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

### Evaluate without fine-tuning RoBERTa

In [8]:
# Baseline: stock roberta-base with a 3-way classification head. The head is
# newly initialised at load time (see the load-time warning), so this model has
# not been trained for the task.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)
model.eval()  # inference mode
model = model.to(device)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Build the data module from the train/validation splits and take its
# validation loader for the baseline evaluation.
# NOTE(review): 8 and 512 are presumably the batch size and the maximum token
# length — confirm against SentimentDataModule's signature.
data_module_roberta = SentimentDataModule(dataset['train'], dataset['validation'], 8,  512)
data_module_roberta.setup()
val_loader_roberta = data_module_roberta.val_dataloader()

In [11]:
#with torch.no_grad():
#    for batch in val_loader_roberta:
#        print(batch)

In [8]:
# Class index (argmax position / encoded label) -> sentiment value:
# 0 -> -1 (negative), 1 -> 0 (neutral), 2 -> 1 (positive).
label_map = dict(enumerate((-1, 0, 1)))

In [13]:
# Fresh accumulators for the baseline evaluation: running hit/total counters
# plus the per-sample predictions and labels collected for the report.
correct_predictions_roberta = total_predictions_roberta = 0
all_predictions_roberta, all_labels_roberta = [], []

In [14]:
# Evaluate the un-fine-tuned baseline on the validation loader.
#
# The accumulators are reset here as well, so re-running this cell on its own
# starts from zero instead of adding a second pass on top of the first.
correct_predictions_roberta = 0
total_predictions_roberta = 0
all_predictions_roberta = []
all_labels_roberta = []

# Lookup tensor: position i holds label_map[i]. Indexing it with a batch of
# class indices maps the whole batch at once, replacing a Python loop that
# called .item() on every element.
label_lookup = torch.tensor([label_map[idx] for idx in range(len(label_map))], device=device)

with torch.no_grad():
    for batch in val_loader_roberta:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Predicted class index = position of the largest logit.
        predictions = torch.argmax(logits, dim=-1)

        # Translate class indices to sentiment values (-1 / 0 / 1).
        predictions_mapped = label_lookup[predictions]
        labels_mapped = label_lookup[labels.reshape(-1).long()]

        correct_predictions_roberta += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_roberta += labels_mapped.size(0)
        all_predictions_roberta.extend(predictions_mapped.cpu().numpy())
        all_labels_roberta.extend(labels_mapped.cpu().numpy())

In [15]:
# Summarise the baseline run: overall accuracy plus a per-class report.
accuracy_roberta = correct_predictions_roberta / total_predictions_roberta

# The label set is pinned explicitly. Without `labels=`, classification_report
# infers the classes from the data and raises when fewer than three classes
# occur, because the count no longer matches target_names.
# zero_division=0 keeps the 0.00 entries for classes that are never predicted
# but silences the UndefinedMetricWarning the default setting emits.
report_roberta = classification_report(
    all_labels_roberta,
    all_predictions_roberta,
    labels=[-1, 0, 1],
    target_names=['Class -1', 'Class 0', 'Class 1'],
    zero_division=0,
)

print(f'Accuracy Roberta: {accuracy_roberta:.4f}')
print(f'Classification Report Roberta:\n{report_roberta}')

Accuracy Roberta: 0.5452
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.00      0.00      0.00      2488
     Class 0       0.00      0.00      0.00      3306
     Class 1       0.55      1.00      0.71      6947

    accuracy                           0.55     12741
   macro avg       0.18      0.33      0.24     12741
weighted avg       0.30      0.55      0.38     12741



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Evaluate with fine-tuned RoBERTa

In [83]:
# Earlier candidate checkpoints, kept for reference:
#checkpoint_path = '/lustre/umt3/user/guhitj/Erdos_bootcamp/Deeplearning/Project/Results/NewRun/checkpoints/Run_15_20240822-181659_FullRun30EFineTune/epoch=07-val_loss=0.34250.ckpt'
#checkpoint_path = '/lustre/umt3/user/guhitj/Erdos_bootcamp/Deeplearning/Project/Results/NewRun/checkpoints/Run_14_20240822-042913_FullRun30E/epoch=18-val_loss=0.41.ckpt'
#checkpoint_path = '/lustre/umt3/user/guhitj/Erdos_bootcamp/Deeplearning/Project/Results/NewRun/checkpoints/Run_14_20240822-042913_FullRun30E/epoch=06-val_loss=0.72.ckpt'
#checkpoint_path = '/lustre/umt3/user/guhitj/Erdos_bootcamp/Deeplearning/Project/Results/NewRun/checkpoints/Run_15_20240822-181659_FullRun30EFineTune/epoch=12-val_loss=0.34323.ckpt'

# Checkpoint currently under evaluation.
checkpoint_path = '/lustre/umt3/user/guhitj/Erdos_bootcamp/Deeplearning/Project/Results/NewRun/checkpoints/Run_16_20240825-085343_FullRun20EFineTune/epoch=06-val_loss=0.32016.ckpt'

# Pick the device first, then restore the model, switch it to inference mode
# and move it onto that device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_finetuned = SentimentModel.load_from_checkpoint(checkpoint_path)
model_finetuned.eval()
model_finetuned = model_finetuned.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [84]:
# Validation loader for the fine-tuned model, built with the same constructor
# arguments as the one used for the baseline evaluation.
# NOTE(review): 8 and 512 are presumably the batch size and the maximum token
# length — confirm against SentimentDataModule's signature.
data_module_finetuned = SentimentDataModule(dataset['train'], dataset['validation'], 8,  512)
data_module_finetuned.setup()
val_loader_finetuned = data_module_finetuned.val_dataloader()

In [85]:
# Fresh accumulators for the fine-tuned evaluation: running hit/total counters
# plus the per-sample predictions and labels collected for the report.
correct_predictions_finetuned = total_predictions_finetuned = 0
all_predictions_finetuned, all_labels_finetuned = [], []

In [86]:
# Evaluate the fine-tuned checkpoint on the validation loader.
#
# The accumulators are reset here as well, so re-running this cell on its own
# starts from zero instead of adding a second pass on top of the first.
correct_predictions_finetuned = 0
total_predictions_finetuned = 0
all_predictions_finetuned = []
all_labels_finetuned = []

# Lookup tensor: position i holds label_map[i]. Indexing it with a batch of
# class indices maps the whole batch at once, replacing a Python loop that
# called .item() on every element.
label_lookup = torch.tensor([label_map[idx] for idx in range(len(label_map))], device=device)

with torch.no_grad():
    for batch in val_loader_finetuned:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model_finetuned(input_ids=input_ids, attention_mask=attention_mask)
        # The second element of the model's return value is used as the logits.
        # NOTE(review): confirm the tuple layout against SentimentModel.forward.
        logits = outputs[1]

        # Predicted class index = position of the largest logit.
        predictions = torch.argmax(logits, dim=-1)

        # Translate class indices to sentiment values (-1 / 0 / 1).
        predictions_mapped = label_lookup[predictions]
        labels_mapped = label_lookup[labels.reshape(-1).long()]

        correct_predictions_finetuned += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_finetuned += labels_mapped.size(0)
        all_predictions_finetuned.extend(predictions_mapped.cpu().numpy())
        all_labels_finetuned.extend(labels_mapped.cpu().numpy())

In [87]:
# Summarise the fine-tuned model's validation run.
accuracy_finetuned = correct_predictions_finetuned / total_predictions_finetuned

# The label set is pinned explicitly. Without `labels=`, classification_report
# infers the classes from the data and raises when fewer than three classes
# occur, because the count no longer matches target_names.
# zero_division=0 avoids a warning should a class never be predicted.
report_finetuned = classification_report(
    all_labels_finetuned,
    all_predictions_finetuned,
    labels=[-1, 0, 1],
    target_names=['Class -1', 'Class 0', 'Class 1'],
    zero_division=0,
)

# NOTE(review): the printed labels say "Roberta" although these numbers belong
# to the fine-tuned checkpoint; the text is left unchanged so the saved output
# still matches.
print(f'Accuracy Roberta: {accuracy_finetuned:.4f}')
print(f'Classification Report Roberta:\n{report_finetuned}')

Accuracy Roberta: 0.8934
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.87      0.91      0.89      2488
     Class 0       0.78      0.84      0.81      3306
     Class 1       0.96      0.91      0.94      6947

    accuracy                           0.89     12741
   macro avg       0.87      0.89      0.88     12741
weighted avg       0.90      0.89      0.89     12741



In [88]:
import pandas as pd
import csv
from datasets import Dataset
import torch.nn.functional as F

In [89]:
# Read the full labelled CSV into a DataFrame; the model outputs are attached
# to it as new columns further down.
news_file = 'news_openai_final.csv'
df = pd.read_csv(news_file)

In [90]:
# Wrap the whole DataFrame as a single (unsplit) Dataset.
# Note: this rebinds `dataset`, which until here held the DatasetDict returned
# by prepare_data(); earlier cells that index dataset['train'] or
# dataset['validation'] will not work again without re-running prepare_data().
dataset = Dataset.from_pandas(df)

In [91]:
from SentimentDataModule_all import SentimentDataModule_all

In [92]:
# Data loader over every row of the CSV (no train/validation split).
# NOTE(review): 8 and 512 are presumably the batch size and the maximum token
# length — confirm against SentimentDataModule_all's signature.
data_module_fullDS = SentimentDataModule_all(dataset, 8,  512)
data_module_fullDS.setup()
data_loader_fullDS = data_module_fullDS.dataloader()

In [93]:
# Accumulators for the full-dataset pass: running hit/total counters and the
# per-sample predictions and labels.
correct_predictions_fullDS = total_predictions_fullDS = 0
all_predictions_fullDS, all_labels_fullDS = [], []

# Per-row class probabilities and the compound score derived from them.
neg_sentiment, neutral_sentiment, positive_sentiment = [], [], []
compound_scores = []

In [94]:
# Score every row of the dataset with the fine-tuned model, collecting the
# predicted class, the three class probabilities and a compound score.
# The accumulators are reset first so the cell can be re-run on its own.
correct_predictions_fullDS = 0
total_predictions_fullDS = 0
all_predictions_fullDS = []
all_labels_fullDS = []

neg_sentiment = []
neutral_sentiment = []
positive_sentiment = []
compound_scores = []

# Lookup tensor: position i holds label_map[i]. Indexing it with a batch of
# class indices maps the whole batch at once, replacing a Python loop that
# called .item() on every element.
label_lookup = torch.tensor([label_map[idx] for idx in range(len(label_map))], device=device)

with torch.no_grad():
    for batch in data_loader_fullDS:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Get the logits from the model (second element of its return value).
        outputs = model_finetuned(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs[1]

        # Apply softmax to get class probabilities
        # (column 0 = negative, 1 = neutral, 2 = positive).
        probs = F.softmax(logits, dim=-1)

        # Extract each probability column as a list of Python floats in one
        # call per column, rather than calling .item() three times per row.
        negative_probs = probs[:, 0].tolist()
        neutral_probs = probs[:, 1].tolist()
        positive_probs = probs[:, 2].tolist()

        neg_sentiment.extend(negative_probs)
        neutral_sentiment.extend(neutral_probs)
        positive_sentiment.extend(positive_probs)
        # Compound score = P(positive) - P(negative). The subtraction is done on
        # Python floats so the values equal those of a per-row computation.
        compound_scores.extend(
            positive_prob - negative_prob
            for positive_prob, negative_prob in zip(positive_probs, negative_probs)
        )

        # Compute predictions for each sequence in the batch
        predictions = torch.argmax(logits, dim=-1)
        predictions_mapped = label_lookup[predictions]
        labels_mapped = label_lookup[labels.reshape(-1).long()]

        correct_predictions_fullDS += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_fullDS += labels_mapped.size(0)
        all_predictions_fullDS.extend(predictions_mapped.cpu().numpy())
        all_labels_fullDS.extend(labels_mapped.cpu().numpy())

# Final assertions: every per-row list must have one entry per prediction.
assert len(neg_sentiment) == len(all_predictions_fullDS), "Mismatch in lengths!"
assert len(neutral_sentiment) == len(all_predictions_fullDS), "Mismatch in lengths!"
assert len(positive_sentiment) == len(all_predictions_fullDS), "Mismatch in lengths!"
assert len(compound_scores) == len(all_predictions_fullDS), "Mismatch in lengths!"


In [95]:
# Summarise the full-dataset pass.
accuracy_fullDS = correct_predictions_fullDS / total_predictions_fullDS

# The label set is pinned explicitly. Without `labels=`, classification_report
# infers the classes from the data and raises when fewer than three classes
# occur, because the count no longer matches target_names.
# zero_division=0 avoids a warning should a class never be predicted.
report_fullDS = classification_report(
    all_labels_fullDS,
    all_predictions_fullDS,
    labels=[-1, 0, 1],
    target_names=['Class -1', 'Class 0', 'Class 1'],
    zero_division=0,
)

# NOTE(review): the printed labels say "Roberta" although these numbers come
# from the fine-tuned model run over the whole CSV; the text is left unchanged
# so the saved output still matches.
print(f'Accuracy Roberta: {accuracy_fullDS:.4f}')
print(f'Classification Report Roberta:\n{report_fullDS}')

Accuracy Roberta: 0.8920
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.88      0.91      0.90     12912
     Class 0       0.77      0.84      0.81     16501
     Class 1       0.96      0.91      0.93     34290

    accuracy                           0.89     63703
   macro avg       0.87      0.89      0.88     63703
weighted avg       0.90      0.89      0.89     63703



In [96]:
# Number of rows scored in the full-dataset pass.
len(all_predictions_fullDS)

63703

In [97]:
# Lengths of the per-row probability / compound-score lists (should all match).
len(neg_sentiment), len(neutral_sentiment), len(positive_sentiment), len(compound_scores)

(63703, 63703, 63703, 63703)

In [98]:
# Each per-row result list must have exactly one entry per DataFrame row
# before it can be attached as a column.
assert all(
    len(values) == len(df)
    for values in (
        all_predictions_fullDS,
        neg_sentiment,
        neutral_sentiment,
        positive_sentiment,
        compound_scores,
    )
)

In [99]:
# Attach the model outputs to the DataFrame as new columns: predicted
# sentiment, the three class probabilities and the compound score.
# NOTE(review): the lists are assigned positionally, so this assumes
# data_loader_fullDS yielded rows in the same order as df (no shuffling) —
# confirm in SentimentDataModule_all.
df['frob_sentiment'] = all_predictions_fullDS
df['frob_neg'] = neg_sentiment
df['frob_neu'] = neutral_sentiment
df['frob_pos'] = positive_sentiment
df['frob_comp'] = compound_scores


In [100]:
# Cast the predicted sentiment from integer to float.
# NOTE(review): presumably to match the float-valued openai_sentiment column — confirm.
df['frob_sentiment'] = df['frob_sentiment'].astype(float)

In [101]:
# Preview the first five rows, now including the frob_* columns.
df.head(5)

Unnamed: 0,Publishing Time,Ticker,Sector,Source,Headline,Text,openai_sentiment,openai_score,frob_sentiment,frob_neg,frob_neu,frob_pos,frob_comp
0,2019-03-15 10:46:42+00:00,WFC,Finance,The Motley Fool,Did Wells Fargo CEO Tim Sloan Earn His $1 Mill...,We learned this week that the scandal-plagued ...,-1.0,-0.5,0.0,0.224089,0.701659,0.074252,-0.149837
1,2019-03-15 10:47:26+00:00,AAPL,Technology,The Motley Fool,Don't Underestimate Apple's iPhone Business,The segment is an invaluable asset to Apple's ...,1.0,0.75,1.0,0.027414,0.107334,0.865252,0.837837
2,2019-03-15 11:33:00+00:00,MA,Finance,Forbes,A Closer Look At Mastercard's Key Value Drivers,Mastercard has consistently beat street estima...,1.0,0.8,1.0,0.000396,0.007037,0.992567,0.992171
3,2019-03-15 11:52:45+00:00,BAC,Finance,Benzinga,Jim Cramer Gives His Opinion On Bank Of Americ...,"On CNBC's ""Mad Money Lightning Round"", Jim Cra...",1.0,0.5,-1.0,0.695015,0.258847,0.046138,-0.648877
4,2019-03-15 13:29:39+00:00,GOOGL,Technology,Benzinga,Uber And Waymo Seeking Outside Funding For Aut...,Commercially viable autonomous vehicle (AV) te...,0.0,0.1,0.0,0.027136,0.924445,0.048419,0.021284


In [102]:
# List the current column names before reordering them.
df.columns

Index(['Publishing Time', 'Ticker', 'Sector', 'Source', 'Headline', 'Text',
       'openai_sentiment', 'openai_score', 'frob_sentiment', 'frob_neg',
       'frob_neu', 'frob_pos', 'frob_comp'],
      dtype='object')

In [103]:
# Target column order: article metadata first, then the frob_* model outputs
# added above, then the OpenAI reference columns.
reorder = [
    'Publishing Time', 'Ticker', 'Sector', 'Source', 'Headline', 'Text',
    'frob_sentiment', 'frob_comp', 'frob_neg', 'frob_neu', 'frob_pos',
    'openai_sentiment', 'openai_score',
]

df = df[reorder]

In [104]:
# Preview the first five rows again to check the new column order.
df.head(5)

Unnamed: 0,Publishing Time,Ticker,Sector,Source,Headline,Text,frob_sentiment,frob_comp,frob_neg,frob_neu,frob_pos,openai_sentiment,openai_score
0,2019-03-15 10:46:42+00:00,WFC,Finance,The Motley Fool,Did Wells Fargo CEO Tim Sloan Earn His $1 Mill...,We learned this week that the scandal-plagued ...,0.0,-0.149837,0.224089,0.701659,0.074252,-1.0,-0.5
1,2019-03-15 10:47:26+00:00,AAPL,Technology,The Motley Fool,Don't Underestimate Apple's iPhone Business,The segment is an invaluable asset to Apple's ...,1.0,0.837837,0.027414,0.107334,0.865252,1.0,0.75
2,2019-03-15 11:33:00+00:00,MA,Finance,Forbes,A Closer Look At Mastercard's Key Value Drivers,Mastercard has consistently beat street estima...,1.0,0.992171,0.000396,0.007037,0.992567,1.0,0.8
3,2019-03-15 11:52:45+00:00,BAC,Finance,Benzinga,Jim Cramer Gives His Opinion On Bank Of Americ...,"On CNBC's ""Mad Money Lightning Round"", Jim Cra...",-1.0,-0.648877,0.695015,0.258847,0.046138,1.0,0.5
4,2019-03-15 13:29:39+00:00,GOOGL,Technology,Benzinga,Uber And Waymo Seeking Outside Funding For Aut...,Commercially viable autonomous vehicle (AV) te...,0.0,0.021284,0.027136,0.924445,0.048419,0.0,0.1


In [105]:
# Write the augmented table to CSV; index=False leaves the DataFrame's row
# index out of the file. The path is relative to the working directory.
output_csv_path = 'news_frob_wprobs_best.csv'  # Replace with your desired output path
df.to_csv(output_csv_path, index=False)

: 