In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from datasets import load_dataset, DatasetDict
import os

In [2]:
os.getcwd()

'/atlas/data19/guhitj/Erdos_DL'

In [3]:
os.chdir('Erdos_v2/Erdos-2024-DL-Newsworthy/finetune_roberta')

In [None]:
#gpu_index = 1  # Change to 1 if you want to use the second GPU
#device = torch.device(f"cuda:{gpu_index}" if torch.cuda.is_available() else "cpu")
#print(f"Using device: {device}")

In [4]:
def prepare_data(seed=1112223):
    dataset_openai = load_dataset('csv', data_files='news_openai_final.csv')

    # Split the dataset into train, validation, and test sets
    train_val_test_split = dataset_openai['train'].train_test_split(test_size=0.2, seed=seed)
    train_val_split = train_val_test_split['train'].train_test_split(test_size=0.25, seed=seed)

    dataset = DatasetDict({
        'train': train_val_split['train'].shuffle(seed=seed),  # 60% of the original data
        'validation': train_val_split['test'].shuffle(seed=seed),  # 20% of the original data
        'test': train_val_test_split['test'].shuffle(seed=seed),  # 20% of the original data
    })

    return dataset

In [5]:
dataset = prepare_data()
dataset['validation']

Dataset({
    features: ['Publishing Time', 'Ticker', 'Sector', 'Source', 'Headline', 'Text', 'openai_sentiment', 'openai_score'],
    num_rows: 12741
})

In [6]:
from Sentiment_model import SentimentModel
from SentimentDataModule import SentimentDataModule

In [7]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"

### Evaluate without fine-tuning roberta

In [28]:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)
model.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
data_module_roberta = SentimentDataModule(dataset['train'], dataset['validation'], 8,  512)
data_module_roberta.setup()
val_loader_roberta = data_module_roberta.val_dataloader()

In [30]:
with torch.no_grad():
    for batch in val_loader_roberta:
        print(batch)

{'input_ids': tensor([[    0, 20770,    36,  ...,     1,     1,     1],
        [    0, 25146,  3921,  ...,     1,     1,     1],
        [    0, 20770,  1782,  ...,     1,     1,     1],
        ...,
        [    0, 25146,    18,  ...,     1,     1,     1],
        [    0, 20770,   327,  ...,     1,     1,     1],
        [    0, 25146,  6494,  ...,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), 'labels': tensor([2, 2, 2, 2, 0, 1, 0, 0])}
{'input_ids': tensor([[    0, 25146, 23227,  ...,     1,     1,     1],
        [    0, 20770,  1869,  ...,     1,     1,     1],
        [    0,   863,  5683,  ...,     1,     1,     1],
        ...,
        [    0, 25146,  2693,  ...,     1,     1,     1],
        [    0, 20770,    44,  ...,     1,     1,     1],
        [    0, 25146,  6

In [10]:
label_map = {0: -1, 1: 0, 2: 1}

In [11]:
correct_predictions_roberta = 0
total_predictions_roberta = 0
all_predictions_roberta = []
all_labels_roberta = []

In [32]:
with torch.no_grad():
    for batch in val_loader_roberta:
        #print(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        
        # Compute predictions
        predictions = torch.argmax(logits, dim=-1)

        predictions_mapped = torch.tensor([label_map[pred.item()] for pred in predictions]).to(device)
        labels_mapped = torch.tensor([label_map[label.item()] for label in labels]).to(device)

        correct_predictions_roberta += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_roberta += labels_mapped.size(0)
        all_predictions_roberta.extend(predictions_mapped.cpu().numpy())
        all_labels_roberta.extend(labels_mapped.cpu().numpy())

In [33]:
accuracy_roberta = correct_predictions_roberta / total_predictions_roberta
report_roberta = classification_report(all_labels_roberta, all_predictions_roberta, target_names=['Class -1', 'Class 0', 'Class 1'])

print(f'Accuracy Roberta: {accuracy_roberta:.4f}')
print(f'Classification Report Roberta:\n{report_roberta}')

Accuracy Roberta: 0.3853
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.00      0.00      0.00      6232
     Class 0       0.25      0.54      0.35      7590
     Class 1       0.54      0.46      0.50     16196

    accuracy                           0.39     30018
   macro avg       0.27      0.33      0.28     30018
weighted avg       0.36      0.39      0.36     30018



### Evaluating with finetuned roberta 

In [14]:
checkpoint_path = '/lustre/umt3/user/guhitj/Erdos_bootcamp/Deeplearning/Project/Results/NewRun/checkpoints/Run_15_20240822-181659_FullRun30EFineTune/epoch=07-val_loss=0.34250.ckpt'
model_finetuned = SentimentModel.load_from_checkpoint(checkpoint_path)
model_finetuned.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_finetuned = model_finetuned.to(device)

/atlas/data19/guhitj/micromamba/envs/erdos_2024_dl_newsworthy/lib/python3.11/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
Some weigh

In [15]:
data_module_finetuned = SentimentDataModule(dataset['train'], dataset['validation'], 8,  512)
data_module_finetuned.setup()
val_loader_finetuned = data_module_finetuned.val_dataloader()

In [16]:
correct_predictions_finetuned = 0
total_predictions_finetuned = 0
all_predictions_finetuned = []
all_labels_finetuned = []

In [18]:
with torch.no_grad():
    for batch in val_loader_finetuned:
        #print(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model_finetuned(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs[1]
        #print(logits)
      
        # Compute predictions
        predictions = torch.argmax(logits, dim=-1)

        predictions_mapped = torch.tensor([label_map[pred.item()] for pred in predictions]).to(device)
        labels_mapped = torch.tensor([label_map[label.item()] for label in labels]).to(device)

        correct_predictions_finetuned += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_finetuned += labels_mapped.size(0)
        all_predictions_finetuned.extend(predictions_mapped.cpu().numpy())
        all_labels_finetuned.extend(labels_mapped.cpu().numpy())

In [19]:
accuracy_finetuned = correct_predictions_finetuned / total_predictions_finetuned
report_finetuned = classification_report(all_labels_finetuned, all_predictions_finetuned, target_names=['Class -1', 'Class 0', 'Class 1'])

print(f'Accuracy Roberta: {accuracy_finetuned:.4f}')
print(f'Classification Report Roberta:\n{report_finetuned}')

Accuracy Roberta: 0.8814
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.89      0.90      0.89      2724
     Class 0       0.75      0.83      0.79      3332
     Class 1       0.96      0.90      0.93      7101

    accuracy                           0.88     13157
   macro avg       0.86      0.88      0.87     13157
weighted avg       0.89      0.88      0.88     13157



In [None]:
import pandas as pd
import csv
from datasets import Dataset

In [None]:
news_file = 'news_openai_final.csv'
df = pd.read_csv(news_file)

In [None]:
dataset = Dataset.from_pandas(df)

In [None]:
from SentimentDataModule_all import SentimentDataModule_all

In [None]:
data_module_fullDS = SentimentDataModule_all(dataset, 8,  512)
data_module_fullDS.setup()
data_loader_fullDS = data_module_fullDS.dataloader()

In [None]:
correct_predictions_fullDS = 0
total_predictions_fullDS = 0
all_predictions_fullDS = []
all_labels_fullDS = []

In [None]:
with torch.no_grad():
    for batch in data_loader_fullDS:
        #print(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model_finetuned(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs[1]
        
        # Compute predictions
        predictions = torch.argmax(logits, dim=-1)
        predictions_mapped = torch.tensor([label_map[pred.item()] for pred in predictions]).to(device)
        labels_mapped = torch.tensor([label_map[label.item()] for label in labels]).to(device)

        correct_predictions_fullDS += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_fullDS += labels_mapped.size(0)
        all_predictions_fullDS.extend(predictions_mapped.cpu().numpy())
        all_labels_fullDS.extend(labels_mapped.cpu().numpy())

In [None]:
accuracy_fullDS = correct_predictions_fullDS / total_predictions_fullDS
report_fullDS = classification_report(all_labels_fullDS, all_predictions_fullDS, target_names=['Class -1', 'Class 0', 'Class 1'])

print(f'Accuracy Roberta: {accuracy_fullDS:.4f}')
print(f'Classification Report Roberta:\n{report_fullDS}')

In [None]:
assert len(all_predictions_fullDS) == len(df)

In [None]:
df['finetune_roberta_sentiment'] = all_predictions_fullDS

In [None]:
df['finetune_roberta_sentiment'] = df['finetune_roberta_sentiment'].astype(float)

In [None]:
df.head(20)

In [None]:
output_csv_path = 'news_finetuned_roberta.csv'  # Replace with your desired output path
df.to_csv(output_csv_path, index=False)