In [56]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from datasets import load_dataset, DatasetDict
import os

In [10]:
os.chdir('Erdos-2024-DL-Newsworthy/sentiment_modified')

In [38]:
#gpu_index = 1  # Change to 1 if you want to use the second GPU
#device = torch.device(f"cuda:{gpu_index}" if torch.cuda.is_available() else "cpu")
#print(f"Using device: {device}")

In [39]:
def prepare_data(seed=23):
    dataset_openai = load_dataset('csv', data_files='news_openai_final.csv')

    # Split the dataset into train, validation, and test sets
    train_val_test_split = dataset_openai['train'].train_test_split(test_size=0.2, seed=seed)
    train_val_split = train_val_test_split['train'].train_test_split(test_size=0.25, seed=seed)

    dataset = DatasetDict({
        'train': train_val_split['train'].shuffle(seed=seed),  # 60% of the original data
        'validation': train_val_split['test'].shuffle(seed=seed),  # 20% of the original data
        'test': train_val_test_split['test'].shuffle(seed=seed),  # 20% of the original data
    })

    return dataset

In [40]:
dataset = prepare_data()
dataset['validation']

Dataset({
    features: ['Publishing Time', 'Ticker', 'Sector', 'Source', 'Headline', 'Text', 'openai_sentiment', 'openai_score'],
    num_rows: 12741
})

In [85]:
from Sentiment_model import SentimentModel
from SentimentDataModule import SentimentDataModule

In [101]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"

### Evaluate without fine-tuning roberta

In [83]:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)
model.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [93]:
data_module_roberta = SentimentDataModule(dataset['train'], dataset['validation'], 8,  512)
data_module_roberta.setup()
val_loader_roberta = data_module_roberta.val_dataloader()

In [88]:
label_map = {0: -1, 1: 0, 2: 1}

In [89]:
correct_predictions_roberta = 0
total_predictions_roberta = 0
all_predictions_roberta = []
all_labels_roberta = []

In [102]:
with torch.no_grad():
    for batch in val_loader_roberta:
        #print(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        
        # Compute predictions
        predictions = torch.argmax(logits, dim=-1)

        predictions_mapped = torch.tensor([label_map[pred.item()] for pred in predictions]).to(device)
        labels_mapped = torch.tensor([label_map[label.item()] for label in labels]).to(device)

        correct_predictions_roberta += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_roberta += labels_mapped.size(0)
        all_predictions_roberta.extend(predictions_mapped.cpu().numpy())
        all_labels_roberta.extend(labels_mapped.cpu().numpy())

In [103]:
accuracy_roberta = correct_predictions_roberta / total_predictions_roberta
report_roberta = classification_report(all_labels_roberta, all_predictions_roberta, target_names=['Class -1', 'Class 0', 'Class 1'])

print(f'Accuracy Roberta: {accuracy_roberta:.4f}')
print(f'Classification Report Roberta:\n{report_roberta}')

Accuracy Roberta: 0.5403
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.00      0.00      0.00      2784
     Class 0       0.00      0.00      0.00      3683
     Class 1       0.54      1.00      0.70      7602

    accuracy                           0.54     14069
   macro avg       0.18      0.33      0.23     14069
weighted avg       0.29      0.54      0.38     14069



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Evaluating with finetuned roberta 

In [104]:
checkpoint_path = '/lustre/umt3/user/guhitj/Erdos_bootcamp/Deeplearning/Project/Results/NewRun/checkpoints/Run_15_20240822-181659_FullRun30EFineTune/epoch=07-val_loss=0.34250.ckpt'
model_finetuned = SentimentModel.load_from_checkpoint(checkpoint_path)
model_finetuned.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_finetuned = model_finetuned.to(device)

/atlas/data19/guhitj/micromamba/envs/erdos_2024_dl_newsworthy/lib/python3.11/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
Some weigh

In [105]:
data_module_finetuned = SentimentDataModule(dataset['train'], dataset['validation'], 8,  512)
data_module_finetuned.setup()
val_loader_finetuned = data_module_finetuned.val_dataloader()

In [106]:
correct_predictions_finetuned = 0
total_predictions_finetuned = 0
all_predictions_finetuned = []
all_labels_finetuned = []

In [147]:
with torch.no_grad():
    for batch in val_loader_finetuned:
        #print(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model_finetuned(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs[1]
        print(logits)
      
        # Compute predictions
        predictions = torch.argmax(logits, dim=-1)

        predictions_mapped = torch.tensor([label_map[pred.item()] for pred in predictions]).to(device)
        labels_mapped = torch.tensor([label_map[label.item()] for label in labels]).to(device)

        correct_predictions_finetuned += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_finetuned += labels_mapped.size(0)
        all_predictions_finetuned.extend(predictions_mapped.cpu().numpy())
        all_labels_finetuned.extend(labels_mapped.cpu().numpy())

tensor([[-0.8495,  0.9794, -0.2926],
        [-2.8407,  0.0609,  3.4937],
        [-3.1853,  0.3962,  2.7780],
        [-2.7150,  0.0652,  3.2664],
        [-2.5927, -0.9768,  4.7615],
        [-0.6470,  2.2161, -1.6748],
        [-3.2770, -0.0628,  3.5134],
        [-3.5099,  0.0594,  3.9389]], device='cuda:0')
tensor([[-3.0354, -0.9553,  4.5509],
        [-1.8296,  2.2671, -0.7282],
        [-0.1401,  0.2205,  0.0656],
        [-1.9395,  1.5582,  0.0409],
        [-2.4522, -0.9828,  4.3216],
        [-2.2371, -1.1828,  4.3541],
        [ 1.8432,  0.7105, -2.1488],
        [-0.9883,  2.2971, -1.5327]], device='cuda:0')
tensor([[-0.8131,  3.0554, -2.4566],
        [-3.2004,  0.1361,  3.2120],
        [-3.0487, -0.8143,  4.5641],
        [-2.7971, -0.5153,  3.5457],
        [-2.9204, -0.5924,  4.2385],
        [-2.8638,  0.1758,  3.5441],
        [-3.8170,  1.2761,  2.3542],
        [ 1.9802,  0.5075, -2.6186]], device='cuda:0')
tensor([[-2.8255, -0.7810,  4.4605],
        [-3.2858,  1.

KeyboardInterrupt: 

In [112]:
accuracy_finetuned = correct_predictions_finetuned / total_predictions_finetuned
report_finetuned = classification_report(all_labels_finetuned, all_predictions_finetuned, target_names=['Class -1', 'Class 0', 'Class 1'])

print(f'Accuracy Roberta: {accuracy_finetuned:.4f}')
print(f'Classification Report Roberta:\n{report_finetuned}')

Accuracy Roberta: 0.8804
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.87      0.89      0.88      2535
     Class 0       0.75      0.83      0.79      3324
     Class 1       0.96      0.90      0.93      6882

    accuracy                           0.88     12741
   macro avg       0.86      0.87      0.87     12741
weighted avg       0.89      0.88      0.88     12741



In [137]:
import pandas as pd
import csv
from datasets import Dataset

In [121]:
news_file = 'news_openai_final.csv'
df = pd.read_csv(news_file)

In [122]:
dataset = Dataset.from_pandas(df)

In [140]:
from SentimentDataModule_all import SentimentDataModule_all

In [141]:
data_module_fullDS = SentimentDataModule_all(dataset, 8,  512)
data_module_fullDS.setup()
data_loader_fullDS = data_module_fullDS.dataloader()

In [148]:
correct_predictions_fullDS = 0
total_predictions_fullDS = 0
all_predictions_fullDS = []
all_labels_fullDS = []

In [155]:
with torch.no_grad():
    for batch in data_loader_fullDS:
        #print(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model_finetuned(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs[1]
        
        # Compute predictions
        predictions = torch.argmax(logits, dim=-1)
        predictions_mapped = torch.tensor([label_map[pred.item()] for pred in predictions]).to(device)
        labels_mapped = torch.tensor([label_map[label.item()] for label in labels]).to(device)

        correct_predictions_fullDS += (predictions_mapped == labels_mapped).sum().item()
        total_predictions_fullDS += labels_mapped.size(0)
        all_predictions_fullDS.extend(predictions_mapped.cpu().numpy())
        all_labels_fullDS.extend(labels_mapped.cpu().numpy())

In [156]:
accuracy_fullDS = correct_predictions_fullDS / total_predictions_fullDS
report_fullDS = classification_report(all_labels_fullDS, all_predictions_fullDS, target_names=['Class -1', 'Class 0', 'Class 1'])

print(f'Accuracy Roberta: {accuracy_fullDS:.4f}')
print(f'Classification Report Roberta:\n{report_fullDS}')

Accuracy Roberta: 0.8841
Classification Report Roberta:
              precision    recall  f1-score   support

    Class -1       0.88      0.89      0.88     12912
     Class 0       0.75      0.83      0.79     16501
     Class 1       0.96      0.90      0.93     34290

    accuracy                           0.88     63703
   macro avg       0.86      0.87      0.87     63703
weighted avg       0.89      0.88      0.88     63703



In [157]:
assert len(all_predictions_fullDS) == len(df)

In [158]:
df['finetune_roberta_sentiment'] = all_predictions_fullDS

In [160]:
df['finetune_roberta_sentiment'] = df['finetune_roberta_sentiment'].astype(float)

In [162]:
df.head(20)

Unnamed: 0,Publishing Time,Ticker,Sector,Source,Headline,Text,openai_sentiment,openai_score,finetune_roberta_sentiment
0,2019-03-15 10:46:42+00:00,WFC,Finance,The Motley Fool,Did Wells Fargo CEO Tim Sloan Earn His $1 Mill...,We learned this week that the scandal-plagued ...,-1.0,-0.5,0.0
1,2019-03-15 10:47:26+00:00,AAPL,Technology,The Motley Fool,Don't Underestimate Apple's iPhone Business,The segment is an invaluable asset to Apple's ...,1.0,0.75,1.0
2,2019-03-15 11:33:00+00:00,MA,Finance,Forbes,A Closer Look At Mastercard's Key Value Drivers,Mastercard has consistently beat street estima...,1.0,0.8,1.0
3,2019-03-15 11:52:45+00:00,BAC,Finance,Benzinga,Jim Cramer Gives His Opinion On Bank Of Americ...,"On CNBC's ""Mad Money Lightning Round"", Jim Cra...",1.0,0.5,-1.0
4,2019-03-15 13:29:39+00:00,GOOGL,Technology,Benzinga,Uber And Waymo Seeking Outside Funding For Aut...,Commercially viable autonomous vehicle (AV) te...,0.0,0.1,0.0
5,2019-03-15 15:38:26+00:00,AMZN,Technology,Benzinga,Amazon Wins Another Bull After KeyBanc Upgrade...,"Amazon.com, Inc. accelerated its profitability...",1.0,0.75,1.0
6,2019-03-15 16:15:59+00:00,AMZN,Technology,Market Watch,Amazon's stock jumps after KeyBanc upgrade mak...,Shares of Amazon.com Inc. jumped 1.4% in prema...,1.0,0.8,1.0
7,2019-03-15 16:36:05+00:00,JPM,Finance,The Motley Fool,Why Warren Buffett Thinks JPMorgan's Stock Cou...,The billionaire investor recently revealed his...,1.0,0.75,1.0
8,2019-03-15 18:07:30+00:00,AMZN,Technology,Market Watch,The Ratings Game: Amazon stock gains after Key...,It’s now unanimous—100% of the analysts survey...,0.0,0.1,1.0
9,2019-03-15 18:14:02+00:00,GOOGL,Technology,CNBC,Experimental Google Maps feature puts arrows o...,CNBC tested Google Maps' new augmented reality...,1.0,0.75,1.0


In [163]:
output_csv_path = 'news_finetuned_roberta.csv'  # Replace with your desired output path
df.to_csv(output_csv_path, index=False)