In [54]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

import torch
import torch.nn as nn

import transformers
from transformers import (AutoModelForCausalLM, 
                        AutoTokenizer, 
                        BitsAndBytesConfig, 
                        TrainingArguments, 
                        pipeline, 
                        logging)
import warnings
warnings.filterwarnings("ignore")

from peft import LoraConfig, PeftConfig
from trl import SFTTrainer

from sklearn.metrics import (accuracy_score, 
                            classification_report, 
                            confusion_matrix)
from sklearn.model_selection import train_test_split

from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from makeDataset import *
from torch.utils.data import DataLoader

In [55]:
# Load the news articles to score. Titles and article bodies are classified
# separately, so each column gets its own dataset/loader below.
df = pd.read_csv('../datasets/news.csv')
x = df['title']
x2 = df['text']

batch_size = 16
# NOTE(review): the tokenizer comes from the public base checkpoint while the
# weights come from a local fine-tuned directory — presumably the fine-tune
# kept the base vocabulary; confirm.
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone', do_lower_case=True)
model = BertForSequenceClassification.from_pretrained("../finbert_finetuned")
# Use the GPU when one is available, otherwise fall back to the CPU. With a
# hard-coded 'cuda' device, `model.to(device)` raises on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Labels are passed as None because this is inference only.
# NOTE(review): `max_len` is presumably the per-sample token budget applied by
# CustomDataset (50 for titles, 128 for bodies) — confirm in makeDataset.
# shuffle=False keeps predictions in the same order as the rows of `df`.
test_dataset = CustomDataset(x, None, tokenizer, max_len=50)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

test_dataset2 = CustomDataset(x2, None, tokenizer, max_len=128)
test_loader2 = DataLoader(test_dataset2, batch_size=batch_size, shuffle=False)

In [56]:
# Sentiment label -> class index.
# NOTE(review): presumably the encoding used when the model was fine-tuned —
# confirm against the training code.
mapper = dict(negative=0, neutral=1, positive=2)
# Class index -> sentiment label, derived from `mapper` so the two cannot drift apart.
reverse_mappers = {class_id: label for label, class_id in mapper.items()}

### Predictions based on the article title (headline)

In [57]:
# Classify every article title and store the predicted label next to it.
model.eval()
predictions = []
with torch.no_grad():  # inference only, no gradients needed
    for batch in test_loader:
        logits = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
        ).logits
        # Index of the highest logit in each row is the predicted class id.
        class_ids = torch.max(logits, 1).indices.cpu().tolist()
        predictions.extend(reverse_mappers[class_id] for class_id in class_ids)
# The loader is built with shuffle=False, so predictions line up with df rows.
df['title_predictions'] = predictions

In [58]:
# Classify every article body and store the predicted label next to it.
model.eval()
predictions = []
with torch.no_grad():  # inference only, no gradients needed
    for batch in test_loader2:
        logits = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
        ).logits
        # Index of the highest logit in each row is the predicted class id.
        class_ids = torch.max(logits, 1).indices.cpu().tolist()
        predictions.extend(reverse_mappers[class_id] for class_id in class_ids)
# The loader is built with shuffle=False, so predictions line up with df rows.
df['text_predictions'] = predictions

In [59]:
# Tally the title-based predictions.
# NOTE: despite its name, `pos_count` covers every non-neutral label
# (positive AND negative); `neu_count` covers the neutral ones.
title_is_neutral = df['title_predictions'] == 'neutral'
neu_count = int(title_is_neutral.sum())
pos_count = int((~title_is_neutral).sum())
print(pos_count, neu_count)

83 163


In [60]:
# Tally the text-based predictions.
# NOTE: despite its name, `pos_count` covers every non-neutral label
# (positive AND negative); `neu_count` covers the neutral ones.
text_is_neutral = df['text_predictions'] == 'neutral'
neu_count = int(text_is_neutral.sum())
pos_count = int((~text_is_neutral).sum())
print(pos_count, neu_count)

57 189


In [63]:
# Show the articles whose title and body were both given a non-neutral label
# but with opposite polarity (title says one thing, text says the other).
title_not_neutral = df['title_predictions'] != 'neutral'
temp_df = df.loc[title_not_neutral, ['title', 'text', 'title_predictions', 'text_predictions']]
counter = 0  # number of conflicting articles found
for headline, body, title_label, text_label in temp_df.itertuples(index=False, name=None):
    # Skip rows where the labels agree or where either side is neutral.
    if title_label == text_label or 'neutral' in (title_label, text_label):
        continue
    counter += 1
    print("###############", "title:", title_label, "text:", text_label, "################")
    print(headline)
    print("######################################")
    print(body)

############### title: negative text: positive ################
Why Is AMD (AMD) Stock Soaring Today
######################################
Published Feb 22, 2024 11:22AM ET Updated Feb 22, 2024 11:30AM ET

Why Is AMD (AMD) Stock Soaring Today

NVDA +0.36% Add to/Remove from Watchlist AMD -2.94% Add to/Remove from Watchlist MSFT -0.32% Add to/Remove from Watchlist

What Happened: Shares of computer processor maker AMD (NASDAQ: ) jumped 10.9% in the morning session as chip and AI stocks surged alongside broader market gains, with the Nasdaq rising by 2.1%, the S&P 500 by 1.5%, and the Dow gaining 0.57% following Nvidia (NASDAQ: )'s outstanding earnings results. During its Q4'2024 earnings, Nvidia reported impressive topline results (7.6% revenue beat), big gross margin improvement, and EPS outperformance vs. Wall Street's estimates. Notably, revenue grew 265% year-on-year and 22% sequentially during the quarter. The strong topline performance was mostly driven by the data center segment

: 