### Imports

In [1]:
from contextlib import nullcontext

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.nn.functional import softmax
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

# Load Model
### DistilRoBERTa

Loaded from the Hugging Face Hub.

In [8]:
# Hugging Face Hub identifier of the checkpoint. The same identifier is passed
# to both loaders below so the tokenizer and the classifier come from the same
# checkpoint.
MODEL = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
# Tokenizer that turns raw text into the model's input tensors.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
# Sequence-classification model; its class names are read later from
# model.config.id2label.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

In [9]:
# Read the sample article that the next cells score.
# The encoding is passed explicitly so the text decodes the same way on every
# platform instead of depending on the locale default.
# NOTE(review): assumes sampleText.txt is UTF-8 encoded — confirm.
with open('sampleText.txt', encoding='utf-8') as file:
    sequence = file.read()
# truncation=True caps the input at the tokenizer's maximum length, so a long
# article is only partially seen by the model; return_tensors="pt" requests
# PyTorch tensors.
tokens = tokenizer(sequence, padding=True, truncation=True, return_tensors="pt")

In [10]:
# Feed the tokens to the model.
# Only a forward pass is needed, so gradient tracking is switched off for the
# duration of the call.
with torch.no_grad():
    outputs = model(**tokens)

In [11]:
# Turn the raw logits into a probability distribution over the classes
probabilities = softmax(outputs.logits, dim=-1)

# The predicted class is the position of the largest probability
predicted_class = probabilities.argmax(dim=-1).item()

# Translate the class index into its human-readable name
label_mapping = model.config.id2label
predicted_label = label_mapping[predicted_class]

# One print call, one line per value
print(
    f"Probabilities: {probabilities}",
    f"Predicted Class: {predicted_class}",
    f"Predicted Label: {predicted_label}",
    sep="\n",
)

Probabilities: tensor([[5.0872e-04, 1.6343e-03, 9.9786e-01]])
Predicted Class: 2
Predicted Label: positive


## Repeat the sentiment prediction for all the news articles

In [12]:
# Load the scraped news articles. The path is relative, so the CSV must be in
# the notebook's working directory.
newspapers = pd.read_csv("newspapers.csv")
# NOTE(review): the rendered frame contains "Unnamed: 0.1" and "Unnamed: 0"
# columns, which look like index columns written out by earlier saves —
# confirm and drop them if nothing downstream uses them.
# Bare last expression so the notebook renders the frame.
newspapers

Unnamed: 0.1,Unnamed: 0,title,date,link,text
0,0,‘This Time Is Different’—A Legendary Trader’s ...,"03/15/2025, 01:05 PM, +0000 UTC",https://www.forbes.com/sites/digital-assets/20...,"ByBilly Bambrough\n\nByBilly Bambrough\n, Seni..."
1,1,21Shares to Liquidate Two Bitcoin and Ether Fu...,"03/15/2025, 06:02 PM, +0000 UTC",https://www.coindesk.com/markets/2025/03/15/21...,Crypto asset manager 21Shares is set to liquid...
2,2,"U.S. government holds $16B in Bitcoin, eyes 1m...","03/14/2025, 07:38 PM, +0000 UTC",https://crypto.news/u-s-government-holds-16b-i...,"\n Share \nAs of March 12, the U.S. ..."
3,3,"Bitcoin, Ethereum, XRP, Dogecoin Rally: Can BT...","03/14/2025, 07:27 PM, +0000 UTC",https://www.benzinga.com/markets/cryptocurrenc...,"Crypto markets are surging into the weekend, s..."
4,4,U.S. crypto czar’s $200 million portfolio held...,"03/14/2025, 09:42 PM, +0000 UTC",https://fortune.com/crypto/2025/03/14/david-sa...,© 2025 Fortune Media IP Limited. All Rights Re...
5,5,BTC Price News: Bitcoin Bounces to $85K; Chain...,"03/14/2025, 04:49 PM, +0000 UTC",https://www.coindesk.com/markets/2025/03/14/bi...,Sellers of risk assets are taking a breather o...
6,6,White House Leak Sparks Wild Speculation Trump...,"03/14/2025, 12:40 PM, +0000 UTC",https://www.forbes.com/sites/digital-assets/20...,"ByBilly Bambrough\n\nByBilly Bambrough\n, Seni..."
7,7,BlackRock CEO Issues Serious Warning Amid $1 T...,"03/13/2025, 10:49 AM, +0000 UTC",https://www.forbes.com/sites/digital-assets/20...,03/13 update below. This post was originally p...
8,8,House Bill to Build Trump’s Bitcoin Reserve Ha...,"03/14/2025, 10:18 PM, +0000 UTC",https://decrypt.co/310039/house-bill-trump-bit...,House Bill to Build Trump’s Bitcoin Reserve Ha...
9,9,Bitcoin death cross signals further downside a...,"03/14/2025, 03:21 PM, +0000 UTC",https://crypto.news/bitcoin-death-cross-signal...,\n Share \nBitcoin and other cryptoc...


In [15]:
def preprocess(str):
    """Normalize raw article text before it is tokenized.

    Punctuation and other non-word symbols are removed, every run of
    whitespace (spaces, newlines, tabs, carriage returns) is collapsed to a
    single space, surrounding whitespace is trimmed, and the result is
    upper-cased.

    Line breaks are replaced by a space rather than deleted, so words that sit
    on adjacent lines stay separate tokens instead of being fused together.

    Args:
        str: The text to clean. The parameter name shadows the built-in
            ``str``; it is kept unchanged so existing callers keep working.

    Returns:
        The cleaned, upper-cased text.
    """
    # Drop everything that is neither a word character nor whitespace.
    without_symbols = re.sub(r'[^\w\s]', '', str)
    # Collapse whitespace runs (including \n, \r, \t) into one space so that
    # line breaks still act as word separators.
    single_spaced = re.sub(r'\s+', ' ', without_symbols).strip()
    # NOTE(review): upper-casing may not suit a case-sensitive tokenizer —
    # confirm it helps rather than hurts this checkpoint.
    return single_spaced.upper()

In [19]:
def get_sentiment(str):
    """Return the predicted sentiment label for a single piece of text.

    The text is cleaned with ``preprocess``, tokenized, and passed through the
    model without gradient tracking. The index of the highest-probability
    class is then looked up in ``model.config.id2label``.

    Args:
        str: The raw text to classify.

    Returns:
        The ``id2label`` entry for the most probable class.
    """
    encoded = tokenizer(
        preprocess(str),
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        logits = model(**encoded).logits
    class_probs = softmax(logits, dim=-1)
    best_index = torch.argmax(class_probs, dim=-1).item()
    return model.config.id2label[best_index]

## Apply the sentiment function to the news articles

In [22]:
# Score every article and store the predicted label in a new column.
newspapers['sentiment'] = newspapers['text'].apply(get_sentiment)
# Display the share of each predicted label. The recorded output of this cell
# is a table of proportions, which the assignment alone does not produce.
newspapers['sentiment'].value_counts(normalize=True)

sentiment
neutral     0.981132
positive    0.018868
Name: proportion, dtype: float64

In [23]:
# Absolute number of articles per predicted label.
# NOTE(review): the recorded output is 52 neutral vs 1 positive, i.e. almost
# every article lands in one class — worth checking whether the text
# preprocessing or input truncation is driving this.
newspapers['sentiment'].value_counts()

sentiment
neutral     52
positive     1
Name: count, dtype: int64