In [1]:
import pandas as pd

# Read the crypto-news CSV from the working directory into a DataFrame.
df = pd.read_csv("cryptonews.csv")

# Preview the first rows, then list the column labels, one after the other.
print(df.head(), df.columns, sep="\n")


                  date                                          sentiment  \
0  2023-12-19 06:40:41  {'class': 'negative', 'polarity': -0.1, 'subje...   
1  2023-12-19 06:03:24  {'class': 'neutral', 'polarity': 0.0, 'subject...   
2  2023-12-19 05:55:14  {'class': 'positive', 'polarity': 0.05, 'subje...   
3  2023-12-19 05:35:26  {'class': 'positive', 'polarity': 0.5, 'subjec...   
4  2023-12-19 05:31:08  {'class': 'neutral', 'polarity': 0.0, 'subject...   

          source     subject  \
0     CryptoNews     altcoin   
1     CryptoNews  blockchain   
2     CryptoNews  blockchain   
3  CoinTelegraph  blockchain   
4  CoinTelegraph    ethereum   

                                                text  \
0  Grayscale CEO Michael Sonnenshein believes the...   
1  In an exclusive interview with CryptoNews, Man...   
2  According to the Federal Court ruling on Decem...   
3  Some suggest EVM inscriptions are the latest w...   
4  A decision by bloXroute Labs to start censorin...   

       

In [2]:
# Expose the `title` column under the name `headline`, which the later cells
# read. Rebinding the returned frame (instead of `inplace=True`) leaves the
# frame produced by `read_csv` untouched.
df = df.rename(columns={"title": "headline"})


In [3]:
# Discard rows that have no headline; every remaining row is classified later.
df = df.dropna(subset=["headline"])

# Coerce every headline to a string. No lowercasing is applied here: the text
# keeps its original casing. `assign` builds a new frame rather than setting a
# column on the result of `dropna`.
df = df.assign(headline=df["headline"].astype(str))


In [4]:
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline

# Hugging Face Hub identifier of the FinBERT checkpoint used in this notebook.
MODEL_NAME = "yiyanghkust/finbert-tone"

# Fetch the pretrained sequence-classification model and its matching tokenizer.
model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from transformers import TextClassificationPipeline

# Wrap the model and tokenizer in a text-classification pipeline.
# `top_k=None` asks for the score of every label; it replaces the deprecated
# `return_all_scores=True` flag.
classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, top_k=None)

# Smoke test: classify a single headline and show the per-label scores.
example = "Bitcoin surges after ETF approval"
print(classifier(example))


Device set to use cpu


[[{'label': 'Neutral', 'score': 3.212903152416402e-07}, {'label': 'Positive', 'score': 0.9999992847442627}, {'label': 'Negative', 'score': 3.380513078354852e-07}]]


In [6]:
def predict_sentiment(text):
    """Return the label that the classifier scores highest for `text`.

    Args:
        text: A single string to classify.

    Returns:
        The `label` value of the highest-scoring entry returned by the
        module-level `classifier` pipeline.
    """
    # `truncation=True` is forwarded to the tokenizer so that text longer than
    # the model's maximum sequence length is clipped instead of raising.
    # The pipeline wraps the result for a single string in an outer list,
    # hence the `[0]`.
    label_scores = classifier(text, truncation=True)[0]
    best = max(label_scores, key=lambda entry: entry['score'])
    return best['label']

# Classify every headline, one row at a time.
# NOTE(review): this overwrites the `sentiment` column loaded from the CSV;
# copy it to another column first if the original values are still needed.
df['sentiment'] = df['headline'].map(predict_sentiment)


In [7]:
# Persist the labelled frame as CSV, leaving the row index out of the file.
df.to_csv(path_or_buf="crypto_news_sentiment1.csv", index=False)
