In [14]:
import pandas as pd
import torch
from tqdm import tqdm
from transformers import BartForSequenceClassification, BartTokenizer

# Read the news CSV into `btc`; later cells score its 'description' column.
btc = pd.read_csv(filepath_or_buffer='../Cleaned_Data/Gnews.csv')

In [15]:
# Load the tokenizer and the sequence-classification model from the same
# 'facebook/bart-large-mnli' checkpoint (tokenizer first, then model).
tokenizer, model = (
    loader.from_pretrained('facebook/bart-large-mnli')
    for loader in (BartTokenizer, BartForSequenceClassification)
)

# Define the classify_text function
def classify_text(premise, hypothesis="This news is bullish for Bitcoin"):
    """Score how strongly `premise` supports `hypothesis` with the NLI model.

    The premise/hypothesis pair is encoded with the module-level `tokenizer`
    and passed through the module-level `model`. Only logit columns 0 and 2
    are kept (contradiction and entailment per the facebook/bart-large-mnli
    model card; the neutral column 1 is discarded), a softmax is taken over
    those two, and the share assigned to column 2 is returned.

    Args:
        premise: Text to evaluate (e.g. a news description).
        hypothesis: Statement to test the premise against.

    Returns:
        float: Value in [0, 100]; higher means the hypothesis is better
        supported. Returns NaN when `premise` is not a string (for example a
        missing value that pandas loaded as float NaN), so one bad row does
        not abort a long `apply` run.
    """
    # Missing cells arrive as float NaN, which the tokenizer cannot encode.
    if not isinstance(premise, str):
        return float('nan')

    # Truncate only the premise so an over-long text still fits the model's
    # maximum input length while the hypothesis is kept intact.
    input_ids = tokenizer.encode(
        premise,
        hypothesis,
        return_tensors='pt',
        truncation='only_first',
    )

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(input_ids)[0]

    entail_contradiction_logits = logits[:, [0, 2]]
    probs = entail_contradiction_logits.softmax(dim=1)
    true_prob = probs[:, 1].item() * 100
    return true_prob

# Enable tqdm's pandas integration so Series gain `progress_apply`, then
# score each row's 'description' with classify_text (default hypothesis)
# and store the result in the 'bullish_bitcoin_prob' column of `btc`.
tqdm.pandas(desc="Classifying")
btc['bullish_bitcoin_prob'] = (
    btc.loc[:, 'description'].progress_apply(classify_text)
)

Classifying: 100%|██████████| 62756/62756 [2:46:52<00:00,  6.27it/s]  


In [16]:
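# NOTE(review): this save is disabled, but the next cell reads
# '../Cleaned_Data/Gnews_sentiment.csv', so that file must already exist on disk.
# Presumably it was written once by the line below after the classification run — confirm.
#btc.to_csv('../Cleaned_Data/Gnews_sentiment.csv')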

In [16]:
# Build the daily sentiment table in one chain:
#   1. read the per-article scores,
#   2. derive a calendar 'Date' from 'published date',
#   3. drop the per-article columns that are no longer needed,
#   4. average 'bullish_bitcoin_prob' per day into 'Sentiment_Bullish'.
gnews = (
    pd.read_csv('../Cleaned_Data/Gnews_sentiment.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['published date']).dt.date)
    .drop(columns=['published date', 'Unnamed: 0', 'title', 'description', 'url', 'publisher'])
    .groupby(['Date'])
    .agg(Sentiment_Bullish=('bullish_bitcoin_prob', 'mean'))
    .reset_index()
)

# Persist the daily averages without the positional index.
gnews.to_csv('../Cleaned_Data/Gnews_sentiment_daily.csv', index=False)