In [21]:
import requests
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification


In [22]:
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

In [23]:
def finbert_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = F.softmax(outputs.logits, dim=1)
        sentiment = torch.argmax(probabilities).item()
        labels = ['Bullish', 'Neutral', 'Bearish']
        probs = probabilities.tolist()
        
        overall_score = (probs[0][2]*1.0 + probs[0][1]*0.0 + probs[0][0]*-1.0)
        return labels[sentiment], overall_score, probs

In [24]:
# process Alpha Vantage response
def process_news_api(news_data):
    results = []

    for news in news_data:
        title = news['title']
        summary = news['summary']
        ov_sentiment_score = news['overall_sentiment_score']
        ov_label = news['overall_sentiment_label']

        text = title+" "+summary

        finbert_label, finbert_score, finbert_probs = finbert_sentiment(text)

        results.append({
            'title': title,
            'summary': summary,
            'api_sentiment_score':ov_sentiment_score,
            'api_sentiment_label': ov_label,
            'finbert_probs': finbert_probs,
            'finbert_score': finbert_score,
            'finbert_label': finbert_label,
            'error': ov_sentiment_score-finbert_score,
        })

    return pd.DataFrame(results)

In [25]:
from api_key import alpha_vantage

url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&apikey={alpha_vantage}'
response = requests.get(url)
news_data = response.json()['feed']

df_sentiment = process_news_api(news_data)

# strong = df_sentiment[df_sentiment['api_sentiment_label'].isin(['Bearish', 'Bullish'])]
# strong.head(10)

df_sentiment.head(10)

Unnamed: 0,title,summary,api_sentiment_score,api_sentiment_label,finbert_probs,finbert_score,finbert_label,error
0,Jim Cramer Reminds Investors 'Nobody Ever Made...,After President Donald Trump announced a 90-da...,0.222298,Somewhat-Bullish,"[[0.9091626405715942, 0.03595219552516937, 0.0...",-0.854278,Bullish,1.076576
1,US Stocks Likely To Open Lower Ahead Of March ...,U.S. stock futures were lower on Thursday afte...,0.145367,Neutral,"[[0.008295504376292229, 0.9734761118888855, 0....",0.009933,Neutral,0.135434
2,3 Top Vanguard ETFs to Buy With the S&P 500 in...,With 91 exchange-traded funds ( ETFs ) in its ...,0.284136,Somewhat-Bullish,"[[0.017607256770133972, 0.8369032144546509, 0....",0.127882,Neutral,0.156254
3,Trump's Tariffs Could Push Apple iPhone Prices...,Following President Donald Trump's global tari...,0.182397,Somewhat-Bullish,"[[0.7688975930213928, 0.20200467109680176, 0.0...",-0.7398,Bullish,0.922197
4,3 Cheap Tech Stocks to Buy Right Now,"With the recent stock market crash, a number o...",0.206823,Somewhat-Bullish,"[[0.4589504897594452, 0.014957285486161709, 0....",0.067142,Bearish,0.139681
5,S&P 500 Records Biggest Surge Since 2008 As Tr...,The CNN Money Fear and Greed index showed easi...,0.01219,Neutral,"[[0.9278472065925598, 0.034282386302948, 0.037...",-0.889977,Bullish,0.902167
6,"SPY, QQQ Call Volumes Spiked Minutes Before Ta...",Alexandria Ocasio-Cortez ( D-N.Y. ) has called...,0.09788,Neutral,"[[0.12347915768623352, 0.3090002238750458, 0.5...",0.444041,Bearish,-0.346161
7,"Trump Says 'This Is A Great Time To Buy', But ...",President Donald Trump indicated a more lenien...,0.208079,Somewhat-Bullish,"[[0.6260848641395569, 0.2645699381828308, 0.10...",-0.51674,Bullish,0.724819
8,"Vietnam, A Hub For US Tech Giants Like Apple A...",The U.S. and Vietnam have launched trade negot...,0.137249,Neutral,"[[0.9377287030220032, 0.022343721240758896, 0....",-0.897801,Bullish,1.03505
9,"VERO, X, RDW, AAPL, TSLA: Top 5 Trending Stock...",Wall Street experienced a significant upswing ...,0.204314,Somewhat-Bullish,"[[0.9472808837890625, 0.025763031095266342, 0....",-0.920325,Bullish,1.124639


In [31]:
err_count = 0.0
accuracy = 0.0

for row in df_sentiment.iterrows():
    error = row[1]['error']
    if error > 1.2:
        err_count += 1.0
    else:
        accuracy += 1.0
    
print(f'Error percentage: {err_count/len(df_sentiment)*100.0}')
print(f'Accuracy: {accuracy/len(df_sentiment)*100.0}')

Error percentage: 4.0
Accuracy: 96.0


In [27]:
df_sentiment.to_csv('./data/finbert_sentiment.csv', index=False)