In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
import requests
from pathlib import Path
import datetime as dt

from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the VADER lexicon before building the analyzer that the scoring
# cell further down uses.
nltk.download("vader_lexicon")
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read the News API key from the environment (load_dotenv() also picks up a
# local .env file).
load_dotenv()
api_key = os.getenv("NEWS_API_KEY")

# Fail fast when the key is missing, and never echo the secret itself:
# notebook outputs are saved with the file and get shared along with it.
if not api_key:
    raise RuntimeError(
        "NEWS_API_KEY is not set; define it in your environment or .env file."
    )
print(f"NEWS_API_KEY loaded ({len(api_key)} characters)")

[REDACTED: API key removed from saved output; rotate this key]


In [4]:
# Build the NewsAPI client used by the fetch cell below.
# Note: the variable deliberately keeps the name `newsapi` (later cells use
# it), which shadows the package name after this import.
from newsapi import NewsApiClient
newsapi = NewsApiClient(api_key=api_key)


In [3]:
# Ask which topic to query. Surrounding whitespace is dropped, and a blank
# answer falls back to "Bitcoin" so the request never receives an empty query.
search_term = input("What Search Term?").strip() or "Bitcoin"

What Search Term? Bitcoin


In [5]:
# Query NewsAPI for English articles matching the search term:
# up to 100 results on a single page, ordered by relevancy.
query_options = {
    "q": search_term,
    "language": "en",
    "page_size": 100,
    "sort_by": "relevancy",
}
btc_headlines = newsapi.get_everything(**query_options)
btc_headlines

{'status': 'ok',
 'totalResults': 9511,
 'articles': [{'source': {'id': 'wired', 'name': 'Wired'},
   'author': 'Arielle Pardes',
   'title': 'Miami’s Bitcoin Conference Left a Trail of Harassment',
   'description': 'For some women, inappropriate conduct from other conference-goers continued to haunt them online.',
   'url': 'https://www.wired.com/story/bitcoin-2022-conference-harassment/',
   'urlToImage': 'https://media.wired.com/photos/627a89e3e37e715cb7d760d2/191:100/w_1280,c_limit/Bitcoin_Miami_Biz_GettyImages-1239817123.jpg',
   'publishedAt': '2022-05-10T16:59:46Z',
   'content': 'Now, even though there are a number of women-focused crypto spaces, Odeniran says women are still underrepresented. Ive been in spaces where Im the only Black person, or the only woman, or the only B… [+3828 chars]'},
  {'source': {'id': 'the-verge', 'name': 'The Verge'},
   'author': 'Justine Calma',
   'title': 'Why fossil fuel companies see green in Bitcoin mining projects',
   'description': 'Exxo

In [6]:
# Create the sentiment scores DataFrame: one row per article, holding the
# VADER polarity scores of the article's "content" field, indexed by
# publication time.
btc_sentiments = []

for article in btc_headlines["articles"]:
    text = article.get("content")
    # Articles without string content (e.g. None) cannot be scored. Skip them
    # explicitly instead of swallowing the AttributeError VADER would raise,
    # so unrelated bugs are no longer silently hidden.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)
    btc_sentiments.append({
        "Date": article["publishedAt"],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Naming the columns up front keeps the frame well-formed (and the "Date"
# lookup below valid) even when no article could be scored.
btc_df = pd.DataFrame(
    btc_sentiments,
    columns=["Date", "compound", "positive", "negative", "neutral"],
)
# utc=True yields a timezone-aware UTC index in every case, matching the
# "...Z" timestamps seen in the API response.
btc_df["Date"] = pd.to_datetime(btc_df["Date"], utc=True)
btc_df = btc_df.set_index("Date").sort_index()
btc_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-04-29 10:35:00+00:00,0.3400,0.062,0.000,0.938
2022-04-29 11:54:00+00:00,0.0000,0.000,0.000,1.000
2022-04-29 12:11:15+00:00,0.0000,0.000,0.000,1.000
2022-04-29 21:13:00+00:00,0.0000,0.000,0.000,1.000
2022-05-02 11:30:00+00:00,-0.3269,0.085,0.143,0.772
...,...,...,...,...
2022-05-27 16:45:47+00:00,0.0000,0.000,0.000,1.000
2022-05-28 17:36:26+00:00,-0.4019,0.039,0.083,0.879
2022-05-28 20:57:40+00:00,-0.2960,0.079,0.084,0.838
2022-05-30 11:12:00+00:00,0.0000,0.000,0.000,1.000


In [7]:
# Snap every timestamp to the nearest 15-minute mark, then drop the timezone
# so the index is tz-naive.
btc_df.index = btc_df.index.round('15min').tz_localize(None)
btc_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-04-29 10:30:00,0.3400,0.062,0.000,0.938
2022-04-29 12:00:00,0.0000,0.000,0.000,1.000
2022-04-29 12:15:00,0.0000,0.000,0.000,1.000
2022-04-29 21:15:00,0.0000,0.000,0.000,1.000
2022-05-02 11:30:00,-0.3269,0.085,0.143,0.772
...,...,...,...,...
2022-05-27 16:45:00,0.0000,0.000,0.000,1.000
2022-05-28 17:30:00,-0.4019,0.039,0.083,0.879
2022-05-28 21:00:00,-0.2960,0.079,0.084,0.838
2022-05-30 11:15:00,0.0000,0.000,0.000,1.000


In [10]:
# Create a binary sentiment label from the compound score:
# '1' when it is non-negative, '0' when it is negative (stored as strings).
is_non_negative = btc_df['compound'] >= 0
btc_df['comp_score'] = is_non_negative.map({True: '1', False: '0'})

btc_df.head(25)

Unnamed: 0_level_0,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-29 10:30:00,0.34,0.062,0.0,0.938,1
2022-04-29 12:00:00,0.0,0.0,0.0,1.0,1
2022-04-29 12:15:00,0.0,0.0,0.0,1.0,1
2022-04-29 21:15:00,0.0,0.0,0.0,1.0,1
2022-05-02 11:30:00,-0.3269,0.085,0.143,0.772,0
2022-05-02 15:00:00,-0.8049,0.0,0.222,0.778,0
2022-05-02 17:00:00,-0.6486,0.0,0.159,0.841,0
2022-05-03 11:30:00,0.0,0.0,0.0,1.0,1
2022-05-03 14:15:00,0.0,0.0,0.0,1.0,1
2022-05-04 11:15:00,0.5106,0.102,0.0,0.898,1


In [9]:
# Load the BTC trading data (the file name suggests 5-minute bars; confirm
# against the data) with the parsed "Date" column as the index.
trading_data_path = Path("Sentiment_data/btc_data_5m.csv")

# The path is relative to the kernel's working directory; report the resolved
# location so a missing file is easy to diagnose.
if not trading_data_path.is_file():
    raise FileNotFoundError(
        f"Trading data not found at {trading_data_path.resolve()}; "
        "check the working directory or add the file."
    )

# `infer_datetime_format` is deprecated in pandas 2.x, where parse_dates
# already infers a consistent format, so it is no longer passed.
btc_trading_data = pd.read_csv(
    trading_data_path,
    index_col="Date",
    parse_dates=True,
)

# Review the DataFrame
btc_trading_data.tail()

FileNotFoundError: [Errno 2] No such file or directory: 'Sentiment_data\\btc_data_5m.csv'

In [None]:
# Inner-join the trading data with the sentiment scores on their indexes;
# only timestamps present in both frames are kept.
# NOTE(review): several articles can round to the same 15-minute mark, which
# would repeat the matching trading row in the result; confirm that is intended.
result_df = btc_trading_data.merge(
    btc_df,
    how="inner",
    left_index=True,
    right_index=True,
)
result_df


In [None]:
# Persist the merged frame to CSV, writing the index as the first column.
# NOTE(review): the file name says "lithium" although this notebook builds
# BTC sentiment; confirm the intended output name before relying on it.
output_csv = "Sentiment_data/lithium_sentiment.csv"
result_df.to_csv(output_csv, index=True)