In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
import requests
from pathlib import Path
import datetime as dt

nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [6]:
# Read your api key environment variable
load_dotenv()
api_key = os.getenv("NEWS_API_KEY")
print(api_key)

1ee42792279e4bb38c96b682f9029b47


In [3]:
# Create a newsapi client
from newsapi import NewsApiClient
newsapi = NewsApiClient(api_key)


In [4]:
search_term= input("What Search Term?")

What Search Term? help


In [5]:
# Fetch the news articles
btc_headlines = newsapi.get_everything(
    search_term,
    language="en",
    page_size=100,
    sort_by="relevancy"
)
btc_headlines

{'status': 'ok',
 'totalResults': 213636,
 'articles': [{'source': {'id': 'engadget', 'name': 'Engadget'},
   'author': 'Karissa Bell',
   'title': 'Meta withdraws Oversight Board request for help with Ukraine policies',
   'description': 'Meta has withdrawn\r\n a request it made to its oversight Board seeking guidance on shaping its content moderation policies amid Russia’s invasion of Ukraine. The company had originally asked the Oversight Board for a policy advisory opinion (PAO) in March, fol…',
   'url': 'https://www.engadget.com/meta-withdraws-oversight-board-request-for-help-with-ukraine-policies-193638529.html',
   'urlToImage': 'https://s.yimg.com/os/creatr-uploaded-images/2021-01/4e4406f0-616e-11eb-bedf-b2adf9310de2',
   'publishedAt': '2022-05-11T19:36:38Z',
   'content': 'Meta has withdrawn\r\n a request it made to its oversight Board seeking guidance on shaping its content moderation policies amid Russias invasion of Ukraine. The company had originally asked the Oversi… [+

In [8]:
# Create the sentiment scores DataFrame
btc_sentiments = []

for article in btc_headlines["articles"]:
    try:
        Date = article["publishedAt"]
        text = article["content"]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        btc_sentiments.append({
            "Date": Date,
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu
            
        })
        
    except AttributeError:
        pass
    
btc_df = pd.DataFrame(btc_sentiments)   
btc_df["Date"] = pd.to_datetime(btc_df["Date"])
btc_df = btc_df.set_index("Date")
btc_df = btc_df.sort_index()
btc_df


Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-04-29 12:27:30+00:00,0.0000,0.000,0.000,1.000
2022-04-29 19:45:40+00:00,-0.0015,0.071,0.071,0.858
2022-04-29 23:12:32+00:00,-0.8689,0.000,0.247,0.753
2022-04-29 23:24:28+00:00,0.0000,0.000,0.000,1.000
2022-04-30 06:00:49+00:00,-0.8570,0.000,0.217,0.783
...,...,...,...,...
2022-05-30 16:48:00+00:00,0.8225,0.268,0.000,0.732
2022-05-30 21:26:58+00:00,-0.6369,0.108,0.208,0.684
2022-05-30 23:05:16+00:00,0.2023,0.055,0.000,0.945
2022-05-31 04:38:00+00:00,0.2500,0.088,0.000,0.912


In [9]:
btc_df.index = btc_df.index.round('15min')
btc_df.index = btc_df.index.tz_localize(None)
btc_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-04-29 12:30:00,0.0000,0.000,0.000,1.000
2022-04-29 19:45:00,-0.0015,0.071,0.071,0.858
2022-04-29 23:15:00,-0.8689,0.000,0.247,0.753
2022-04-29 23:30:00,0.0000,0.000,0.000,1.000
2022-04-30 06:00:00,-0.8570,0.000,0.217,0.783
...,...,...,...,...
2022-05-30 16:45:00,0.8225,0.268,0.000,0.732
2022-05-30 21:30:00,-0.6369,0.108,0.208,0.684
2022-05-30 23:00:00,0.2023,0.055,0.000,0.945
2022-05-31 04:45:00,0.2500,0.088,0.000,0.912


In [10]:
# Creacting a binary response for the tsla stock
btc_df['comp_score'] = btc_df['compound'].apply(lambda c: '1' if c >=0 else '0')

btc_df.tail()

Unnamed: 0_level_0,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-05-30 16:45:00,0.8225,0.268,0.0,0.732,1
2022-05-30 21:30:00,-0.6369,0.108,0.208,0.684,0
2022-05-30 23:00:00,0.2023,0.055,0.0,0.945,1
2022-05-31 04:45:00,0.25,0.088,0.0,0.912,1
2022-05-31 07:00:00,-0.3818,0.066,0.109,0.824,0


In [11]:
btc_trading_data = pd.read_csv(
    Path("Sentiment_data/btc_data_5m.csv"), 
    index_col="Date", 
    infer_datetime_format=True, 
    parse_dates=True
)

# Review the DataFrame
btc_trading_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-19 08:15:00,55155.87,55200.31,55121.01,55121.01,3.11221,171690.325162
2022-04-19 08:30:00,55121.01,55265.87,55110.97,55255.67,1.75836,97031.437113
2022-04-19 08:45:00,55254.41,55324.55,55221.07,55221.07,2.12392,117354.63636
2022-04-19 09:00:00,55208.59,55225.92,55150.33,55178.9,3.01601,166483.479149
2022-04-19 09:15:00,55165.19,55199.89,55165.19,55195.68,0.77454,42743.37523


In [12]:
result_df = pd.merge(btc_trading_data, btc_df, left_index=True, right_index=True)
result_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [13]:
#save as a df
result_df.to_csv("Sentiment_data/lithium_sentiment.csv",index=True)