In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
import requests
from pathlib import Path
import datetime as dt

# Fetch the VADER lexicon through NLTK's downloader before the analyzer is built
# (presumably SentimentIntensityAnalyzer loads this lexicon on construction — confirm).
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single analyzer instance, reused by the sentiment-scoring loop further down
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read the NewsAPI key from the environment (a local .env file is loaded first).
# The key itself is never printed: notebook outputs are saved with the file and
# would leak the secret to anyone the notebook is shared with.
load_dotenv()
api_key = os.getenv("NEWS_API_KEY")
if not api_key:
    # Fail here with a clear message instead of at the first API request
    raise RuntimeError(
        "NEWS_API_KEY is not set; add it to your environment or .env file"
    )
print("NEWS_API_KEY loaded")

[REDACTED: API key removed from saved output]


In [3]:
# Build the NewsAPI client used by the article query below.
# Note: the instance is bound to the name `newsapi`, which is also the package name.
from newsapi import NewsApiClient

newsapi = NewsApiClient(api_key=api_key)


In [23]:
# Ask which term to search for. Surrounding whitespace is stripped, and an empty
# answer falls back to "Ethereum", the term the rest of this notebook is built
# around (the eth_* frames and the eth_data_15m.csv price file).
search_term = input("What Search Term?").strip() or "Ethereum"

What Search Term? Ethereum


In [24]:
# Query NewsAPI for English-language articles matching the search term,
# requesting up to 100 results ordered by relevancy
eth_headlines = newsapi.get_everything(
    q=search_term, language="en", sort_by="relevancy", page_size=100
)
eth_headlines

{'status': 'ok',
 'totalResults': 4663,
 'articles': [{'source': {'id': 'engadget', 'name': 'Engadget'},
   'author': 'Jon Fingas',
   'title': "Here's what NFTs look like on Instagram",
   'description': "Meta has revealed more of how NFTs will work on Instagram. In the US-based test, you can show what you've bought or created for free by connecting your Instagram account to a compatible digital wallet and posting for the world to see. If you like, the social …",
   'url': 'https://www.engadget.com/instagram-nft-details-131020868.html',
   'urlToImage': 'https://s.yimg.com/os/creatr-uploaded-images/2022-05/2546c160-d05e-11ec-b75e-e45eaa8c5b2b',
   'publishedAt': '2022-05-10T13:10:20Z',
   'content': "Meta has revealed more of how NFTs will work on Instagram. In the US-based test, you can show what you've bought or created for free by connecting your Instagram account to a compatible digital walle… [+1223 chars]"},
  {'source': {'id': None, 'name': 'Gizmodo.com'},
   'author': 'Kyle Ba

In [25]:
# Create the sentiment scores DataFrame
# One record per article: the publication timestamp plus the VADER polarity
# scores (compound / positive / negative / neutral) of its `content` snippet.
eth_sentiments = []

for article in eth_headlines["articles"]:
    text = article["content"]
    # Articles without a text body cannot be scored. Skip them explicitly
    # instead of swallowing AttributeError, which would also hide real bugs.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)
    eth_sentiments.append({
        "Date": article["publishedAt"],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Passing `columns` keeps the frame well-formed (and the "Date" lookup valid)
# even when the query returned no usable articles.
eth_df = pd.DataFrame(
    eth_sentiments,
    columns=["Date", "compound", "positive", "negative", "neutral"],
)
# utc=True yields a timezone-aware UTC index, including for an empty frame
eth_df["Date"] = pd.to_datetime(eth_df["Date"], utc=True)
eth_df = eth_df.set_index("Date").sort_index()
eth_df


Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-05-03 05:15:00+00:00,-0.4404,0.000,0.088,0.912
2022-05-03 10:00:00+00:00,0.2732,0.170,0.118,0.713
2022-05-03 18:23:55+00:00,-0.4404,0.064,0.139,0.797
2022-05-03 23:01:00+00:00,0.3818,0.073,0.000,0.927
2022-05-04 02:32:00+00:00,-0.2732,0.000,0.055,0.945
...,...,...,...,...
2022-06-03 14:00:00+00:00,0.4404,0.075,0.000,0.925
2022-06-03 16:11:26+00:00,-0.5574,0.000,0.098,0.902
2022-06-03 20:26:26+00:00,0.4404,0.122,0.000,0.878
2022-06-03 21:34:57+00:00,0.9169,0.301,0.000,0.699


In [26]:
# Aggregate article sentiment into 15-minute bars: scores of articles falling in
# the same bar are averaged, and bars with no articles carry the previous bar's
# values forward.
# Only the numeric score columns are selected, so this cell can be re-run after
# a later cell has added the string-valued `comp_score` column (averaging a
# string column raises in recent pandas versions).
eth_df = (
    eth_df[["compound", "positive", "negative", "neutral"]]
    .resample("15min")
    .mean()
    .ffill()
)
# Drop the timezone so the index holds timezone-naive timestamps
eth_df.index = eth_df.index.tz_localize(None)
eth_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-05-03 05:15:00,-0.4404,0.000,0.088,0.912
2022-05-03 05:30:00,-0.4404,0.000,0.088,0.912
2022-05-03 05:45:00,-0.4404,0.000,0.088,0.912
2022-05-03 06:00:00,-0.4404,0.000,0.088,0.912
2022-05-03 06:15:00,-0.4404,0.000,0.088,0.912
...,...,...,...,...
2022-06-03 20:45:00,0.4404,0.122,0.000,0.878
2022-06-03 21:00:00,0.4404,0.122,0.000,0.878
2022-06-03 21:15:00,0.4404,0.122,0.000,0.878
2022-06-03 21:30:00,0.9169,0.301,0.000,0.699


In [27]:
#eth_df.index = eth_df.index.round('15min')
#eth_df.index = eth_df.index.tz_localize(None)
#eth_df

In [28]:
# Create a binary sentiment label for the Ethereum sentiment data:
# '1' when the compound score is non-negative, '0' otherwise
# (a missing score compares as False and therefore also maps to '0').
# The labels are deliberately kept as the strings '1' / '0'.
eth_df['comp_score'] = eth_df['compound'].ge(0).map({True: '1', False: '0'})

# Number of 15-minute rows in the sentiment frame
len(eth_df)

3043

In [29]:
# Load the 15-minute ETH trading data, parsing the "Date" column into the index.
# `infer_datetime_format` is omitted: it is deprecated in pandas 2.x and only
# ever affected parsing speed, not the parsed values.
eth_trading_data = pd.read_csv(
    Path("eth_data_15m.csv"),
    index_col="Date",
    parse_dates=True,
)

# Review the DataFrame
eth_trading_data.tail(20)

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-06-03 23:15:00,2472.51,2473.01,2463.74,2467.04,5.6838,14036.900324
2022-06-03 23:30:00,2467.86,2467.86,2460.11,2461.13,7.4919,18453.211935
2022-06-03 23:45:00,2460.7,2462.0,2455.84,2460.5,14.8037,36431.859497
2022-06-04 00:00:00,2460.7,2463.43,2455.51,2461.02,6.019,14812.136029
2022-06-04 00:15:00,2461.72,2467.48,2457.39,2465.52,9.5615,23547.022158
2022-06-04 00:30:00,2465.44,2466.83,2460.91,2464.3,5.4604,13455.092709
2022-06-04 00:45:00,2464.73,2465.59,2459.47,2462.49,5.8797,14473.596952
2022-06-04 01:00:00,2460.87,2462.66,2455.84,2461.15,16.0249,39404.479382
2022-06-04 01:15:00,2461.41,2464.5,2444.85,2447.29,13.1788,32329.727192
2022-06-04 01:30:00,2450.11,2453.02,2443.9,2443.9,17.6439,43217.621894


In [30]:
# Inner-join the price bars with the sentiment bars on their timestamp indexes;
# only timestamps present in both frames are kept
result_df = eth_trading_data.merge(
    eth_df,
    how="inner",
    left_index=True,
    right_index=True,
)
result_df.head(20)

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-05-24 18:15:00,2760.49,2780.0,2758.63,2771.13,12.3054,34104.134565,-0.1027,0.0,0.04,0.96,0
2022-05-24 18:30:00,2768.63,2770.72,2759.32,2767.64,5.8977,16299.10068,-0.1027,0.0,0.04,0.96,0
2022-05-24 18:45:00,2764.02,2773.33,2750.0,2752.38,6.1664,17023.241913,-0.1027,0.0,0.04,0.96,0
2022-05-24 19:00:00,2752.73,2752.73,2736.78,2741.71,10.2021,27970.991419,-0.1027,0.0,0.04,0.96,0
2022-05-24 19:15:00,2741.95,2752.35,2740.16,2744.27,13.2652,36444.30782,-0.1027,0.0,0.04,0.96,0
2022-05-24 19:30:00,2746.61,2764.29,2740.11,2756.6,4.2928,11821.300718,-0.1027,0.0,0.04,0.96,0
2022-05-24 19:45:00,2759.47,2769.96,2758.32,2764.01,7.8259,21651.950709,-0.1027,0.0,0.04,0.96,0
2022-05-24 20:00:00,2762.51,2768.42,2755.6,2757.95,8.4111,23251.86709,-0.1027,0.0,0.04,0.96,0
2022-05-24 20:15:00,2757.9,2761.52,2754.08,2757.51,3.3922,9352.798387,-0.1027,0.0,0.04,0.96,0
2022-05-24 20:30:00,2758.58,2760.19,2746.11,2753.33,9.127,25144.14781,-0.1027,0.0,0.04,0.96,0


In [31]:
# Row count of the merged frame (timestamps that had both price and sentiment data)
result_df.shape[0]

975

In [32]:
# Write the merged price/sentiment frame to a CSV file, keeping the Date index
result_df.to_csv(path_or_buf="qt.csv", index=True)