In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
import requests
from pathlib import Path
import datetime as dt

# Make sure the VADER lexicon is available locally before the analyzer is
# constructed (the download call reports "already up-to-date" when cached).
nltk.download("vader_lexicon")
from nltk.sentiment.vader import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read the NewsAPI key from the environment (variables from a local .env file
# are loaded first).
load_dotenv()
api_key = os.getenv("NEWS_API_KEY")

# Never echo the key itself: cell outputs are saved with the notebook and
# would expose the credential to anyone who can read the file.
if not api_key:
    raise RuntimeError("NEWS_API_KEY is not set; add it to the environment or a .env file")
print("NEWS_API_KEY loaded")

<redacted: API key removed from saved output>


In [3]:
# Build the NewsAPI client that the article query below goes through
from newsapi import NewsApiClient

newsapi = NewsApiClient(api_key=api_key)


In [4]:
# Ask for the query interactively. Surrounding whitespace is dropped so it is
# not sent as part of the query, and a blank answer is rejected here instead
# of being passed on to the API.
search_term = input("What Search Term?").strip()
if not search_term:
    raise ValueError("search term must not be empty")

What Search Term? quantitative tightening


In [13]:
# Query NewsAPI for English-language articles matching the search term,
# asking for up to 100 results ordered by relevancy.
query_options = {
    "q": search_term,
    "language": "en",
    "page_size": 100,
    "sort_by": "relevancy",
}
eth_headlines = newsapi.get_everything(**query_options)
eth_headlines

{'status': 'ok',
 'totalResults': 470,
 'articles': [{'source': {'id': 'reuters', 'name': 'Reuters'},
   'author': None,
   'title': 'Foreigners pull out of Asian equities for fifth straight month in May - Reuters',
   'description': 'Asian equities continued to witness foreign outflows for a fifth consecutive month in May, hit by concerns over monetary tightening measures by major central banks and supply chain disruptions due to strict lockdowns in China.',
   'url': 'https://www.reuters.com/markets/deals/foreigners-pull-out-asian-equities-fifth-straight-month-may-2022-06-02/',
   'urlToImage': 'https://www.reuters.com/resizer/y-eXy0v2Z4Pny8xyR1wZxURHvqk=/1200x628/smart/filters:quality(80)/cloudfront-us-east-2.images.arcpublishing.com/reuters/XEGW3VJX2JIFRJM7MYC7RMNWD4.jpg',
   'publishedAt': '2022-06-02T07:19:00Z',
   'content': 'June 2 (Reuters) - Asian equities continued to witness foreign outflows for a fifth consecutive month in May, hit by concerns over monetary tightening meas

In [32]:
# Create the sentiment scores DataFrame: one row per article with its
# publication time and the VADER compound/positive/negative/neutral scores.
SENTIMENT_COLUMNS = ["Date", "compound", "positive", "negative", "neutral"]
eth_sentiments = []

for article in eth_headlines["articles"]:
    text = article["content"]
    # Articles whose content is not a string (e.g. None) are skipped.
    # Checking this explicitly replaces a blanket `except AttributeError: pass`,
    # which would also have hidden unrelated AttributeErrors raised anywhere
    # in the loop body.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)
    eth_sentiments.append({
        "Date": article["publishedAt"],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Passing the column list keeps the frame well-formed when no article was
# scored, so the "Date" lookup below does not raise KeyError on an empty result.
eth_df = pd.DataFrame(eth_sentiments, columns=SENTIMENT_COLUMNS)
# utc=True always yields a timezone-aware (UTC) datetime column, which the
# later resampling and timezone-stripping steps rely on.
eth_df["Date"] = pd.to_datetime(eth_df["Date"], utc=True)
eth_df = eth_df.set_index("Date").sort_index()
eth_df


Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-05-01 10:00:00+00:00,0.4588,0.075,0.000,0.925
2022-05-01 12:11:00+00:00,0.3182,0.067,0.000,0.933
2022-05-01 20:00:17+00:00,-0.2732,0.123,0.150,0.727
2022-05-02 08:42:00+00:00,0.5859,0.130,0.000,0.870
2022-05-02 08:42:22+00:00,0.5859,0.130,0.000,0.870
...,...,...,...,...
2022-06-01 20:34:17+00:00,-0.7227,0.000,0.126,0.874
2022-06-01 20:48:00+00:00,0.0000,0.000,0.000,1.000
2022-06-02 00:20:13+00:00,-0.3400,0.000,0.066,0.934
2022-06-02 07:19:00+00:00,0.0000,0.000,0.000,1.000


In [47]:
# Put the sentiment scores on a regular 15-minute grid: average the rows that
# fall in each window, carry the last value forward through empty windows,
# then drop the timezone information from the index.
window_means = eth_df.resample("15min").mean()
eth_df = window_means.ffill()
eth_df.index = eth_df.index.tz_localize(None)
eth_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-05-01 10:00:00,0.4588,0.075,0.000,0.925
2022-05-01 10:15:00,0.4588,0.075,0.000,0.925
2022-05-01 10:30:00,0.4588,0.075,0.000,0.925
2022-05-01 10:45:00,0.4588,0.075,0.000,0.925
2022-05-01 11:00:00,0.4588,0.075,0.000,0.925
...,...,...,...,...
2022-06-02 06:15:00,-0.3400,0.000,0.066,0.934
2022-06-02 06:30:00,-0.3400,0.000,0.066,0.934
2022-06-02 06:45:00,-0.3400,0.000,0.066,0.934
2022-06-02 07:00:00,-0.3400,0.000,0.066,0.934


In [48]:
#eth_df.index = eth_df.index.round('15min')
#eth_df.index = eth_df.index.tz_localize(None)
#eth_df

In [49]:
# Binary sentiment label: "1" when the compound score is non-negative, "0"
# otherwise. Labels are kept as strings, as before; the comparison is done on
# the whole column instead of a per-row lambda (a missing score compares False
# and therefore maps to "0").
eth_df["comp_score"] = eth_df["compound"].ge(0).map({True: "1", False: "0"})

# Number of 15-minute rows after labelling
len(eth_df)

3062

In [50]:
# Load the 15-minute price data from CSV, using the "Date" column as a parsed
# datetime index. `infer_datetime_format` is intentionally omitted: pandas 2.x
# deprecates it (passing it emits a warning) and `parse_dates=True` already
# parses the index.
eth_trading_data = pd.read_csv(
    Path("eth_data_15m.csv"),
    index_col="Date",
    parse_dates=True,
)

# Review the DataFrame
eth_trading_data.tail(20)

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-06-02 05:00:00,2534.38,2534.99,2530.62,2533.56,14.2912,36207.929618
2022-06-02 05:15:00,2534.41,2534.72,2521.37,2522.38,19.714,49799.393367
2022-06-02 05:30:00,2522.38,2533.57,2521.43,2531.74,67.7253,171425.295749
2022-06-02 05:45:00,2531.8,2551.0,2531.8,2544.98,58.4373,148428.793595
2022-06-02 06:00:00,2547.44,2547.44,2539.37,2542.92,36.415,92569.414642
2022-06-02 06:15:00,2543.99,2545.79,2538.2,2538.74,18.6608,47428.156266
2022-06-02 06:30:00,2536.99,2537.92,2534.01,2534.94,24.943,63249.657446
2022-06-02 06:45:00,2535.2,2536.65,2526.06,2529.68,31.7276,80321.849365
2022-06-02 07:00:00,2531.33,2540.81,2531.33,2539.38,52.3763,132924.798374
2022-06-02 07:15:00,2538.82,2543.31,2535.52,2536.23,22.3535,56793.178509


In [56]:
# Inner-join prices and sentiment on their timestamp indexes, so only
# timestamps present in both frames are kept.
result_df = eth_trading_data.merge(
    eth_df,
    how="inner",
    left_index=True,
    right_index=True,
)
result_df.head(20)

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-05-23 00:00:00,2897.64,2899.72,2888.5,2890.09,15.4361,44682.16312,0.5574,0.194,0.073,0.733,1
2022-05-23 00:15:00,2889.77,2901.52,2884.71,2899.59,20.9878,60773.794404,0.5574,0.194,0.073,0.733,1
2022-05-23 00:30:00,2901.25,2907.83,2876.62,2877.01,17.2164,49810.159499,0.5574,0.194,0.073,0.733,1
2022-05-23 00:45:00,2877.62,2880.73,2875.71,2878.94,7.1218,20498.136978,0.5574,0.194,0.073,0.733,1
2022-05-23 01:00:00,2878.61,2882.21,2876.34,2879.14,8.5415,24586.643166,0.5574,0.194,0.073,0.733,1
2022-05-23 01:15:00,2878.56,2878.67,2863.26,2865.31,11.3964,32676.481618,0.5574,0.194,0.073,0.733,1
2022-05-23 01:30:00,2865.05,2871.29,2861.85,2871.08,11.7682,33725.033562,0.5574,0.194,0.073,0.733,1
2022-05-23 01:45:00,2869.56,2872.02,2864.36,2866.53,6.5751,18854.080864,0.5574,0.194,0.073,0.733,1
2022-05-23 02:00:00,2866.54,2871.37,2866.47,2868.26,20.9493,60118.648051,0.5574,0.194,0.073,0.733,1
2022-05-23 02:15:00,2868.8,2870.41,2855.25,2855.25,42.369,121255.536783,0.5574,0.194,0.073,0.733,1


In [53]:
# Row count of the merged frame
result_df.shape[0]

990

In [54]:
# Write the merged frame to "qt.csv", keeping the timestamp index as a column
result_df.to_csv(path_or_buf="qt.csv", index=True)