# Correlating Returns

In [18]:
import alpaca_trade_api as tradeapi
import pandas as pd
from newsapi.newsapi_client import NewsApiClient
from datetime import date, datetime, timedelta
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')
import os
from dotenv import load_dotenv ## We need this library to load the .env file
from pathlib import Path

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\metin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


## Load API Keys from Environment Variables

In [19]:
abs_path = Path(r'C:/Users/metin/Documents/nufintech/.env')
load_dotenv(abs_path) 

# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [20]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = '1D'

# Get current date and the date from one month ago
current_date = date.today()
past_date = date.today() - timedelta(weeks=4)

# Get 4 weeks worth of historical data for AAPL
df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=current_date,
    end=past_date,
    after=None,
    until=None,
).df

df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-02-19 00:00:00-05:00,320.0,324.57,320.0,323.62,19929550
2020-02-20 00:00:00-05:00,322.55,324.65,318.21,320.3,22805445
2020-02-21 00:00:00-05:00,318.62,320.45,310.5,313.03,29739870
2020-02-24 00:00:00-05:00,297.26,304.18,289.23,298.17,49427633
2020-02-25 00:00:00-05:00,300.95,302.53,286.13,288.08,52521891


In [21]:
# Drop Outer Table Level
df = df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
df.drop(columns=['open', 'high', 'low', 'volume'], inplace=True)

# Since this is daily data, we can keep only the date (remove the time) component of the data
df.index = df.index.date

df.head()

Unnamed: 0,close
2020-02-19,323.62
2020-02-20,320.3
2020-02-21,313.03
2020-02-24,298.17
2020-02-25,288.08


In [22]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change()
aapl_returns.head()

Unnamed: 0,close
2020-02-19,
2020-02-20,-0.010259
2020-02-21,-0.022697
2020-02-24,-0.047471
2020-02-25,-0.03384


In [23]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []
    date = current_date
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > past_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date),
            to=str(date),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates



In [24]:
# Get first topic
aapl_headlines, dates = get_headlines("apple")

Fetching news about 'apple'
******************************
retrieving news from: 2020-07-11
retrieving news from: 2020-07-10
retrieving news from: 2020-07-09
retrieving news from: 2020-07-08
retrieving news from: 2020-07-07
retrieving news from: 2020-07-06
retrieving news from: 2020-07-05
retrieving news from: 2020-07-04
retrieving news from: 2020-07-03
retrieving news from: 2020-07-02
retrieving news from: 2020-07-01
retrieving news from: 2020-06-30
retrieving news from: 2020-06-29
retrieving news from: 2020-06-28
retrieving news from: 2020-06-27
retrieving news from: 2020-06-26
retrieving news from: 2020-06-25
retrieving news from: 2020-06-24
retrieving news from: 2020-06-23
retrieving news from: 2020-06-22
retrieving news from: 2020-06-21
retrieving news from: 2020-06-20
retrieving news from: 2020-06-19
retrieving news from: 2020-06-18
retrieving news from: 2020-06-17
retrieving news from: 2020-06-16
retrieving news from: 2020-06-15
retrieving news from: 2020-06-14


In [25]:
# Get second topic
trade_headlines, _ = get_headlines("trade")

Fetching news about 'trade'
******************************
retrieving news from: 2020-07-11
retrieving news from: 2020-07-10
retrieving news from: 2020-07-09
retrieving news from: 2020-07-08
retrieving news from: 2020-07-07
retrieving news from: 2020-07-06
retrieving news from: 2020-07-05
retrieving news from: 2020-07-04
retrieving news from: 2020-07-03
retrieving news from: 2020-07-02
retrieving news from: 2020-07-01
retrieving news from: 2020-06-30
retrieving news from: 2020-06-29
retrieving news from: 2020-06-28
retrieving news from: 2020-06-27
retrieving news from: 2020-06-26
retrieving news from: 2020-06-25
retrieving news from: 2020-06-24
retrieving news from: 2020-06-23
retrieving news from: 2020-06-22
retrieving news from: 2020-06-21
retrieving news from: 2020-06-20
retrieving news from: 2020-06-19
retrieving news from: 2020-06-18
retrieving news from: 2020-06-17
retrieving news from: 2020-06-16
retrieving news from: 2020-06-15
retrieving news from: 2020-06-14


In [26]:
# Get third topic
economy_headlines, _ = get_headlines("economy")

Fetching news about 'economy'
******************************
retrieving news from: 2020-07-11
retrieving news from: 2020-07-10
retrieving news from: 2020-07-09
retrieving news from: 2020-07-08
retrieving news from: 2020-07-07
retrieving news from: 2020-07-06
retrieving news from: 2020-07-05
retrieving news from: 2020-07-04
retrieving news from: 2020-07-03
retrieving news from: 2020-07-02
retrieving news from: 2020-07-01
retrieving news from: 2020-06-30
retrieving news from: 2020-06-29
retrieving news from: 2020-06-28
retrieving news from: 2020-06-27
retrieving news from: 2020-06-26
retrieving news from: 2020-06-25
retrieving news from: 2020-06-24
retrieving news from: 2020-06-23
retrieving news from: 2020-06-22
retrieving news from: 2020-06-21
retrieving news from: 2020-06-20
retrieving news from: 2020-06-19
retrieving news from: 2020-06-18
retrieving news from: 2020-06-17
retrieving news from: 2020-06-16
retrieving news from: 2020-06-15
retrieving news from: 2020-06-14


In [27]:
# Get fourth topic
iphone_headlines, _ = get_headlines("iphone")

Fetching news about 'iphone'
******************************
retrieving news from: 2020-07-11
retrieving news from: 2020-07-10
retrieving news from: 2020-07-09
retrieving news from: 2020-07-08
retrieving news from: 2020-07-07
retrieving news from: 2020-07-06
retrieving news from: 2020-07-05
retrieving news from: 2020-07-04
retrieving news from: 2020-07-03
retrieving news from: 2020-07-02
retrieving news from: 2020-07-01
retrieving news from: 2020-06-30
retrieving news from: 2020-06-29
retrieving news from: 2020-06-28
retrieving news from: 2020-06-27
retrieving news from: 2020-06-26
retrieving news from: 2020-06-25
retrieving news from: 2020-06-24
retrieving news from: 2020-06-23
retrieving news from: 2020-06-22
retrieving news from: 2020-06-21
retrieving news from: 2020-06-20
retrieving news from: 2020-06-19
retrieving news from: 2020-06-18
retrieving news from: 2020-06-17
retrieving news from: 2020-06-16
retrieving news from: 2020-06-15
retrieving news from: 2020-06-14


In [28]:
# Get fifth topic
gold_headlines, _ = get_headlines("gold")

Fetching news about 'gold'
******************************
retrieving news from: 2020-07-11
retrieving news from: 2020-07-10
retrieving news from: 2020-07-09
retrieving news from: 2020-07-08
retrieving news from: 2020-07-07
retrieving news from: 2020-07-06
retrieving news from: 2020-07-05
retrieving news from: 2020-07-04
retrieving news from: 2020-07-03
retrieving news from: 2020-07-02
retrieving news from: 2020-07-01
retrieving news from: 2020-06-30
retrieving news from: 2020-06-29
retrieving news from: 2020-06-28
retrieving news from: 2020-06-27
retrieving news from: 2020-06-26
retrieving news from: 2020-06-25
retrieving news from: 2020-06-24
retrieving news from: 2020-06-23
retrieving news from: 2020-06-22
retrieving news from: 2020-06-21
retrieving news from: 2020-06-20
retrieving news from: 2020-06-19
retrieving news from: 2020-06-18
retrieving news from: 2020-06-17
retrieving news from: 2020-06-16
retrieving news from: 2020-06-15
retrieving news from: 2020-06-14


In [29]:
# Instantiate SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [30]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment


In [31]:
# Get averages of each topics sentiment
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)
trade_avg = headline_sentiment_summarizer_avg(trade_headlines)
economy_avg = headline_sentiment_summarizer_avg(economy_headlines)
iphone_avg = headline_sentiment_summarizer_avg(iphone_headlines)
gold_avg = headline_sentiment_summarizer_avg(gold_headlines)


In [32]:
# Combine Sentiment Averages into DataFrame
topic_sentiments = pd.DataFrame(
    {
        "aapl_avg": aapl_avg,
        "trade_avg": trade_avg,
        "economy_avg": economy_avg,
        "iphone_avg": iphone_avg,
        "gold_avg": gold_avg,
    }
)


In [33]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
topic_sentiments.index = pd.to_datetime(dates)

In [34]:
# Merge with AAPL returns
topic_sentiments = aapl_returns.join(topic_sentiments).dropna(how="any")

display(topic_sentiments)

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
2020-06-15,0.0158,0.074765,-0.19598,-0.051445,0.072985,-0.07193
2020-06-16,0.027027,0.128445,0.074055,0.04096,0.14341,0.11283
2020-06-17,-0.002885,-0.013415,-0.15196,-0.10604,-0.06677,0.00394
2020-06-18,0.000986,-0.119365,0.01114,0.071625,-0.061535,0.08606
2020-06-19,-0.005802,-0.02373,-0.109785,0.03953,0.091215,0.01105
2020-06-22,0.026835,0.08689,0.049625,0.13225,0.07833,0.149215
2020-06-23,0.020728,0.038195,-0.01992,0.15554,0.050505,0.01851
2020-06-24,-0.017223,0.08948,0.10266,-0.0562,0.078425,-0.015225
2020-06-25,0.013387,0.092345,-0.022705,0.071725,0.04418,0.027205
2020-06-26,-0.030668,0.153065,-0.174055,0.108985,0.03297,0.23878


In [35]:
# Correlate the headlines' sentiment to returns
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
close,1.0,0.217676,0.460678,0.192252,0.320172,-0.174407
aapl_avg,0.217676,1.0,0.0282096,0.00977739,0.581539,0.329332
trade_avg,0.460678,0.0282096,1.0,0.372797,0.190106,-0.101046
economy_avg,0.192252,0.00977739,0.372797,1.0,0.0505336,0.293298
iphone_avg,0.320172,0.581539,0.190106,0.0505336,1.0,0.208661
gold_avg,-0.174407,0.329332,-0.101046,0.293298,0.208661,1.0
