# Correlating Returns

In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from alpaca_trade_api.rest import REST, TimeFrame
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [2]:
# Load the variables defined in the local .env file into the environment
load_dotenv()

# News API client; indexing os.environ raises KeyError when NEWSAPI is not set
newsapi = NewsApiClient(api_key=os.environ["NEWSAPI"])

# Alpaca key and secret; each lookup yields None when the variable is not set
alpaca_api_key = os.environ.get("ALPACA_API_KEY")
alpaca_secret_key = os.environ.get("ALPACA_SECRET_KEY")

# Alternative client construction, left disabled:
#api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [26]:
# Symbol whose daily bars are requested
ticker = "AAPL"

# Bar-size label; the request below passes TimeFrame.Day rather than this string
timeframe = "1D"

# Fixed one-month window given as ISO dates.
# Rolling-window alternative, left disabled:
#current_date = pd.Timestamp(datetime.now(), tz="America/New_York").isoformat()
#past_date = pd.Timestamp(datetime.now()- timedelta(30), tz="America/New_York").isoformat()
past_date = "2022-04-11"
current_date = "2022-05-10"

# Alpaca REST client (v2 API) built from the key and secret read from the environment
api = REST(alpaca_api_key, alpaca_secret_key, api_version='v2')
# One-year request, left disabled:
#yearly = api.get_bars("AAPL", TimeFrame.Day, "2021-03-20", "2022-03-20", adjustment='raw').df


# Request the daily bars for the window and keep the DataFrame view of the response
df = api.get_bars(
    ticker,
    TimeFrame.Day,
    start=past_date,
    end=current_date,
    adjustment='raw',
).df

# Preview the first rows
df.head()
#yearly.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-04-11 04:00:00+00:00,168.77,169.03,165.5,165.75,72097900,675470,166.692744
2022-04-12 04:00:00+00:00,167.98,169.87,166.64,167.66,79055332,676081,168.091692
2022-04-13 04:00:00+00:00,167.35,171.04,166.77,170.4,70369548,555114,169.679633
2022-04-14 04:00:00+00:00,170.63,171.27,165.04,165.29,75174683,595052,167.215205
2022-04-18 04:00:00+00:00,163.9,166.5984,163.57,165.07,68843906,574584,164.928104


In [27]:
# Disabled alternative for frames with a two-level column index:
#df = df.droplevel(axis=1, level=0)

# Drop every column except the closing price. `errors="ignore"` lets this cell
# be re-run after the columns are already gone instead of raising KeyError.
df = df.drop(
    columns=["open", "high", "low", "volume", "vwap", "trade_count"],
    errors="ignore",
)

# Since this is daily data, keep only the date (not the time) of each timestamp.
# Going through pd.to_datetime makes this work both on the original timestamp
# index and on an index already reduced to dates by an earlier run of this cell.
df.index = pd.to_datetime(df.index).date

# Display sample data
df.head()

Unnamed: 0,close
2022-04-11,165.75
2022-04-12,167.66
2022-04-13,170.4
2022-04-14,165.29
2022-04-18,165.07


In [28]:
# Daily returns of AAPL: percent change between consecutive rows, then discard
# the rows left with missing values (the first row has no previous close)
aapl_returns = df.pct_change(periods=1).dropna(axis=0, how="any")

# Preview the first rows
aapl_returns.head()

Unnamed: 0,close
2022-04-12,0.011523
2022-04-13,0.016343
2022-04-14,-0.029988
2022-04-18,-0.001331
2022-04-19,0.014115


In [29]:
# Use newsapi client to get the most relevant headlines (20 per day by default)
# for each day in the past month
def get_headlines(keyword, page_size=20):
    """Fetch one page of headlines about `keyword` for every day in the window.

    Walks backwards one day at a time from `current_date` down to, but not
    including, `past_date` (module-level strings whose first 10 characters are
    a "YYYY-MM-DD" date) and issues one `newsapi.get_everything` request per
    day, sorted by relevancy and restricted to English.

    Parameters
    ----------
    keyword : str
        Search term sent as the `q` argument of each request.
    page_size : int, optional
        Number of articles requested per day. Defaults to 20 so that the
        result size is stated here rather than left to the API's own default.

    Returns
    -------
    tuple of (list, list)
        `all_headlines[i]` is the list of article titles returned for the day
        `all_dates[i]` (a `datetime`); days are ordered newest first. Titles
        are passed through unchanged, so an entry may be None.

    Notes
    -----
    Any exception raised by the client (for example when the request quota is
    exhausted) propagates to the caller; headlines collected so far are lost.
    """
    day_format = "%Y-%m-%d"
    date = datetime.strptime(current_date[:10], day_format)
    end_date = datetime.strptime(past_date[:10], day_format)
    all_headlines = []
    all_dates = []
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        # Same day for both bounds: each request covers exactly one calendar day
        day = date.strftime(day_format)
        articles = newsapi.get_everything(
            q=keyword,
            from_param=day,
            to=day,
            language="en",
            sort_by="relevancy",
            page=1,
            page_size=page_size,
        )
        all_headlines.append([article["title"] for article in articles["articles"]])
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

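Note: Each of the 3 calls below sends one request per day in the date range (29 requests per topic, about 87 in total). The API provider limits developer accounts to 100 requests per 24 hours (50 available every 12 hours), so the three calls together will work at most once within a 24-hour period and may hit the rate limit part-way through.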

In [40]:
# First topic: fetch the headlines and keep the matching list of dates
aapl_headlines, dates = get_headlines(keyword="aapl")

Fetching news about 'aapl'
******************************
retrieving news from: 2022-05-10 00:00:00
retrieving news from: 2022-05-09 00:00:00
retrieving news from: 2022-05-08 00:00:00
retrieving news from: 2022-05-07 00:00:00
retrieving news from: 2022-05-06 00:00:00
retrieving news from: 2022-05-05 00:00:00
retrieving news from: 2022-05-04 00:00:00
retrieving news from: 2022-05-03 00:00:00
retrieving news from: 2022-05-02 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [None]:
# Second topic: only the headlines are kept, the returned dates are discarded
trade_headlines, _ = get_headlines(keyword="trade")

In [41]:
# Third topic: only the headlines are kept, the returned dates are discarded
economy_headlines, _ = get_headlines(keyword="economy")


Fetching news about 'economy'
******************************
retrieving news from: 2022-05-10 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [33]:
# Instantiate SentimentIntensityAnalyzer (imported from nltk.sentiment.vader).
# The instance is stored in `sid`, which the sentiment summarizer function
# defined later in the notebook reads as a global.
# NOTE(review): presumably needs the `vader_lexicon` NLTK resource to be available — confirm
sid = SentimentIntensityAnalyzer()

In [34]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Average the compound sentiment score of each day's headlines.

    Every headline is scored with the module-level analyzer `sid`
    (`sid.polarity_scores(text)["compound"]`) and the scores of one day are
    averaged.

    Parameters
    ----------
    headlines : iterable of iterables
        One inner collection of headline strings per day. Entries that are
        None are skipped.

    Returns
    -------
    list of float
        One mean compound score per day, in input order. A day with no usable
        headline (empty, or only None entries) yields NaN instead of raising
        ZeroDivisionError, so the output stays aligned with the input days.
    """
    sentiment = []
    for day in headlines:
        day_scores = [
            sid.polarity_scores(headline)["compound"]
            for headline in day
            if headline is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # Nothing to average for this day: record a missing value
            sentiment.append(float("nan"))
    return sentiment

In [42]:
# Average daily sentiment for each topic, computed in the order aapl, trade, economy
aapl_avg, trade_avg, economy_avg = (
    headline_sentiment_summarizer_avg(topic_headlines)
    for topic_headlines in (aapl_headlines, trade_headlines, economy_headlines)
)
print(aapl_avg)

[0.11166000000000001, 0.05244, -0.038325, 0.08354999999999999, 0.139065, 0.18004, -0.013364999999999998, -0.026605, -0.014264999999999991, -0.02489500000000001, 0.06363999999999999, 0.12304, 0.09962499999999999, 0.06450000000000002, 0.10294, 0.096545, 0.05136999999999999, -0.015224999999999997, 0.119515, -0.05328499999999999, 0.092055, 0.045485, 0.06970499999999999, 0.039245, 0.09351, 0.152445, 0.16216, 0.051115, 0.022504999999999994]


In [36]:
# One column per topic, one row per day, built from the three lists of averages
topic_sentiments = pd.DataFrame(
    dict(aapl_avg=aapl_avg, trade_avg=trade_avg, economy_avg=economy_avg)
)

In [37]:
# Index the sentiment averages by the dates returned with the headlines
topic_sentiments = topic_sentiments.set_index(pd.to_datetime(dates))

In [38]:
# Left-join the sentiment columns onto the AAPL returns by index, then keep
# only the rows in which every column has a value
topic_sentiments = aapl_returns.join(topic_sentiments, how="left").dropna(axis=0, how="any")

# Show the combined table
display(topic_sentiments)

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg
2022-04-12,0.011523,0.022505,0.0187,-0.004915
2022-04-13,0.016343,0.051115,0.08381,-0.11188
2022-04-14,-0.029988,0.16216,-0.064265,-0.04002
2022-04-18,-0.001331,0.069705,-0.05143,-0.08529
2022-04-19,0.014115,0.045485,0.040045,-0.159075
2022-04-20,-0.001016,0.092055,0.01967,-0.028255
2022-04-21,-0.004844,-0.053285,0.015255,-0.103115
2022-04-22,-0.027821,0.119515,-0.01999,0.02
2022-04-25,0.006737,0.096545,-0.022935,-0.006745
2022-04-26,-0.037328,0.10294,-0.160935,-0.127695


In [39]:
# Pairwise Pearson correlation between the returns and each sentiment average,
# rendered with a background colour gradient
topic_sentiments.corr(method="pearson").style.background_gradient()

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg
close,1.0,-0.484793,0.187994,-0.241562
aapl_avg,-0.484793,1.0,-0.134323,0.071675
trade_avg,0.187994,-0.134323,1.0,0.116214
economy_avg,-0.241562,0.071675,0.116214,1.0
