In [82]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

get_ipython().run_line_magic("matplotlib", "inline")


In [83]:
# Make sure the VADER lexicon is available locally, then build the shared
# analyzer instance that the headline-scoring code below calls.
nltk.download("vader_lexicon")
analyzer = SentimentIntensityAnalyzer()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\14694\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [84]:
# Load environment variables from the .env file
load_dotenv()


# Read the Alpaca API key and secret from the environment
# (os.getenv returns None for a variable that is not set)
alpaca_api_key = os.getenv('ALPACA_API_KEY')
alpaca_secret_key = os.getenv('ALPACA_SECRET_KEY')

# Shared Alpaca REST client (v2 API) used by stock_info_grab
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

In [85]:
def stock_info_grab(ticker, start="2016-08-27", end="2020-11-09"):
    """
    Fetch daily bars for a ticker and return its closing prices and daily returns.

    Parameters
    ----------
    ticker : str
        Symbol passed to the Alpaca `get_barset` call.
    start, end : str, optional
        Bounds of the requested window as date strings understood by
        `pd.Timestamp`; they are localized to America/New_York. The defaults
        reproduce the window this notebook was originally hard-coded to.

    Returns
    -------
    pandas.DataFrame
        Indexed by calendar date (index name 'Date') with two columns:
        'close' and 'pct change'. The first row's 'pct change' is NaN because
        there is no previous close to compare against; it is kept so that
        callers decide how to handle it.
    """
    # Daily bars
    timeframe = "1D"

    # Alpaca expects ISO-formatted, timezone-aware timestamps
    start_iso = pd.Timestamp(start, tz="America/New_York").isoformat()
    end_iso = pd.Timestamp(end, tz="America/New_York").isoformat()

    bars = api.get_barset(
        ticker,
        timeframe,
        limit=None,
        start=start_iso,
        end=end_iso,
        after=None,
        until=None,
    ).df

    # Drop the outer (ticker) column level so the columns are open/high/low/close/volume
    bars = bars.droplevel(axis=1, level=0)

    # Keep only the calendar date of each bar
    bars.index = bars.index.date

    bars['pct change'] = bars['close'].pct_change()
    bars = bars.drop(columns=['open', 'high', 'low', 'volume'])

    # Name the index directly instead of a reset/rename/set_index round-trip
    return bars.rename_axis('Date')

In [86]:
# Pull close / pct-change history for each ticker; every call goes through
# stock_info_grab, so each line is a separate request to the Alpaca client.
aapl_stock_info = stock_info_grab("AAPL")
btc_stock_info = stock_info_grab("BTC")
tsla_stock_info = stock_info_grab("TSLA")
spy_stock_info = stock_info_grab("SPY")
# Display the AAPL frame as the cell output
aapl_stock_info

Unnamed: 0_level_0,close,pct change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-08-29,106.820,
2016-08-30,105.990,-0.007770
2016-08-31,106.110,0.001132
2016-09-01,106.730,0.005843
2016-09-02,107.730,0.009369
...,...,...
2020-11-03,110.375,0.014756
2020-11-04,114.940,0.041359
2020-11-05,118.990,0.035236
2020-11-06,118.685,-0.002563


In [164]:
# Locations of the headline CSVs, relative to the notebook's working directory
aapl_file = Path('Resources/AAPL_HEADLINES.csv')
btc_file = Path('Resources/BTCUSA_HEADLINES.csv')
spy_file = Path('Resources/SPY_HEADLINES.csv')
tsla_file = Path('Resources/TSLA_HEADLINES.csv')

# Raw headline frames, one per ticker
aapl_headlines_df = pd.read_csv(aapl_file)
btc_headlines_df = pd.read_csv(btc_file)
spy_headlines_df = pd.read_csv(spy_file)
tsla_headlines_df = pd.read_csv(tsla_file)

# The 'Date' column is left as raw text here; it is parsed later in
# create_sentiment_df.
# Display the raw AAPL headlines as the cell output
aapl_headlines_df

Unnamed: 0,Headline,Date
0,"Apple Inc. stock falls Monday, underperforms m...","Nov. 9, 2020 at 4:30 p.m. ET"
1,Big Tech Stocks Are Lagging Today. Why They’ll...,"Nov. 9, 2020 at 1:45 p.m. ET"
2,"As Apple releases its new line of Macs, the bi...","Nov. 9, 2020 at 1:18 p.m. ET"
3,"In the Midst of Election Uncertainty, Younger ...","Nov. 6, 2020 at 9:21 p.m. ET"
4,Berkshire Buybacks Hit Record $9 Billion in Th...,"Nov. 7, 2020 at 8:49 a.m. ET"
...,...,...
9868,Respect for America has climbed during the Oba...,"Aug. 29, 2016 at 11:47 a.m. ET"
9869,"Fitbit upgrades now track yoga, weightlifting ...","Aug. 29, 2016 at 9:41 a.m. ET"
9870,5 things Tim Cook has done better at Apple tha...,"Aug. 28, 2016 at 9:15 p.m. ET"
9871,Want to invest in self-driving cars? Check out...,"Aug. 27, 2016 at 11:02 a.m. ET"


In [165]:
def get_sentiment(score):
    """
    Map a compound score onto a discrete sentiment label.

    Returns 1 when the score is at least 0.05 (positive), -1 when it is at
    most -0.05 (negative), and 0 (neutral) for anything in between.
    """
    if score >= 0.05:
        return 1  # Positive
    if score <= -0.05:
        return -1  # Negative
    return 0  # Neutral


In [199]:
def create_sentiment_df(df):
    """
    Score every headline with VADER and return the headlines with their scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Headline' column and a text 'Date' column such as
        "Nov. 9, 2020 at 4:30 p.m. ET".

    Returns
    -------
    pandas.DataFrame
        One row per input row, with columns 'Date', 'Headline', 'compound',
        'positive', 'neutral', 'negative' and 'sentiment'. 'Date' is parsed to
        a datetime with the time of day removed and remains a regular column
        (the index of `df` is kept). Rows whose headline could not be scored
        carry NaN in the five score columns.
    """
    score_columns = ("compound", "positive", "neutral", "negative", "sentiment")
    title_sent = {column: [] for column in score_columns}

    for headline in df["Headline"]:
        try:
            # Sentiment scoring with VADER
            title_sentiment = analyzer.polarity_scores(headline)
        except AttributeError:
            # Presumably a non-text headline (e.g. a missing value). Record a
            # placeholder so the scores stay lined up with their own rows
            # instead of shifting onto the following headlines.
            for column in score_columns:
                title_sent[column].append(float("nan"))
            continue

        title_sent["compound"].append(title_sentiment["compound"])
        title_sent["positive"].append(title_sentiment["pos"])
        title_sent["neutral"].append(title_sentiment["neu"])
        title_sent["negative"].append(title_sentiment["neg"])
        title_sent["sentiment"].append(get_sentiment(title_sentiment["compound"]))

    # Attach the scores positionally; each list has exactly one entry per row,
    # so this is correct whatever index `df` carries.
    headline_sentiment_df = df.assign(**title_sent)

    # Keep only the text before the time of day: "at" is turned into "-" and
    # everything from the first "-" onwards is discarded.
    date_text = (
        headline_sentiment_df['Date']
        .str.replace('at', '-', regex=False)
        .str.split('-')
        .str[0]
    )
    headline_sentiment_df['Date'] = pd.to_datetime(date_text)

    # Fix the column order and drop any other columns from the input
    return headline_sentiment_df.reindex(
        columns=['Date', 'Headline', 'compound', 'positive', 'neutral', 'negative', 'sentiment']
    )

In [206]:
# Score the headlines for each ticker with create_sentiment_df
aapl_headlines = create_sentiment_df(aapl_headlines_df)
# NOTE(review): BTC scoring is disabled here, so btc_headlines is not created
# by this cell — confirm whether that is intentional.
#btc_headlines = create_sentiment_df(btc_headlines_df)
tsla_headlines = create_sentiment_df(tsla_headlines_df)
spy_headlines = create_sentiment_df(spy_headlines_df)
# Display the scored AAPL headlines as the cell output
aapl_headlines

Unnamed: 0,Date,Headline,compound,positive,neutral,negative,sentiment
0,2020-11-09,"Apple Inc. stock falls Monday, underperforms m...",0.0000,0.000,1.000,0.000,0
1,2020-11-09,Big Tech Stocks Are Lagging Today. Why They’ll...,-0.0772,0.121,0.738,0.141,-1
2,2020-11-09,"As Apple releases its new line of Macs, the bi...",0.4767,0.193,0.807,0.000,1
3,2020-11-06,"In the Midst of Election Uncertainty, Younger ...",-0.3400,0.000,0.806,0.194,-1
4,2020-11-07,Berkshire Buybacks Hit Record $9 Billion in Th...,-0.1531,0.000,0.882,0.118,-1
...,...,...,...,...,...,...,...
9868,2016-08-29,Respect for America has climbed during the Oba...,0.4767,0.279,0.721,0.000,1
9869,2016-08-29,"Fitbit upgrades now track yoga, weightlifting ...",0.0000,0.000,1.000,0.000,0
9870,2016-08-28,5 things Tim Cook has done better at Apple tha...,0.4404,0.209,0.791,0.000,1
9871,2016-08-27,Want to invest in self-driving cars? Check out...,0.0772,0.126,0.874,0.000,1


In [209]:
# find average sentiment score by date
# Average the sentiment scores per date. numeric_only=True restricts the mean
# to the numeric score columns; the text 'Headline' column cannot be averaged
# and makes newer pandas versions raise instead of silently dropping it.
aapl_scores = aapl_headlines.groupby(['Date']).mean(numeric_only=True).sort_values(by='Date')
#btc_scores = btc_headlines.groupby(['Date']).mean(numeric_only=True).sort_values(by='Date')
tsla_scores = tsla_headlines.groupby(['Date']).mean(numeric_only=True).sort_values(by='Date')
spy_scores = spy_headlines.groupby(['Date']).mean(numeric_only=True).sort_values(by='Date')
aapl_scores

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-03-19,0.836000,0.530000,0.470000,0.000000,1.000000
2016-08-27,0.038600,0.063000,0.937000,0.000000,0.500000
2016-08-28,0.440400,0.209000,0.791000,0.000000,1.000000
2016-08-29,0.067100,0.102000,0.842286,0.055714,0.000000
2016-08-30,-0.015205,0.061591,0.883455,0.054955,-0.090909
...,...,...,...,...,...
2020-11-04,-0.038410,0.078900,0.800900,0.120300,-0.300000
2020-11-05,0.304967,0.202333,0.747333,0.050333,0.333333
2020-11-06,-0.099333,0.054833,0.845500,0.099500,-0.500000
2020-11-07,-0.153100,0.000000,0.882000,0.118000,-1.000000


In [210]:
# Not part of the analysis: an empty frame that sketches the column layout
# planned for the final DataFrame.
df = pd.DataFrame(
    columns=(
        ['Date', 'Time', 'Headline']
        + ['Vader compound', 'Vader positive', 'Vader neutral', 'Vader negative', 'Vader sentiment']
        + ['Lex compound', 'Lex positive', 'Lex neutral', 'Lex negative', 'Lex sentiment']
        + ['Sentiment Difference', 'stock close', 'stock pct change', 'buy/sell/hold']
    )
)

In [212]:
# Put each ticker's daily mean sentiment next to its price data, aligned on the
# Date index. The outer join keeps dates that appear on either side; dropna()
# then removes every row that has a missing value in any column.
aapl_complete = pd.concat([aapl_scores,aapl_stock_info], join='outer', axis=1).dropna()
# NOTE(review): in the visible cells, the only line that assigns btc_scores is
# commented out, so this statement appears to depend on a value left over from
# an earlier kernel session and would fail on a fresh run — confirm.
btc_complete = pd.concat([btc_scores,btc_stock_info], join='outer', axis=1).dropna()
tsla_complete = pd.concat([tsla_scores,tsla_stock_info], join='outer', axis=1).dropna()
spy_complete = pd.concat([spy_scores,spy_stock_info], join='outer', axis=1).dropna()
# Display the merged SPY frame as the cell output
spy_complete

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,pct change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-08-30,0.000000,0.000000,1.000000,0.000000,0.000000,217.970,-0.001786
2016-08-31,0.099550,0.086333,0.891167,0.022500,0.166667,217.390,-0.002661
2016-09-01,0.190200,0.170500,0.829500,0.000000,0.500000,217.410,0.000092
2016-09-02,0.016800,0.101333,0.801000,0.098000,0.000000,218.370,0.004416
2016-09-07,-0.401900,0.175000,0.485000,0.340000,-1.000000,219.060,-0.000046
...,...,...,...,...,...,...,...
2020-11-03,0.197560,0.120800,0.879200,0.000000,0.400000,335.970,0.017443
2020-11-04,0.111800,0.140500,0.805500,0.054000,0.166667,343.495,0.022398
2020-11-05,-0.018637,0.052500,0.873125,0.074375,0.000000,350.210,0.019549
2020-11-06,0.085420,0.098400,0.862800,0.038800,0.200000,350.190,-0.000057


In [213]:
spy_stock_info

Unnamed: 0_level_0,close,pct change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-08-29,218.360,
2016-08-30,217.970,-0.001786
2016-08-31,217.390,-0.002661
2016-09-01,217.410,0.000092
2016-09-02,218.370,0.004416
...,...,...
2020-11-03,335.970,0.017443
2020-11-04,343.495,0.022398
2020-11-05,350.210,0.019549
2020-11-06,350.190,-0.000057
