In [11]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

get_ipython().run_line_magic("matplotlib", "inline")


In [12]:
nltk.download("vader_lexicon")
analyzer = SentimentIntensityAnalyzer()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\14694\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [14]:
# Load .env enviroment variables
load_dotenv()


# Set Alpaca API key and secret
alpaca_api_key = os.getenv('ALPACA_API_KEY')
alpaca_secret_key = os.getenv('ALPACA_SECRET_KEY')

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

In [50]:
def stock_info_grab(ticker):
    """
    Takes ticker symbol and returns DataFrame with Date, Close, and Pct Change columns.
    """
    # Set timeframe to '1D'
    timeframe = "1D"

    # Set current date and the date from one month ago using the ISO format
    current_date = pd.Timestamp("2020-11-09", tz="America/New_York").isoformat()
    past_date = pd.Timestamp("2016-08-27", tz="America/New_York").isoformat()

    df = api.get_barset(
        ticker,
        timeframe,
        limit=None,
        start=past_date,
        end=current_date,
        after=None,
        until=None,
    ).df
    df = df.droplevel(axis=1, level=0)
    df.index = df.index.date
    df['pct change'] = df['close'].pct_change()
    df['pct change'].dropna
    df = df.reset_index()
    df = df.drop(columns=['open', 'high', 'low', 'volume'])
    df = df.rename(columns={'index':'Date'})
    return df

In [73]:
aapl_stock_info = stock_info_grab("AAPL")
btc_stock_info = stock_info_grab("BTC")
tsla_stock_info = stock_info_grab("TSLA")
spy_stock_info = stock_info_grab("SPY")
btc_stock_info

Unnamed: 0,Date,close,pct change


In [55]:
aapl_file = Path('Resources/AAPL_HEADLINES.csv')
btc_file = Path('Resources/BTCUSA_HEADLINES.csv')
spy_file = Path('Resources/SPY_HEADLINES.csv')
tsla_file = Path('Resources/TSLA_HEADLINES.csv')

aapl_headlines = pd.read_csv(aapl_file)
btc_headlines = pd.read_csv(btc_file)
spy_headlines = pd.read_csv(spy_file)
tsla_headlines = pd.read_csv(tsla_file)
aapl_headlines.head(10)

Unnamed: 0,Headline,Date
0,"Apple Inc. stock falls Monday, underperforms m...","Nov. 9, 2020 at 4:30 p.m. ET"
1,Big Tech Stocks Are Lagging Today. Why They’ll...,"Nov. 9, 2020 at 1:45 p.m. ET"
2,"As Apple releases its new line of Macs, the bi...","Nov. 9, 2020 at 1:18 p.m. ET"
3,"In the Midst of Election Uncertainty, Younger ...","Nov. 6, 2020 at 9:21 p.m. ET"
4,Berkshire Buybacks Hit Record $9 Billion in Th...,"Nov. 7, 2020 at 8:49 a.m. ET"
5,This single-country stock picker has beaten th...,"Nov. 3, 2020 at 7:12 a.m. ET"
6,"Apple Inc. stock falls Friday, underperforms m...","Nov. 6, 2020 at 4:30 p.m. ET"
7,T-Mobile Stock Is at a Record High After Earni...,"Nov. 6, 2020 at 2:16 p.m. ET"
8,Dow's 25-point fall led by losses in UnitedHea...,"Nov. 6, 2020 at 10:53 a.m. ET"
9,"Dow falls 110 points on losses for Apple Inc.,...","Nov. 6, 2020 at 9:45 a.m. ET"


In [56]:
def get_sentiment(score):
    """
    Calculates the sentiment based on the compound score.
    """
    result = 0  # Neutral by default
    if score >= 0.05:  # Positive
        result = 1
    elif score <= -0.05:  # Negative
        result = -1

    return result


In [57]:
def create_sentiment_df(df):
    """
    Takes headlines DataFrame & creates DataFrame with Sentiment columns.
    Splits Date & Time, creates Time column and moves Date to Index.
    """
    title_sent = {
        "compound": [],
        "positive": [],
        "neutral": [],
        "negative": [],
        "sentiment": [],
    }

    for index, row in df.iterrows():
        try:
            # Sentiment scoring with VADER
            title_sentiment = analyzer.polarity_scores(row["Headline"])
            title_sent["compound"].append(title_sentiment["compound"])
            title_sent["positive"].append(title_sentiment["pos"])
            title_sent["neutral"].append(title_sentiment["neu"])
            title_sent["negative"].append(title_sentiment["neg"])
            title_sent["sentiment"].append(get_sentiment(title_sentiment["compound"]))
        except AttributeError:
            pass

    title_sent_df = pd.DataFrame(title_sent)
    #title_sent_df.head()

    headline_sentiment_df = df.join(title_sent_df)
    headline_sentiment_df.dropna()
    headline_sentiment_df['Date'] = headline_sentiment_df['Date'].str.replace('at','-')
    headline_sentiment_df['Time'] = headline_sentiment_df['Date'].str.split('-').str[1]
    headline_sentiment_df['Date'] = headline_sentiment_df['Date'].str.split('-').str[0]
    headline_sentiment_df = headline_sentiment_df.reindex(columns=['Date', 'Time', 'Headline', 'compound', 'positive', 'neutral', 'negative', 'sentiment'])
    headline_sentiment_df.set_index('Date', inplace=True)
    return headline_sentiment_df

In [58]:
aapl_headlines = create_sentiment_df(aapl_headlines)
btc_headlines = create_sentiment_df(btc_headlines)
tsla_headlines = create_sentiment_df(tsla_headlines)
spy_headlines = create_sentiment_df(spy_headlines)

In [59]:
#reset index to groupby date
aapl_headlines.reset_index()
btc_headlines.reset_index()
tsla_headlines.reset_index()
spy_headlines.reset_index()

Unnamed: 0,Date,Time,Headline,compound,positive,neutral,negative,sentiment
0,"Nov. 9, 2020",11:57 a.m. ET,"Bullish momentum accelerates, S&P 500 extends ...",0.1280,0.158,0.842,0.0,1
1,"Nov. 6, 2020",11:52 a.m. ET,"Charting a bullish November start, S&P 500 ext...",0.4939,0.318,0.682,0.0,1
2,"Nov. 5, 2020",11:55 a.m. ET,Here’s how to trade a stock market that is swi...,0.0000,0.000,1.000,0.0,0
3,"Nov. 5, 2020",11:39 a.m. ET,"Charting bullish follow-through, S&P 500 knife...",0.0000,0.000,1.000,0.0,0
4,"Nov. 4, 2020",11:57 a.m. ET,"Charting an election-fueled rally, major U.S. ...",0.0000,0.000,1.000,0.0,0
...,...,...,...,...,...,...,...,...
10303,"Jun. 29, 2010",12:33 p.m. ET,Elevation increases its stake in fast-growing ...,0.0000,0.000,1.000,0.0,0
10304,"Jun. 29, 2010",11:37 a.m. ET,Tesla IPO opens at $19 a share,0.2960,0.306,0.694,0.0,1
10305,"Jun. 29, 2010",11:27 a.m. ET,"Tesla shares open at $19 in IPO, above $17 offer",0.2960,0.196,0.804,0.0,1
10306,"Jun. 29, 2010",11:25 a.m. ET,Tesla shares start trading,0.2960,0.423,0.577,0.0,1


In [72]:
# find average sentiment score by date
aapl_scores = aapl_headlines.groupby(['Date']).mean().reset_index()
btc_scores = btc_headlines.groupby(['Date']).mean().reset_index()
tsla_scores = tsla_headlines.groupby(['Date']).mean().reset_index()
spy_scores = spy_headlines.groupby(['Date']).mean().reset_index()
aapl_scores

Unnamed: 0,Date,compound,positive,neutral,negative,sentiment
0,"Apr. 1, 2017",0.113150,0.106500,0.893500,0.000000,0.500000
1,"Apr. 1, 2019",0.468340,0.297400,0.702600,0.000000,1.000000
2,"Apr. 1, 2020",-0.087443,0.045571,0.861714,0.092857,-0.428571
3,"Apr. 10, 2017",-0.127650,0.000000,0.920000,0.080000,-0.250000
4,"Apr. 10, 2018",0.105260,0.096000,0.904000,0.000000,0.600000
...,...,...,...,...,...,...
1405,"Sep. 9, 2016",-0.059275,0.049500,0.872750,0.077750,-0.250000
1406,"Sep. 9, 2017",-0.243400,0.053600,0.762600,0.183800,-0.400000
1407,"Sep. 9, 2018",0.324300,0.145000,0.855000,0.000000,0.500000
1408,"Sep. 9, 2019",0.065883,0.108667,0.828833,0.062500,0.166667


In [69]:
df = pd.DataFrame(columns=['Date',
                           'Time',
                           'Headline',
                           'Vader compound',
                           'Vader positive',
                           'Vader neutral',
                           'Vader negative',
                           'Vader sentiment',
                           'Lex compound',
                           'Lex positive',
                           'Lex neutral',
                           'Lex negative',
                           'Lex sentiment',
                           'Sentiment Difference',
                           'stock close',
                           'stock pct change',
                           'buy/sell/hold'])
df

Unnamed: 0,Date,Time,Headline,Vader compound,Vader positive,Vader neutral,Vader negative,Vader sentiment,Lex compound,Lex positive,Lex neutral,Lex negative,Lex sentiment,Sentiment Difference,stock close,stock pct change,buy/sell/hold


In [70]:
aapl_complete = pd.concat([aapl_scores,aapl_stock_info], join='inner', axis=1)
aapl_complete

Unnamed: 0,Date,compound,positive,neutral,negative,sentiment,Date.1,close,pct change
0,"Apr. 1, 2017",0.113150,0.106500,0.893500,0.000000,0.500000,2016-08-29,106.820,
1,"Apr. 1, 2019",0.468340,0.297400,0.702600,0.000000,1.000000,2016-08-30,105.990,-0.007770
2,"Apr. 1, 2020",-0.087443,0.045571,0.861714,0.092857,-0.428571,2016-08-31,106.110,0.001132
3,"Apr. 10, 2017",-0.127650,0.000000,0.920000,0.080000,-0.250000,2016-09-01,106.730,0.005843
4,"Apr. 10, 2018",0.105260,0.096000,0.904000,0.000000,0.600000,2016-09-02,107.730,0.009369
...,...,...,...,...,...,...,...,...,...
1053,"Nov. 2, 2018",0.122062,0.132857,0.794381,0.072714,0.142857,2020-11-03,110.375,0.014756
1054,"Nov. 2, 2019",0.094400,0.071750,0.893625,0.034625,0.125000,2020-11-04,114.940,0.041359
1055,"Nov. 2, 2020",-0.102360,0.038600,0.896400,0.065000,-0.200000,2020-11-05,118.990,0.035236
1056,"Nov. 20, 2016",0.147567,0.129333,0.797333,0.073333,0.666667,2020-11-06,118.685,-0.002563
