In [491]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from sklearn.neighbors import KNeighborsClassifier
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import tensorflow as tf
get_ipython().run_line_magic("matplotlib", "inline")
%matplotlib inline

In [3]:
nltk.download("vader_lexicon")
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\godz7\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
# Load .env enviroment variables
load_dotenv()


# Set Alpaca API key and secret
alpaca_api_key = os.getenv('ALPACA_API_KEY')
alpaca_secret_key = os.getenv('ALPACA_SECRET_KEY')

api = tradeapi.REST('PKYSIX5VD8DLHIZOILZS', 'Yv4AYGCNo9puqbXGPq2zF1sNrzy63CWCrWNJnOse', api_version='v2')

In [5]:
def stock_info_grab(ticker):
    """
    Takes ticker symbol and returns DataFrame with Date, Close, and Pct Change columns.
    """
    # Set timeframe to '1D'
    timeframe = "1D"

    # Set current date and the date from one month ago using the ISO format
    current_date = pd.Timestamp("2020-11-09", tz="America/New_York").isoformat()
    past_date = pd.Timestamp("2016-08-27", tz="America/New_York").isoformat()

    df = api.get_barset(
        ticker,
        timeframe,
        limit=None,
        start=past_date,
        end=current_date,
        after=None,
        until=None,
    ).df
    df = df.droplevel(axis=1, level=0)
    df.index = df.index.date
    df['pct change'] = df['close'].pct_change()
    df['pct change'].dropna
    df = df.reset_index()
    df = df.drop(columns=['open', 'high', 'low', 'volume'])
    df = df.rename(columns={'index':'Date'})
    df = df.set_index('Date')
    return df

In [6]:
aapl_stock_info = stock_info_grab("AAPL")
amzn_stock_info = stock_info_grab("AMZN")
tsla_stock_info = stock_info_grab("TSLA")
spy_stock_info = stock_info_grab("SPY")
aapl_stock_info

Unnamed: 0_level_0,close,pct change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-08-29,106.8200,
2016-08-30,105.9900,-0.007770
2016-08-31,106.1100,0.001132
2016-09-01,106.7300,0.005843
2016-09-02,107.7300,0.009369
2016-09-06,107.7000,-0.000278
2016-09-07,108.3700,0.006221
2016-09-08,105.5100,-0.026391
2016-09-09,103.1400,-0.022462
2016-09-12,105.4400,0.022300


In [7]:
aapl_file = Path('../Resources/AAPL_HEADLINES.csv')
#amzn_file = Path('../Resources/AMZN_HEADLINES.csv')
spy_file = Path('../Resources/SPY_HEADLINES.csv')
tsla_file = Path('../Resources/TSLA_HEADLINES.csv')

aapl_headlines_df = pd.read_csv(aapl_file)
#amzn_headlines_df = pd.read_csv(amzn_file)
spy_headlines_df = pd.read_csv(spy_file)
tsla_headlines_df = pd.read_csv(tsla_file)

#aapl_headlines['Date'] = pd.to_datetime(aapl_headlines['Date']).dt.strftime('%Y-%m-%d')
#aapl_headlines = aapl_headlines.set_index('Date')
aapl_headlines_df

Unnamed: 0,Headline,Date
0,"Apple Inc. stock falls Monday, underperforms m...","Nov. 9, 2020 at 4:30 p.m. ET"
1,Big Tech Stocks Are Lagging Today. Why They’ll...,"Nov. 9, 2020 at 1:45 p.m. ET"
2,"As Apple releases its new line of Macs, the bi...","Nov. 9, 2020 at 1:18 p.m. ET"
3,"In the Midst of Election Uncertainty, Younger ...","Nov. 6, 2020 at 9:21 p.m. ET"
4,Berkshire Buybacks Hit Record $9 Billion in Th...,"Nov. 7, 2020 at 8:49 a.m. ET"
5,This single-country stock picker has beaten th...,"Nov. 3, 2020 at 7:12 a.m. ET"
6,"Apple Inc. stock falls Friday, underperforms m...","Nov. 6, 2020 at 4:30 p.m. ET"
7,T-Mobile Stock Is at a Record High After Earni...,"Nov. 6, 2020 at 2:16 p.m. ET"
8,Dow's 25-point fall led by losses in UnitedHea...,"Nov. 6, 2020 at 10:53 a.m. ET"
9,"Dow falls 110 points on losses for Apple Inc.,...","Nov. 6, 2020 at 9:45 a.m. ET"


In [8]:
def get_sentiment(score):
    """
    Calculates the sentiment based on the compound score.
    """
    result = 0  # Neutral by default
    if score >= 0.05:  # Positive
        result = 1
    elif score <= -0.05:  # Negative
        result = -1

    return result


In [9]:
def create_sentiment_df(df):
    """
    Takes headlines DataFrame & creates DataFrame with Sentiment columns.
    Splits Date & Time, creates Time column and moves Date to Index.
    """
    title_sent = {
        "compound": [],
        "positive": [],
        "neutral": [],
        "negative": [],
        "sentiment": [],
    }

    for index, row in df.iterrows():
        try:
            # Sentiment scoring with VADER
            title_sentiment = analyzer.polarity_scores(row["Headline"])
            title_sent["compound"].append(title_sentiment["compound"])
            title_sent["positive"].append(title_sentiment["pos"])
            title_sent["neutral"].append(title_sentiment["neu"])
            title_sent["negative"].append(title_sentiment["neg"])
            title_sent["sentiment"].append(get_sentiment(title_sentiment["compound"]))
        except AttributeError:
            pass

    title_sent_df = pd.DataFrame(title_sent)
    #title_sent_df.head()

    headline_sentiment_df = df.join(title_sent_df)
    headline_sentiment_df.dropna()
    headline_sentiment_df['Date'] = headline_sentiment_df['Date'].str.replace('at','-')
    headline_sentiment_df['Date'] = headline_sentiment_df['Date'].str.split('-').str[0]
    headline_sentiment_df = headline_sentiment_df.reindex(columns=['Date', 'Headline', 'compound', 'positive', 'neutral', 'negative', 'sentiment'])
    headline_sentiment_df['Date'] = pd.to_datetime(headline_sentiment_df['Date'])
    headline_sentiment_df.set_index('Date')
    return headline_sentiment_df

In [10]:
aapl_headlines = create_sentiment_df(aapl_headlines_df)
amzn_headlines = create_sentiment_df(amzn_headlines_df)
tsla_headlines = create_sentiment_df(tsla_headlines_df)
spy_headlines = create_sentiment_df(spy_headlines_df)

Unnamed: 0,Date,Headline,compound,positive,neutral,negative,sentiment
0,2020-11-09,"Apple Inc. stock falls Monday, underperforms m...",0.0000,0.000,1.000,0.000,0
1,2020-11-09,Big Tech Stocks Are Lagging Today. Why They’ll...,-0.0772,0.121,0.738,0.141,-1
2,2020-11-09,"As Apple releases its new line of Macs, the bi...",0.4767,0.193,0.807,0.000,1
3,2020-11-06,"In the Midst of Election Uncertainty, Younger ...",-0.3400,0.000,0.806,0.194,-1
4,2020-11-07,Berkshire Buybacks Hit Record $9 Billion in Th...,-0.1531,0.000,0.882,0.118,-1
5,2020-11-03,This single-country stock picker has beaten th...,-0.0258,0.115,0.766,0.119,0
6,2020-11-06,"Apple Inc. stock falls Friday, underperforms m...",0.0000,0.000,1.000,0.000,0
7,2020-11-06,T-Mobile Stock Is at a Record High After Earni...,0.0000,0.000,1.000,0.000,0
8,2020-11-06,Dow's 25-point fall led by losses in UnitedHea...,-0.1280,0.171,0.620,0.209,-1
9,2020-11-06,"Dow falls 110 points on losses for Apple Inc.,...",-0.1280,0.158,0.647,0.194,-1


In [43]:
# find average sentiment score by date
aapl_scores = aapl_headlines.groupby('Date').mean().sort_values(by='Date')
#amzn_scores = amzn_headlines.groupby(['Date']).mean().sort_values(by='Date')
tsla_scores = tsla_headlines.groupby(['Date']).mean().sort_values(by='Date')
spy_scores = spy_headlines.groupby(['Date']).mean().sort_values(by='Date')

In [44]:
aapl_scores = aapl_scores[['positive', 'neutral', 'negative', 'sentiment']]
#amzn_scores = amzn_scores[['positive', 'neutral', 'negative', 'sentiment']]
tsla_scores = tsla_scores[['positive', 'neutral', 'negative', 'sentiment']]
spy_scores = spy_scores[['positive', 'neutral', 'negative', 'sentiment']]

In [45]:
# sent scores distribution across each df poss use histogram, calc meanstd, or percentiles 
aapl_complete = pd.concat([aapl_scores,aapl_stock_info], join='outer', axis=1).dropna()
#amzn_complete = pd.concat([amzn_scores,amzn_stock_info], join='outer', axis=1).dropna()
tsla_complete = pd.concat([tsla_scores,tsla_stock_info], join='outer', axis=1).dropna()
spy_complete = pd.concat([spy_scores,spy_stock_info], join='outer', axis=1).dropna()
aapl_complete

Unnamed: 0_level_0,positive,neutral,negative,sentiment,close,pct change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-08-30,0.061591,0.883455,0.054955,-0.090909,105.9900,-0.007770
2016-08-31,0.070400,0.818600,0.111000,-0.200000,106.1100,0.001132
2016-09-01,0.069625,0.897625,0.032750,0.125000,106.7300,0.005843
2016-09-02,0.063143,0.845429,0.091429,-0.285714,107.7300,0.009369
2016-09-06,0.131750,0.804500,0.063750,0.250000,107.7000,-0.000278
2016-09-07,0.096000,0.877000,0.027000,0.250000,108.3700,0.006221
2016-09-08,0.069714,0.862357,0.067929,0.000000,105.5100,-0.026391
2016-09-09,0.049500,0.872750,0.077750,-0.250000,103.1400,-0.022462
2016-09-12,0.113000,0.887000,0.000000,0.333333,105.4400,0.022300
2016-09-13,0.089818,0.888455,0.021818,0.272727,108.0200,0.024469


In [None]:
apple_test_scores = aapl_headlines.groupby('Date').mean().sort_values(by='Date')
tsla_complete = pd.concat([tsla_scores,tsla_stock_info], join='outer', axis=1).dropna()
aapl_test_complete['predicted pct change'] = aapl_test_complete['pct change'].shift(periods=-1)
aapl_test_complete['SCORE'] = ''

In [215]:
apple_test_scores = aapl_headlines.groupby('Date').mean().sort_values(by='Date')
aapl_test_complete = pd.concat([apple_test_scores,aapl_stock_info], join='outer', axis=1).dropna()
aapl_test_complete['predicted pct change'] = aapl_test_complete['pct change'].shift(periods=-1)
aapl_test_complete['SCORE'] = ''

In [216]:
temp = [];
for i in range(round(len(aapl_test_complete['compound']))):
    start = i
    end=(i+1)
    temp.append(aapl_test_complete['compound'][start:end])

In [217]:
for x in range(0,len(temp)):
    for integer,row in temp[x].iteritems():
        if row >= 0.05:
            aapl_test_complete.set_value(integer,'SCORE',1)
            
        elif (row <= -0.05):
            aapl_test_complete.set_value(integer,'SCORE',-1)
            
        else:
            aapl_test_complete.set_value(integer,'SCORE', 0)

  # Remove the CWD from sys.path while we load stuff.
  import sys
  after removing the cwd from sys.path.


In [218]:
aapl_test_complete.head()

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,pct change,predicted pct change,SCORE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-08-30,-0.015205,0.061591,0.883455,0.054955,-0.090909,105.99,-0.00777,0.001132,0
2016-08-31,-0.04342,0.0704,0.8186,0.111,-0.2,106.11,0.001132,0.005843,0
2016-09-01,0.009625,0.069625,0.897625,0.03275,0.125,106.73,0.005843,0.009369,0
2016-09-02,-0.087129,0.063143,0.845429,0.091429,-0.285714,107.73,0.009369,-0.000278,-1
2016-09-06,0.0932,0.13175,0.8045,0.06375,0.25,107.7,-0.000278,0.006221,1


In [306]:
test = aapl_test_complete

In [307]:
temp2 = [];
test['ACTION'] = ''
for i in range(round(len(test['predicted pct change']))):
    start = i
    end=(i+1)
    temp2.append(test['predicted pct change'][start:end])
    
    

for x in range(0,len(temp2)):
    for integer,row in temp2[x].iteritems():
        if row >0:
            test.set_value(integer,'ACTION','ADD MONEY')
            
        elif (row==0):
            test.set_value(integer,'ACTION','HOLD')
            
        else:
            test.set_value(integer,'ACTION', 'TAKE MONEY OUT')

  del sys.path[0]
  app.launch_new_instance()


In [254]:
test[test['ACTION'] == 'HOLD']

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,pct change,predicted pct change,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-08-21,0.14028,0.1322,0.8374,0.0304,0.2,215.06,-0.00181,0.0,1,HOLD
2019-09-05,0.0739,0.0696,0.9026,0.0278,0.2,213.27,0.019211,0.0,1,HOLD
2019-10-29,-0.092375,0.0,0.93225,0.06775,-0.25,243.29,-0.023089,0.0,-1,HOLD


In [265]:
test = test.reset_index()
del test['Date']

In [332]:
def window_data(df, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(df) - window):
        features = df.iloc[i : (i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [346]:
# Define the window size
window_size = 1

# Set the index of the feature and target columns
feature_column = 9
target_column = 9

# Create the features (X) and target (y) data using the window_data() function.
X, y = window_data(test, window_size, feature_column, target_column)

# Print a few sample values from X and y
print (f"X sample values:\n{X[:3]} \n")
print (f"y sample values:\n{y[:3]}")

X sample values:
[[1]
 [1]
 [1]] 

y sample values:
[[ 1]
 [ 1]
 [-1]]


In [347]:
# Manually splitting the data
split = int(0.7 * len(X))

X_train = X[: split]
X_test = X[split:]

y_train = y[: split]
y_test = y[split:]

In [348]:
# Importing the MinMaxScaler from sklearn
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Fit the MinMaxScaler object with the features data X
scaler.fit(X)

# Scale the features training and testing sets
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the MinMaxScaler object with the target data Y
scaler.fit(y)

# Scale the target training and testing sets
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)



In [349]:
# Reshape the features data
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Print some sample data after reshaping the datasets
print (f"X_train sample values:\n{X_train[:3]} \n")
print (f"X_test sample values:\n{X_test[:3]}")

X_train sample values:
[[[1.]]

 [[1.]]

 [[1.]]] 

X_test sample values:
[[[0.]]

 [[1.]]

 [[0.]]]


In [356]:
# Importing required Keras modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Define the LSTM RNN model.
model = Sequential()

# Initial model setup
number_units = 1
dropout_fraction = 0.2

# Layer 1
model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))

# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))
model.add(Dropout(dropout_fraction))

# Layer 3
model.add(LSTM(units=number_units))
model.add(Dropout(dropout_fraction))

model.add(Dense(units=1, activation='relu'))
model.add(Dense(units=1, activation='relu'))
model.add(Dense(units=1, activation='relu'))
model.add(Dense(units=1, activation='relu'))
model.add(Dense(units=1, activation='relu'))

# Output layer
model.add(Dense(1))

In [357]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error",metrics=['accuracy'])

In [358]:
# Show the model summary
model.summary()

Model: "sequential_44"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_32 (LSTM)               (None, 1, 1)              12        
_________________________________________________________________
dropout_30 (Dropout)         (None, 1, 1)              0         
_________________________________________________________________
lstm_33 (LSTM)               (None, 1, 1)              12        
_________________________________________________________________
dropout_31 (Dropout)         (None, 1, 1)              0         
_________________________________________________________________
lstm_34 (LSTM)               (None, 1)                 12        
_________________________________________________________________
dropout_32 (Dropout)         (None, 1)                 0         
_________________________________________________________________
dense_113 (Dense)            (None, 1)               

In [360]:
# Train the model
model.fit(X_train, y_train, epochs=300, shuffle=False, batch_size=90, verbose=1)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 

<tensorflow.python.keras.callbacks.History at 0x238ae48a550>

In [361]:
# Evaluate the model
model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 0.24522481858730316, Accuracy: 0.5664557218551636


In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [342]:
# Make predictions using the testing data X_test
predicted = model.predict(X_test)

In [343]:
# Recover the original prices instead of the scaled version
predicted_prices = scaler.inverse_transform(predicted)
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

In [344]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Actual": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
}, index = test.index[-len(real_prices): ]) 

# Show the DataFrame's head
stocks.head()

Unnamed: 0_level_0,Actual,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-08-12,1.0,-0.053317
2019-08-13,-1.0,-0.028901
2019-08-14,-1.0,-0.053317
2019-08-15,1.0,-0.053317
2019-08-16,1.0,-0.028901


In [378]:
apple_test_scores.head()

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-03-19,0.836,0.53,0.47,0.0,1.0
2016-08-27,0.0386,0.063,0.937,0.0,0.5
2016-08-28,0.4404,0.209,0.791,0.0,1.0
2016-08-29,0.0671,0.102,0.842286,0.055714,0.0
2016-08-30,-0.015205,0.061591,0.883455,0.054955,-0.090909


In [379]:
aapl_test_complete

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,pct change,predicted pct change,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-08-30,-0.015205,0.061591,0.883455,0.054955,-0.090909,105.9900,-0.007770,0.001132,0,ADD MONEY
2016-08-31,-0.043420,0.070400,0.818600,0.111000,-0.200000,106.1100,0.001132,0.005843,0,ADD MONEY
2016-09-01,0.009625,0.069625,0.897625,0.032750,0.125000,106.7300,0.005843,0.009369,0,ADD MONEY
2016-09-02,-0.087129,0.063143,0.845429,0.091429,-0.285714,107.7300,0.009369,-0.000278,-1,TAKE MONEY OUT
2016-09-06,0.093200,0.131750,0.804500,0.063750,0.250000,107.7000,-0.000278,0.006221,1,ADD MONEY
2016-09-07,0.088762,0.096000,0.877000,0.027000,0.250000,108.3700,0.006221,-0.026391,1,TAKE MONEY OUT
2016-09-08,0.010821,0.069714,0.862357,0.067929,0.000000,105.5100,-0.026391,-0.022462,0,TAKE MONEY OUT
2016-09-09,-0.059275,0.049500,0.872750,0.077750,-0.250000,103.1400,-0.022462,0.022300,-1,ADD MONEY
2016-09-12,0.158900,0.113000,0.887000,0.000000,0.333333,105.4400,0.022300,0.024469,1,ADD MONEY
2016-09-13,0.079327,0.089818,0.888455,0.021818,0.272727,108.0200,0.024469,0.034716,1,ADD MONEY


In [471]:
apple_test_scores = aapl_headlines.groupby('Date').mean().sort_values(by='Date')
aapl_test_complete = pd.concat([apple_test_scores,aapl_stock_info], join='outer', axis=1).dropna()
aapl_test_complete['predicted pct change'] = aapl_test_complete['pct change'].shift(periods=-1)
aapl_test_complete['SCORE'] = ''

In [545]:
tsla_test_scores = tsla_headlines.groupby('Date').mean().sort_values(by='Date')
tsla_test_complete = pd.concat([tsla_test_scores,tsla_stock_info], join='outer', axis=1).dropna()
tsla_test_complete['predicted pct change'] = tsla_test_complete['pct change'].shift(periods=-1)
tsla_test_complete['SCORE'] = ''

In [472]:
temp = [];
for i in range(round(len(aapl_test_complete['compound']))):
    start = i
    end=(i+1)
    temp.append(aapl_test_complete['compound'][start:end])

In [473]:
#APPLE

for x in range(0,len(temp)):
    for integer,row in temp[x].iteritems():
        if row >= 0.05:
            aapl_test_complete.set_value(integer,'SCORE',1)
            
        elif (row <= -0.05):
            aapl_test_complete.set_value(integer,'SCORE',-1)
            
        else:
            aapl_test_complete.set_value(integer,'SCORE', 0)

  if sys.path[0] == '':
  if __name__ == '__main__':
  


In [546]:
#TESLA
temp = [];
for i in range(round(len(tsla_test_complete['compound']))):
    start = i
    end=(i+1)
    temp.append(tsla_test_complete['compound'][start:end])


for x in range(0,len(temp)):
    for integer,row in temp[x].iteritems():
        if row >= 0.05:
            tsla_test_complete.set_value(integer,'SCORE',1)
            
        elif (row <= -0.05):
            tsla_test_complete.set_value(integer,'SCORE',-1)
            
        else:
            tsla_test_complete.set_value(integer,'SCORE', 0)

  if sys.path[0] == '':
  from ipykernel import kernelapp as app


In [547]:
test2 = tsla_test_complete

In [550]:
test2.head()

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,pct change,predicted pct change,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-08-30,0.0,0.0,1.0,0.0,0.0,211.34,-0.017937,0.002886,0,1
2016-08-31,0.09955,0.086333,0.891167,0.0225,0.166667,211.95,0.002886,-0.052748,1,0
2016-09-01,0.1902,0.1705,0.8295,0.0,0.5,200.77,-0.052748,-0.014893,1,0
2016-09-02,0.0168,0.101333,0.801,0.098,0.0,197.78,-0.014893,-0.005522,0,0
2016-09-07,-0.4019,0.175,0.485,0.34,-1.0,201.7,-0.005522,-0.020873,-1,0


In [549]:
#TESLA
temp2 = [];
test2['ACTION'] = ''
for i in range(round(len(test2['predicted pct change']))):
    start = i
    end=(i+1)
    temp2.append(test2['predicted pct change'][start:end])
    
    

for x in range(0,len(temp2)):
    for integer,row in temp2[x].iteritems():
        if row >0:
            test2.set_value(integer,'ACTION',1)
            
        elif (row==0):
            test2.set_value(integer,'ACTION',1)
            
        else:
            test2.set_value(integer,'ACTION', 0)

  


In [474]:
test = aapl_test_complete

In [475]:
temp2 = [];
test['ACTION'] = ''
for i in range(round(len(test['predicted pct change']))):
    start = i
    end=(i+1)
    temp2.append(test['predicted pct change'][start:end])
    
    

for x in range(0,len(temp2)):
    for integer,row in temp2[x].iteritems():
        if row >0:
            test.set_value(integer,'ACTION',1)
            
        elif (row==0):
            test.set_value(integer,'ACTION',1)
            
        else:
            test.set_value(integer,'ACTION', 0)

  del sys.path[0]
  app.launch_new_instance()


In [476]:
test = test.dropna()

In [92]:
del aapl_test_complete['Date']

In [226]:
test = test.reset_index()
del test['Date']

In [477]:
del test['pct change']
test.head()

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,predicted pct change,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-08-30,-0.015205,0.061591,0.883455,0.054955,-0.090909,105.99,0.001132,0,1
2016-08-31,-0.04342,0.0704,0.8186,0.111,-0.2,106.11,0.005843,0,1
2016-09-01,0.009625,0.069625,0.897625,0.03275,0.125,106.73,0.009369,0,1
2016-09-02,-0.087129,0.063143,0.845429,0.091429,-0.285714,107.73,-0.000278,-1,0
2016-09-06,0.0932,0.13175,0.8045,0.06375,0.25,107.7,0.006221,1,1


In [470]:
test

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,predicted pct change,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-08-30,-0.015205,0.061591,0.883455,0.054955,-0.090909,105.9900,0.001132,0,ADD MONEY
2016-08-31,-0.043420,0.070400,0.818600,0.111000,-0.200000,106.1100,0.005843,0,ADD MONEY
2016-09-01,0.009625,0.069625,0.897625,0.032750,0.125000,106.7300,0.009369,0,ADD MONEY
2016-09-02,-0.087129,0.063143,0.845429,0.091429,-0.285714,107.7300,-0.000278,-1,TAKE MONEY OUT
2016-09-06,0.093200,0.131750,0.804500,0.063750,0.250000,107.7000,0.006221,1,ADD MONEY
2016-09-07,0.088762,0.096000,0.877000,0.027000,0.250000,108.3700,-0.026391,1,TAKE MONEY OUT
2016-09-08,0.010821,0.069714,0.862357,0.067929,0.000000,105.5100,-0.022462,0,TAKE MONEY OUT
2016-09-09,-0.059275,0.049500,0.872750,0.077750,-0.250000,103.1400,0.022300,-1,ADD MONEY
2016-09-12,0.158900,0.113000,0.887000,0.000000,0.333333,105.4400,0.024469,1,ADD MONEY
2016-09-13,0.079327,0.089818,0.888455,0.021818,0.272727,108.0200,0.034716,1,ADD MONEY


In [458]:
X = test.drop("ACTION", axis=1)
y = test["ACTION"]
print(X.shape, y.shape)

(1051, 8) (1051,)


In [459]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [460]:
from keras.models import Sequential
from keras.layers import Dense

# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=8))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

In [461]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Epoch 1/100
25/25 - 0s - loss: 0.8679 - accuracy: 0.6117
Epoch 2/100
25/25 - 0s - loss: 0.5554 - accuracy: 0.8490
Epoch 3/100
25/25 - 0s - loss: 0.4043 - accuracy: 0.9010
Epoch 4/100
25/25 - 0s - loss: 0.2921 - accuracy: 0.9404
Epoch 5/100
25/25 - 0s - loss: 0.2278 - accuracy: 0.9467
Epoch 6/100
25/25 - 0s - loss: 0.1783 - accuracy: 0.9645
Epoch 7/100
25/25 - 0s - loss: 0.1475 - accuracy: 0.9734
Epoch 8/100
25/25 - 0s - loss: 0.1285 - accuracy: 0.9759
Epoch 9/100
25/25 - 0s - loss: 0.1129 - accuracy: 0.9708
Epoch 10/100
25/25 - 0s - loss: 0.1011 - accuracy: 0.9746
Epoch 11/100
25/25 - 0s - loss: 0.0894 - accuracy: 0.9772
Epoch 12/100
25/25 - 0s - loss: 0.0792 - accuracy: 0.9860
Epoch 13/100
25/25 - 0s - loss: 0.0755 - accuracy: 0.9835
Epoch 14/100
25/25 - 0s - loss: 0.0668 - accuracy: 0.9886
Epoch 15/100
25/25 - 0s - loss: 0.0658 - accuracy: 0.9848
Epoch 16/100
25/25 - 0s - loss: 0.0576 - accuracy: 0.9911
Epoch 17/100
25/25 - 0s - loss: 0.0579 - accuracy: 0.9860
Epoch 18/100
25/25 - 0s

<tensorflow.python.keras.callbacks.History at 0x238b3fb07b8>

In [462]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

9/9 - 0s - loss: 0.1458 - accuracy: 0.9620
Normal Neural Network - Loss: 0.14584596455097198, Accuracy: 0.9619771838188171


In [463]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)



In [464]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['TAKE MONEY OUT' 'TAKE MONEY OUT' 'ADD MONEY' 'TAKE MONEY OUT'
 'TAKE MONEY OUT']
Actual Labels: ['TAKE MONEY OUT', 'TAKE MONEY OUT', 'ADD MONEY', 'TAKE MONEY OUT', 'TAKE MONEY OUT']


In [35]:
apple_score_test = aapl_scores.reset_index()
apple_score_test['Date'] = apple_score_test['Date'].astype(str)

apple_stock_test = aapl_stock_info.reset_index()
apple_stock_test['Date'] = apple_stock_test['Date'].astype(str)

In [519]:
test2

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,pct change,predicted pct change,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-08-30,0.000000,0.000000,1.000000,0.000000,0.000000,211.3400,-0.017937,0.002886,0,ADD MONEY
2016-08-31,0.099550,0.086333,0.891167,0.022500,0.166667,211.9500,0.002886,-0.052748,1,TAKE MONEY OUT
2016-09-01,0.190200,0.170500,0.829500,0.000000,0.500000,200.7700,-0.052748,-0.014893,1,TAKE MONEY OUT
2016-09-02,0.016800,0.101333,0.801000,0.098000,0.000000,197.7800,-0.014893,-0.005522,0,TAKE MONEY OUT
2016-09-07,-0.401900,0.175000,0.485000,0.340000,-1.000000,201.7000,-0.005522,-0.020873,-1,TAKE MONEY OUT
2016-09-08,0.159525,0.119250,0.834500,0.046500,0.250000,197.4900,-0.020873,0.019592,1,ADD MONEY
2016-09-12,0.101067,0.205833,0.683667,0.110500,0.333333,198.2800,0.019592,-0.011247,1,TAKE MONEY OUT
2016-09-13,0.101150,0.083500,0.916500,0.000000,0.500000,196.0500,-0.011247,0.002244,1,ADD MONEY
2016-09-14,0.200950,0.139000,0.861000,0.000000,0.500000,196.4900,0.002244,0.019085,1,ADD MONEY
2016-09-15,0.128771,0.118429,0.831857,0.049714,0.285714,200.2400,0.019085,0.025769,1,ADD MONEY


In [507]:
test

Unnamed: 0_level_0,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-08-30,0,1
2016-08-31,0,1
2016-09-01,0,1
2016-09-02,-1,0
2016-09-06,1,1
2016-09-07,1,0
2016-09-08,0,0
2016-09-09,-1,1
2016-09-12,1,1
2016-09-13,1,1


In [541]:
target = test2["ACTION"].astype(int)
target_names = ["negative", "positive"]

In [551]:
test2.head()

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment,close,pct change,predicted pct change,SCORE,ACTION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-08-30,0.0,0.0,1.0,0.0,0.0,211.34,-0.017937,0.002886,0,1
2016-08-31,0.09955,0.086333,0.891167,0.0225,0.166667,211.95,0.002886,-0.052748,1,0
2016-09-01,0.1902,0.1705,0.8295,0.0,0.5,200.77,-0.052748,-0.014893,1,0
2016-09-02,0.0168,0.101333,0.801,0.098,0.0,197.78,-0.014893,-0.005522,0,0
2016-09-07,-0.4019,0.175,0.485,0.34,-1.0,201.7,-0.005522,-0.020873,-1,0


In [563]:
test2=test2.reset_index()
test2 = test2[['compound','SCORE','ACTION']]
data = test2.drop("ACTION", axis=1).dropna()
feature_names = data.columns
data.head()

KeyError: "['compound'] not in index"

In [560]:
data.columns

Index(['Date', 'SCORE'], dtype='object')

In [561]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [562]:
# Support vector machine linear classifier
from sklearn.svm import SVC 
model = SVC(kernel='sigmoid')
model.fit(X_train, y_train)

TypeError: float() argument must be a string or a number, not 'Timestamp'

In [539]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.517


In [540]:
# Calculate classification report
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions,
                            target_names=target_names))

              precision    recall  f1-score   support

    negative       0.50      0.24      0.32       126
    positive       0.52      0.78      0.62       135

    accuracy                           0.52       261
   macro avg       0.51      0.51      0.47       261
weighted avg       0.51      0.52      0.48       261

