In [1]:
# Import the necessary modules
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn import linear_model
from sklearn import preprocessing
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn import ensemble
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score

  from pandas.core import datetools


In [2]:
AAPL = pd.read_csv('D:\\Data\\sandp500\\AAPL_data.csv')

In [3]:
AAPL.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name
1253,2017-08-07,157.06,158.92,156.67,158.81,21870321,AAPL
1254,2017-08-08,158.6,161.83,158.27,160.08,36205896,AAPL
1255,2017-08-09,159.26,161.27,159.11,161.06,26131530,AAPL
1256,2017-08-10,159.9,160.0,154.63,155.32,40804273,AAPL
1257,2017-08-11,156.6,158.57,156.07,157.48,26257096,AAPL


In [4]:
# The momentum will show how many days in a row the stock has moved up or down. 

# Create a list to store the momentum
momentum = [0]
i=1
# Calculate the momentums and store them in the new column, 'Momentum'
for row in AAPL['Close']:
    if i < len(AAPL):
        if AAPL.Close[i] >= AAPL.Close[i-1]:
            momentum.append(+1)
            i = i+1
        elif AAPL.Close[i] < AAPL.Close[i-1]:
            momentum.append(-1)
            i = i+1
AAPL['Momentum'] = momentum
AAPL.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name,Momentum
0,2012-08-13,89.06,90.0,89.04,90.0,69707463,AAPL,0
1,2012-08-14,90.27,91.23,90.03,90.24,85041824,AAPL,1
2,2012-08-15,90.19,90.57,89.68,90.12,64377278,AAPL,-1
3,2012-08-16,90.17,90.97,90.07,90.91,63694204,AAPL,1
4,2012-08-17,91.43,92.6,91.26,92.59,110689894,AAPL,1


In [5]:
AAPL.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name,Momentum
1253,2017-08-07,157.06,158.92,156.67,158.81,21870321,AAPL,1
1254,2017-08-08,158.6,161.83,158.27,160.08,36205896,AAPL,1
1255,2017-08-09,159.26,161.27,159.11,161.06,26131530,AAPL,1
1256,2017-08-10,159.9,160.0,154.63,155.32,40804273,AAPL,-1
1257,2017-08-11,156.6,158.57,156.07,157.48,26257096,AAPL,1


In [6]:
# Drop the last row to get rid of the NaN values
AAPL = AAPL.iloc[:len(AAPL)-1,:]

In [7]:
streak = [0] * len(AAPL)
i=1
# Calculate the streaks and store them in the new column, 'Streak'
for row in AAPL['Close']:
    if i < len(AAPL):
        if AAPL.Close[i] > AAPL.Close[i-1]:
            if streak[i-1] >= 0:
                streak[i] = streak[i-1]+1
                i = i+1
            else:
                streak[i]=0
                i = i+1
        elif AAPL.Close[i] < AAPL.Close[i-1]:
            if streak[i-1] <= 0:
                streak[i] = streak[i-1]-1
                i = i+1
            else:
                streak[i]=0
                i = i+1

AAPL['Streak'] = streak
AAPL.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name,Momentum,Streak
0,2012-08-13,89.06,90.0,89.04,90.0,69707463,AAPL,0,0
1,2012-08-14,90.27,91.23,90.03,90.24,85041824,AAPL,1,1
2,2012-08-15,90.19,90.57,89.68,90.12,64377278,AAPL,-1,0
3,2012-08-16,90.17,90.97,90.07,90.91,63694204,AAPL,1,1
4,2012-08-17,91.43,92.6,91.26,92.59,110689894,AAPL,1,2


In [8]:
# Create a 'Future Momentum' feature that the model will attempt to predict.
AAPL['Future Momentum'] = AAPL.Momentum.shift(-1)
AAPL.head()# Drop the last row to get rid of the NaN values
AAPL = AAPL.iloc[:len(AAPL)-1,:]
AAPL.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name,Momentum,Streak,Future Momentum
1251,2017-08-03,157.05,157.21,155.02,155.57,27097296,AAPL,-1,0,1.0
1252,2017-08-04,156.07,157.4,155.69,156.39,20559852,AAPL,1,0,1.0
1253,2017-08-07,157.06,158.92,156.67,158.81,21870321,AAPL,1,0,1.0
1254,2017-08-08,158.6,161.83,158.27,160.08,36205896,AAPL,1,0,1.0
1255,2017-08-09,159.26,161.27,159.11,161.06,26131530,AAPL,1,0,-1.0


In [9]:
# Set the variables. 
# Use the closing value for Y
# Use the new features for X
Y = AAPL['Future Momentum']
X = AAPL[['Close', 'Volume', 'Momentum', 'Streak']]

# Create training and test sets.
offset = int(X.shape[0] * 0.8)

# Put 90% of the data in the training set.
X_train, Y_train = X[:offset], Y[:offset]

# And put 10% in the test set.
X_test, Y_test = X[offset:], Y[offset:]

In [10]:
# Create a cash_available list that stores how much cash is available 
# to buy the stocks. It will be $1000 every ten business days.
cash_available = [0]

# Start out with 0 stocks_owned
stocks_owned = [0]

# Create DataFrame for profits
profits = {'Model':[],'Profit':[]}
columns = profits.keys()
model_profits = pd.DataFrame(data=profits, columns=columns)

In [11]:
# Create a pred_return function to calculate the returns of the prediction models
def pred_return(y_pred, data, cash_available, stocks_owned):
    # Create DataFrame for values
    new_values = {'Stocks':[],'Cash':[]}
    columns = new_values.keys()
    values = pd.DataFrame(data=new_values, columns=columns)
    for i in range(len(y_pred)):
        # For every tenth iteration of i, add 1000 to cash_available
        if i%10 == 0:
            cash_available = cash_available+1000
        # If the predicted value is greater than zero, buy more stock
        if y_pred[i] > 0:
            [stocks_owned,cash_available] = buy_stock(cash_available, 
                                                      stocks_owned, y_pred, 
                                                      data.Close[i])
        # If the predicted value is less than zero, sell stock
        elif y_pred[i] < 0:
            [stocks_owned,cash_available] = sell_stock(cash_available, 
                                                      stocks_owned, y_pred, 
                                                      data.Close[i])
        stocks_owned = [stocks_owned,cash_available][0]
        cash_available = [stocks_owned,cash_available][1]
        new_values = {'Stocks':[stocks_owned], 'Cash':[cash_available]}
        values = values.append(pd.DataFrame(data=new_values, columns=
                                            new_values.keys()), 
                                            ignore_index=True)
    #print(values,y_pred)
    return(values)

In [12]:
# Create a buy_stock function that buys as many stocks as can be afforded
def buy_stock(cash_available, stocks_owned, y_pred, value):
    # Set number of stocks to buy
    num_stocks_buy = int(cash_available/value)
    # Subtract from cash_available, store it in a list
    cash_available = cash_available-num_stocks_buy*value
    stocks_owned = stocks_owned+num_stocks_buy
    return(stocks_owned, cash_available)

In [13]:
# Create a sell_stock function that sells all stocks
def sell_stock(cash_available, stocks_owned, y_pred, value):
    sell_value = stocks_owned*value
    cash_available = cash_available + sell_value
    stocks_owned = 0
    return(stocks_owned, cash_available)

In [14]:
# Set up the Ridge Regression
ridgeregr = linear_model.Ridge(alpha=10, fit_intercept=False) 
# Run the pred_return function for ridge
y_pred = ridgeregr.fit(X_train,Y_train).predict(X_test)
cash_available = 0
stocks_owned = 0
values = pred_return(y_pred, AAPL, cash_available, stocks_owned)

In [15]:
# Calculate the ending returns from the lr model
ridgeregr_returns = values.loc[len(values)-1,'Stocks']*AAPL.loc[len(y_pred)-1,'Close'] + values.loc[len(values)-1, 'Cash']
print(ridgeregr_returns)

22717.14


In [16]:
# 1255 rows of data
# $1000 granted every 10 days = 125,500 dollars granted.
# Since we are predicting the last 20% of the data, multiply by .20 
# A total of $25,100 granted in the last 20% of the data
# Get total profit by subrtracting returns by dollars granted. 
# Get total profit by subrtracting returns by dollars granted.
ridgeregr_profit = ridgeregr_returns - 25100
print(ridgeregr_profit)

# Store the profit in the model_profits table
profits = {'Model':['Ridge Regression'], 'Profit':ridgeregr_profit}
model_profits = model_profits.append(pd.DataFrame(data=profits, columns=profits.keys()), ignore_index=True)

-2382.86


In [17]:
# Create the 401k approach simulator, safe_invest, 
# That buys as much stock as is available every two weeks / ten business days.
def safe_invest(data, cash_available, stocks_owned):
    # Create DataFrame for values
    new_values = {'Stocks':[],'Cash':[]}
    columns = new_values.keys()
    values = pd.DataFrame(data=new_values, columns=columns)
    for i in range(len(data.loc[offset:,:])):
        # For every tenth iteration of i, add 1000 to cash_available
        if i%10 == 0:
            cash_available = cash_available+1000
        num_stocks_buy = int(cash_available/data.Close[i])
        stocks_owned = num_stocks_buy + stocks_owned
        cash_available = cash_available-num_stocks_buy*data.Close[i]
        # Store the values in a table
        new_values = {'Stocks':[stocks_owned], 'Cash':[cash_available]}
        values = values.append(pd.DataFrame(data=new_values, columns=
                                            new_values.keys()), 
                                            ignore_index=True)

    #print(values)
    return(values)

In [18]:
# Run the safe_invest function
cash_available = 0
stocks_owned = 0
values = safe_invest(AAPL, cash_available, stocks_owned)

In [19]:
# Calculate the ending returns from the safe_invest function
safe_returns = values.loc[len(values)-1,'Stocks']*AAPL.loc[len(AAPL.loc[offset:,:])-1,'Close'] + values.loc[len(values)-1, 'Cash']
print(safe_returns)

26187.33


In [20]:
# 1255 rows of data
# $1000 granted every 10 days = 125,500 dollars granted.
# Since we are predicting the last 20% of the data, multiply by .20 
# A total of $25,100 granted in the last 20% of the data
# Get total profit by subrtracting returns by dollars granted. 
# Get total profit by subrtracting returns by dollars granted. 
safe_profit = safe_returns - 25100
print(safe_profit)

# Store the profit in the model_profits table
profits = {'Model':['401k Simulator'], 'Profit':safe_profit}
model_profits = model_profits.append(pd.DataFrame(data=profits, columns=profits.keys()), ignore_index=True)

1087.33
