# Single Model Monetization - Part 2
### CSCI 4210 - Simulation and Modeling
#### Matthew Wicker

In this notebook, we will reload our pretrained model. Then, we will attempt to exploit this model's predictions to maximize profits (or to minimize losses!).
### Getting started - Clone the DeepMarketModels Repository:

#### • From your terminal run the following command: git clone https://github.com/matthewwicker/DeepMarketModels.git
 

In [1]:
import sys
sys.path.append('/usr/local/lib/python2.7/site-packages')

import csv
import numpy as np
from __future__ import print_function
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.cross_validation import  train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (16, 10)
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import os


#===========================================================
#          We have set these for reproducibility
#===========================================================

# Seed NumPy's global random number generator.
np.random.seed(5)
# Length of each input window (number of consecutive prices per sample).
# The model-loading cell also uses it as the LSTM's time-step dimension.
look_back = 7

#===========================================================
#      Constants which define the stock you want to use
#===========================================================

# Here is the path to your financial data from Yahoo:
# Note: Replace NVIDIA stock. Not enough data on recent trends
STOCK_TO_USE = "AAPL"
# All three paths are relative to the notebook's working directory.
DATA_PATH = "FinData/" + STOCK_TO_USE + ".csv"
# NOTE(review): MODEL_JSON_PATH is defined but not read by any cell shown in
# this notebook; only the weight file is loaded.
MODEL_JSON_PATH = "FinModels/" + STOCK_TO_USE + ".json"
MODEL_WEIGHT_PATH = "FinModels/" + STOCK_TO_USE + ".h5"

Using TensorFlow backend.


Populating the interactive namespace from numpy and matplotlib




## Load in the data that we will be using

This is the same as in the previous notebook

In [None]:
# Here are the basic values of each stock that we have information on
class date:
    """One row of price data: a date label plus six numeric fields.

    A fresh instance holds placeholders ("NaN" for the date and 0.0 for every
    number); call reinit() to load the real values for a row.
    """

    def __init__(self):
        # Placeholder values, overwritten by reinit().
        self.date = "NaN"
        self.open = self.high = self.low = 0.0
        self.close = self.adj = self.volume = 0.0

    def reinit(self, date, op, high, low, close, adj, vol):
        """Overwrite every field, converting the numeric ones with float().

        date is stored unchanged. op, high, low, close, adj and vol may be
        anything float() accepts (for example CSV strings) and are stored as
        the open, high, low, close, adj and volume attributes respectively.
        float() raises ValueError for text it cannot parse.
        """
        self.date = date
        numeric_fields = (
            ("open", op),
            ("high", high),
            ("low", low),
            ("close", close),
            ("adj", adj),
            ("volume", vol),
        )
        # Converted and assigned one at a time, in this order.
        for attr_name, raw_value in numeric_fields:
            setattr(self, attr_name, float(raw_value))
        
# Parse the CSV into one `date` record per usable row.
data_entries = []
print ("Opening File: %s"%(DATA_PATH))
with open(DATA_PATH, 'rb') as f:
    for i, row in enumerate(csv.reader(f)):
        # Row 0 is the header line; rows containing the literal 'null'
        # have a missing value and are dropped.
        if i == 0 or 'null' in row:
            continue
        entry = date()
        entry.reinit(row[0], row[1], row[2], row[3], row[4], row[5], row[6])
        data_entries.append(entry)

# One array per field, trimmed to the last 2500 rows of the file
# (or every row when the file has fewer).
dates_of_data = np.asarray([entry.date for entry in data_entries])[-2500:]
open_of_data = np.asarray([entry.open for entry in data_entries])[-2500:]
close_of_data = np.asarray([entry.close for entry in data_entries])[-2500:]
high_of_data = np.asarray([entry.high for entry in data_entries])[-2500:]
low_of_data = np.asarray([entry.low for entry in data_entries])[-2500:]


print("Done reading file")

Opening File: FinData/AAPL.csv
Done reading file


## Load in the pre-trained model

In [None]:
# Rebuild the network in code, then load the trained weights into it.
# NOTE(review): the layer stack presumably has to mirror the architecture that
# produced MODEL_WEIGHT_PATH in the previous notebook - confirm before editing.
layers = [1,look_back,1]   # layers[0] = features per step, layers[1] = time steps; layers[2] is unused here
d = 0.1                    # dropout rate applied after the LSTM layer

# A single Sequential container (the original created one and immediately
# replaced it with a second, identical empty one).
model = Sequential()
model.add(LSTM(32, input_shape=(layers[1], layers[0]), return_sequences=False))
model.add(Dropout(d))

model.add(Dense(4,kernel_initializer="uniform",activation='relu'))
model.add(Dense(1,kernel_initializer="uniform",activation='linear'))

model.load_weights(MODEL_WEIGHT_PATH)
model.summary()

## Test - Train Split (just as before)

In [None]:
# Opening prices as a single-column 2-D array, rescaled into [0, 1].
# The fitted `scaler` is kept so later cells can map values back to prices.
scaler = MinMaxScaler(feature_range=(0, 1))
stock_prices = scaler.fit_transform(open_of_data.reshape(len(open_of_data), 1))

# First 95% of the rows go to training, the remainder to testing.
train_size = int(len(stock_prices) * 0.95)
test_size = len(stock_prices) - train_size
train = stock_prices[:train_size, :]
test = stock_prices[train_size:, :]

print('Split data into training set and test set... Number of training samples/ test samples:', len(train), len(test))

def create_dataset(dataset, look_back):
    """Slice a single-column series into sliding windows and their next values.

    dataset is a 2-D array whose column 0 holds the series. Sample k pairs the
    window dataset[k:k+look_back, 0] with the target dataset[k+look_back, 0].

    Exactly len(dataset) - look_back - 1 samples are produced, so the final
    value of the series is never used as a target. The plotting offsets
    elsewhere in this notebook rely on that count.

    Returns (windows, targets) as NumPy arrays; both are empty 1-D arrays when
    the series is too short to yield a sample.
    """
    n_samples = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(n_samples)]
    targets = [dataset[start + look_back, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

# Turn each scaled price series into (window, next value) samples.
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# The LSTM input layout is [samples, time steps, features], one feature here.
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
testX = testX.reshape(testX.shape[0], testX.shape[1], 1)


# Here is where some things are different:
# two extra slices of the test windows - the first 100, and all but the last 25.
# NOTE(review): neither name is read again by the cells shown in this notebook.
testX_E, testX_P = testX[:100], testX[:-25]


print("Done with split")

## Create a relevant measure of our model's error based on some unseen data

In [None]:
# Predict on both splits and map the scaled outputs back to prices.
trainPredict = scaler.inverse_transform(model.predict(trainX))
testPredict = scaler.inverse_transform(model.predict(testX))

# Unscale the targets into NEW names. trainY / testY keep their scaled values,
# so this cell can be re-run safely (overwriting them in place made a second
# run unscale already-unscaled data and then fail). Each target vector is
# passed as a single column, the (n_samples, 1) layout the scaler was fitted on.
trainY_unscaled = scaler.inverse_transform(np.reshape(trainY, (-1, 1)))
testY_unscaled = scaler.inverse_transform(np.reshape(testY, (-1, 1)))

# calculate root mean squared error (in price units)
trainScore = math.sqrt(mean_squared_error(trainY_unscaled[:, 0], trainPredict[:, 0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY_unscaled[:, 0], testPredict[:, 0]))
print('Test Score: %.2f RMSE' % (testScore))


# shift predictions of training data for plotting:
# prediction j targets row j + look_back, because the first look_back rows
# only ever serve as input. Rows without a prediction stay NaN.
trainPredictPlot = np.empty_like(stock_prices)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

# shift predictions of test data for plotting:
# create_dataset yields len(train) - look_back - 1 training samples, so the
# start index below equals len(train) + look_back, the first test target.
# The final row stays NaN because no window targets the last value.
testPredictPlot = np.empty_like(stock_prices)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(stock_prices)-1, :] = testPredict

In [None]:
# Overlay the model's test-set predictions on the real closing prices.
# NOTE(review): the series fed to the model is built from open_of_data, while
# this chart compares against close_of_data - confirm that is intended.
test_size = 125

# Last `test_size` rows of the actual closes and of the NaN-padded predictions.
real = close_of_data[-test_size:]
pred = testPredictPlot[-test_size:]

plt.clf()
close_line, = plt.plot(real[:100], label='Close Price',alpha=0.3)
prediction_line, = plt.plot(pred[:100], label='LSTM Prediction on Unseen Data', color='r')
plt.title("Results of our Prediction for %s Stock Prices [Close]"%(STOCK_TO_USE))
plt.xlabel("Day")
plt.ylabel("Price")
plt.legend(handles=[close_line, prediction_line])
plt.show()

In [None]:
# Per-day prediction error over the first 100 test days, drawn as a bar chart
# in which each bar's colour encodes the size of the error.
cod = real[:100]     # actual closing prices
lstmp = pred[:100]   # model predictions; each row is a one-element array

# Predicted minus actual, reduced to plain floats so the bar heights and the
# colour lookup work on scalars rather than one-element arrays. Rows with no
# prediction stay NaN.
performance = [float(lstmp[i][0] - cod[i]) for i in range(100)]
y_pos = np.arange(len(performance))

cmap = matplotlib.cm.get_cmap('Spectral_r')
max_val = 50   # absolute error (USD) that maps to the top of the colour scale
colors = []
for diff in performance:
    rgba = cmap(abs(diff)/max_val)
    # %x requires integers, so truncate each 0-255 channel explicitly instead
    # of relying on implicit float-to-int conversion.
    hex_col = '#%02x%02x%02x' % (int(rgba[0]*255), int(rgba[1]*255), int(rgba[2]*255))
    colors.append(hex_col)



plt.bar(y_pos, performance, align='center', alpha=0.5,width=1.0, color=colors)
plt.ylabel('Difference in USD ($)')
plt.title('Prediction Accuracy - Difference between Predicted Close and Real Close - %s'%(STOCK_TO_USE))
plt.xlabel('Day')

plt.show()

# Colour key: one swatch per whole dollar from 0 to 50, drawn on a short, wide
# figure. The default figure size is restored afterwards.
max_val += 1
pylab.rcParams['figure.figsize'] = (16, 1)
plt.clf()
ones = np.ones(max_val)
color_val = [cmap(float(i)/max_val)for i in range(max_val)]
plt.bar(np.arange(max_val), ones, align='center', alpha=0.5,width=1.0, color=color_val)
plt.ylim([0,1])
plt.xlim([0,50])
plt.title("Chart for Difference Plot Color per USD ($)")
plt.show()
pylab.rcParams['figure.figsize'] = (16, 10)

# Turning the visuals into something useful:

In [None]:
# First let's separate the negative values from the
# positive values so we can get a sense of whether we
# are over-predicting or under-predicting
cmap = matplotlib.cm.get_cmap('hot_r')
cod = real[:100]
lstmp = pred[:100]

# Differences are predicted minus actual. NaN differences (rows with no
# prediction) fail both comparisons and so land in neither list.
negs = [lstmp[i] - cod[i] for i in range(100) if((lstmp[i] - cod[i]) < 0)]
poss = [lstmp[i] - cod[i] for i in range(100) if((lstmp[i] - cod[i]) > 0)]

negs = np.asarray(negs)
poss = np.asarray(poss)

print("Number of overshoots: %s Average Overshoot Val: %s"%(len(poss), np.average(poss)))
print("Number of undershoots: %s Average Undershoot Val: %s"%(len(negs), np.average(negs)))
# After getting the trend of over or under predicting
# we will figure out the average overshoot and
# undershoot

real = close_of_data[-test_size:]
pred = testPredictPlot[-test_size:]

overshoot =  np.average(poss)    # mean of the positive errors
undershoot =  np.average(negs)   # mean of the negative errors (a negative number)
naive_bet = (overshoot+undershoot)/2

x_vals = np.arange(25)

# Correct the last 25 predictions by the average errors.
# NOTE: the variable names are inverted relative to the values they hold.
# pred_ub (prediction minus the average overshoot) is the LOWER bound, and
# pred_lb (prediction minus the negative undershoot) is the UPPER bound.
# The legend labels below are the accurate ones. Later cells use these names,
# so they are left unchanged.
pred_ub = [i-overshoot for i in pred[-25:]]
pred_lb = [i-undershoot for i in pred[-25:]]
pred_b = [i-naive_bet for i in pred[-25:]]
r = plt.scatter(x_vals, pred[-25:], label="Predicted Values", alpha=0.3, color=cmap(0.6))
l = plt.scatter(x_vals, pred_ub, label="Predicted Stock Lower Bound", alpha=0.3, color=cmap(0.4))
u = plt.scatter(x_vals, pred_lb, label="Predicted Stock Upper Bound", alpha=0.3, color=cmap(1.0))
b = plt.errorbar(x_vals, pred_b, yerr=overshoot, label="Value to Bet On", alpha=0.2, color=cmap(0.8))
# undershoot is negative; error-bar sizes must be non-negative, so pass its
# magnitude. The bar drawn is the same symmetric span around pred_b.
b = plt.errorbar(x_vals, pred_b, yerr=abs(undershoot), label="Value to Bet On", alpha=0.2, color=cmap(0.8))
o, = plt.plot(real[-25:], label='Close Price',alpha=0.3)
plt.legend(handles=[r,l,u,b,o])
plt.show()

## Using under and overshoot to trade this stock

* If we predict that the current price will certainly go up (even under lower bound conditions) then we want a strong  investment

* If we predict that the current price may go up on the average case we will make a relatively strong investment

* If we predict that the stock may go up, but has a higher chance of going down then we invest very weakly


In [None]:
data = real[-25:]   # last 25 actual closing prices
vals = pred_b


#This strategy constant will determine how aggressive the investment strategy is.
# 10 - conservative
# 20 - optimistic
# 30 - aggressive
# 40 - insanity
STRATEGY_CONSTANT = 10

# For every day except the last, compare today's price with tomorrow's
# corrected predictions and pick a scale for the investment.
# NOTE: as assigned in the previous cell, pred_ub holds the LOWER bound and
# pred_lb the UPPER bound; pred_b is the midpoint "bet" value.
investment_rec = []
for today in range(len(data) - 1):
    tomorrow = today + 1
    if data[today] < pred_ub[tomorrow]:
        # below even the lower bound: strongest investment
        scale = STRATEGY_CONSTANT
    elif data[today] < pred_b[tomorrow]:
        # below the midpoint: half strength
        scale = STRATEGY_CONSTANT/2
    elif data[today] < pred_lb[tomorrow]:
        # only below the upper bound: unscaled, weakest investment
        scale = 1
    else:
        # no predicted rise (or tomorrow's prediction is NaN): stay out
        investment_rec.append(0)
        continue
    # Relative gap between today's price and tomorrow's upper bound.
    investment_rec.append(scale*(1 - data[today]/pred_lb[tomorrow]))

# Never recommend investing more than the whole amount.
investment_rec = [1 if rec > 1 else rec for rec in investment_rec]

x = np.arange(len(investment_rec))
plt.title("Investment Strategy Profile")
plt.xlabel("Day")
plt.ylabel("Amount of Original Amount to Invest in this Stock")
plt.bar(x, investment_rec, alpha=0.2, color='b')
plt.show()

# So how much money do we make (or lose)?

Now that we have the amounts that we will invest based on our confidence in the model, let's see what the payoff would be for following this strategy

In [None]:
# Follow investment_rec day by day. Starting from 100 units means the final
# gain or loss reads directly as a percentage of the starting amount.
TOTAL_AMOUNT = float(100)

inv_perf = []

for i in range(len(data) - 1):
    # investment_rec entries are either a plain number or a one-element
    # array; reduce both to a float so TOTAL_AMOUNT always stays a scalar.
    # (Previously the final report indexed TOTAL_AMOUNT with [0], which
    # crashed whenever every recommendation was the plain 0.)
    fraction = float(np.ravel(investment_rec[i])[0])
    total_invested = TOTAL_AMOUNT * fraction
    TOTAL_AMOUNT-=total_invested
    # Invest today, cash out at tomorrow's price.
    returned = total_invested*float(data[i+1]/data[i])
    TOTAL_AMOUNT+=returned
    inv_perf.append(returned-total_invested)

total_gain = float(TOTAL_AMOUNT - 100)
zero = np.zeros(len(inv_perf))
print("Total Percent Gain with this Investment Strategy: %s"%(total_gain))
plt.title("Percent Profit and Loss per Day")
plt.xlabel("Day of Investment")
plt.ylabel("Percent Return")
k, = plt.plot(inv_perf, label="Cumulative Gain/Loss: %.3f (Percent)"%(total_gain))
z, = plt.plot(zero, label="Zero Gain/Loss Line", color='r', alpha=0.2)
plt.legend(handles=[k,z])
plt.show()
   

# How do we do over all of our models?

Here, we plot a bar chart which tells us the gain and loss of our naive investment strategy on each of the market models we trained

In [None]:
# Percent gain/loss of the strategy for each trained market model.
# NOTE(review): these values are entered by hand; presumably each one comes
# from running this notebook for a different stock - confirm and record which.
performance = [3.572, -1.146, -0.229, 0.056, -0.643, 0.032, -0.252, -0.340, 0.128, 0.001, 0.075, 0.263, -4, 0.312, 1.949, -0.108, 0.658, 0.126, -0.329]
# Explicit NumPy calls: the bare sort()/sum() names only exist because of the
# %pylab star import.
performance = np.sort(performance)
sum_p = np.sum(performance)

# Red bars for losses, green for gains (zero counts as a gain).
colors = ['r' if gain < 0 else 'g' for gain in performance]

x = np.arange(len(performance))
plt.title("Total performance of all models in percentage")
plt.xlabel("Model")
plt.ylabel("Performance (Percent)")
# %+.3f prints the real sign; a hard-coded "+" would show "+-x" for a net loss.
b = plt.bar(x, performance, color=colors, alpha=0.3, label="Total Gain: %+.3f"%(sum_p) )
plt.legend(handles=[b])
plt.show()