In [396]:
import robin_stocks as r
import os
from datetime import datetime

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from dateutil.relativedelta import relativedelta

#Log in to Robinhood
robinhood_login = r.login('lkleinbrodt@gmail.com', 'RIPmamba24!robinhood')

#Get Alpaca APIs
with open('api_keys.txt') as api_file:
    api_keys = api_file.read().replace('\n', '').split(',')
    alpaca_api = {a.split('=')[0]: a.split('=')[1] for a in api_keys}


In [469]:
df = pd.read_csv('data/historical_data.csv')
df = df.dropna()
ori_df = df.copy()
dates = pd.to_datetime(df.pop('begins_at'))
#keep only the close price columns
df = df[[col for col in df.columns if 'close_price' in col]]
#df = df[df.columns[np.argsort(df.sum()).isin(range(25))]]
#keep only the funds with more than 10 unique value in the first year
#unique_counts = df.head(365).nunique()
#df = df[unique_counts.keys()[unique_counts>10].values]
df = df[['AAPL_close_price', 'AAXN_close_price']]
df = df.diff()
df = df[1:]
ori_df = ori_df[1:]
dates = dates[1:]

In [470]:
train_end= np.min(dates) + relativedelta(months = 44)
val_end = train_end + relativedelta(months = 10) #leaves 6 months to backtest

train_df = df[dates <= train_end]
val_df = df[(dates > train_end) & (dates <= val_end)]
test_df = df[dates > val_end]

In [471]:
class WindowGenerator():
    def __init__(self, input_width, label_width, shift, 
                 train_df=train_df, val_df=val_df, test_df=test_df,
                label_columns=None):
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        
        #Index the labels (and all columns)
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        self.column_indices = {name: i for i,name in enumerate(train_df.columns)}
        
        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
        
    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])
    
    def split_window(self, features):
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack(
            [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis = -1)
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])
        return inputs, labels
    
    def plot(self, plot_col, model=None, max_subplots=3):
        inputs, labels = self.example
        plt.figure(figsize=(12, 8))
        plot_col_index = self.column_indices[plot_col]
        max_n = min(max_subplots, len(inputs))
        for n in range(max_n):
            plt.subplot(3, 1, n+1)
            plt.ylabel(f'{plot_col} [normed]')
            plt.plot(self.input_indices, inputs[n, :, plot_col_index],
             label='Inputs', marker='.', zorder=-10)
            
            if self.label_columns:
                label_col_index = self.label_columns_indices.get(plot_col, None)
            else:
                label_col_index = plot_col_index
            
            if label_col_index is None:
                continue

            plt.scatter(self.label_indices, labels[n, :, label_col_index],
                        edgecolors='k', label='Labels', c='#2ca02c', s=64)
            if model is not None:
                predictions = model(inputs)
                plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                          marker='X', edgecolors='k', label='Predictions',
                          c='#ff7f0e', s=64)
            if n == 0:
                plt.legend()
        plt.xlabel('Time [d]')
    
    def make_dataset(self, data, shuffle = True):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=shuffle,
            batch_size=32
        )
        ds = ds.map(self.split_window)
        
        return ds
    
@property
def train(self):
    return self.make_dataset(self.train_df)

@property
def val(self):
    return self.make_dataset(self.val_df)

@property
def test(self):
    return self.make_dataset(self.test_df)

@property
def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
        # No example batch was found, so get one from the `.train` dataset
        result = next(iter(self.train))
        # And cache it for next time
        self._example = result
    return result

WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example

In [443]:
MAX_EPOCHS = 50
def compile_and_fit(model, window, epochs = MAX_EPOCHS, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience,
                                                      mode='min')
    model.compile(loss=tf.losses.MeanSquaredError(), 
                  optimizer=tf.optimizers.Adam(),
                  metrics=[tf.metrics.MeanAbsoluteError()])
    
    history = model.fit(window.train, epochs = epochs,
                        validation_data=window.val,
                        #callbacks=[early_stopping]
                       )
    return history

In [481]:
CONV_WIDTH = 3
OUT_STEPS = 7
num_features = df.shape[1]

multi_window = WindowGenerator(input_width = 60, label_width = OUT_STEPS, shift = OUT_STEPS, label_columns=['AAXN_close_price'])

one_shot_lstm_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units]
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(4, return_sequences=False),
    # Shape => [batch, out_steps*features]
    tf.keras.layers.Dense(OUT_STEPS,
                          kernel_initializer=tf.initializers.zeros),
    # Shape => [batch, out_steps, features]
    #tf.keras.layers.Reshape([OUT_STEPS, 1])
])

multi_dense_model = tf.keras.Sequential([
    # Take the last time step.
    # Shape [batch, time, features] => [batch, 1, features]
    tf.keras.layers.Lambda(lambda x: x[:, -1:, :]),
    # Shape => [batch, 1, dense_units]
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation = 'relu'),
    # Shape => [batch, out_steps*features]
    tf.keras.layers.Dense(OUT_STEPS,
                          kernel_initializer=tf.initializers.zeros),
    # Shape => [batch, out_steps, features]
    tf.keras.layers.Reshape([OUT_STEPS, 1])
])

compile_and_fit(one_shot_lstm_model, multi_window, epochs = 25)
#compile_and_fit(multi_dense_model, multi_window, epochs = 25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7fb31703ce80>

In [445]:
class FeedBack(tf.keras.Model):
  def __init__(self, units, out_steps):
    super().__init__()
    self.out_steps = out_steps
    self.units = units
    self.lstm_cell = tf.keras.layers.LSTMCell(units)
    # Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
    self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
    self.dense = tf.keras.layers.Dense(num_features)
    
def warmup(self, inputs):
  # inputs.shape => (batch, time, features)
  # x.shape => (batch, lstm_units)
  x, *state = self.lstm_rnn(inputs)

  # predictions.shape => (batch, features)
  prediction = self.dense(x)
  return prediction, state

FeedBack.warmup = warmup

def call(self, inputs, training=None):
  # Use a TensorArray to capture dynamically unrolled outputs.
  predictions = []
  # Initialize the lstm state
  prediction, state = self.warmup(inputs)

  # Insert the first prediction
  predictions.append(prediction)

  # Run the rest of the prediction steps
  for n in range(1, self.out_steps):
    # Use the last prediction as input.
    x = prediction
    # Execute one lstm step.
    x, state = self.lstm_cell(x, states=state,
                              training=training)
    # Convert the lstm output to a prediction.
    prediction = self.dense(x)
    # Add the prediction to the output
    predictions.append(prediction)

  # predictions.shape => (time, batch, features)
  predictions = tf.stack(predictions)
  # predictions.shape => (batch, time, features)
  predictions = tf.transpose(predictions, [1, 0, 2])
  return predictions

FeedBack.call = call
feedback_model = FeedBack(units=32, out_steps=OUT_STEPS)
history = compile_and_fit(feedback_model, multi_window, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [446]:
#n_inputs = multi_window.input_width
#n_features = train_df.shape[1]
#batch = np.array(test_df[-n_inputs:]).reshape((1, n_inputs, n_features))
#one_shot_lstm_model.predict(batch)[0,:,:][-1]

In [482]:
all_predictions = one_shot_lstm_model.predict(multi_window.make_dataset(df, shuffle = False))

In [483]:
seven_day_deltas = [0 for i in range(len(df) - len(all_predictions)-1)] + [np.max(np.cumsum(pred)) for pred in all_predictions] + [0]

In [484]:
starting_cash = 1000
eval_df = ori_df[['AAPL_close_price']].copy()
eval_df['SevenDayHigh'] = seven_day_deltas
cash_reserves = starting_cash
eval_df['Cash'] = 0
eval_df['Cost'] = 0
eval_df['Revenue'] = 0
eval_df['Shares'] = 0
eval_df['PortfolioValue'] = 0
eval_df = eval_df[dates > val_end]

In [485]:
for i in range(len(eval_df)-1):
    max_delta = eval_df['SevenDayHigh'].iloc[i]
    eval_df['Cash'].iloc[i] = cash_reserves
    if max_delta > 0:
        if eval_df['AAPL_close_price'].iloc[i] < cash_reserves:
            eval_df['Cost'].iloc[i] = eval_df['AAPL_close_price'].iloc[i]
            eval_df['Shares'].iloc[i+1] = eval_df['Shares'].iloc[i] + 1
            cash_reserves -= eval_df['AAPL_close_price'].iloc[i]
        else:
            eval_df['Shares'].iloc[i+1] = eval_df['Shares'].iloc[i]
    elif eval_df['Shares'].iloc[i] > 0:
        eval_df['Revenue'].iloc[i] = eval_df['AAPL_close_price'].iloc[i]
        eval_df['Shares'].iloc[i+1] = eval_df['Shares'].iloc[i] - 1
        cash_reserves += eval_df['AAPL_close_price'].iloc[i]
        
    eval_df['PortfolioValue'].iloc[i] = (eval_df['AAPL_close_price'].iloc[i] * eval_df['Shares'].iloc[i])


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [486]:
final_cost = np.sum(eval_df['Cost'])
final_revenue = np.sum(eval_df['Revenue'])
final_assets = eval_df.iloc[-2]['PortfolioValue']
final_cash = cash_reserves
final_return = final_assets + final_cash #- final_cost 

long_term_return = (eval_df['AAPL_close_price'].iloc[-2] / eval_df['AAPL_close_price'].iloc[0])

profit_score = ((final_return) / starting_cash)  / long_term_return

In [487]:
final_return / starting_cash

1.4039775

In [488]:
profit_score

0.8447517320734341