In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statistics as stat

import os
import logging
import math
import itertools

import scipy.stats
import typing
import random
import time
from datetime import datetime

import tensorflow as tf
from sklearn.cross_validation import train_test_split

import softops
import tensorboard

logging.basicConfig(level=logging.DEBUG)

In [None]:
class TradingState:
    """
    Snapshot of a trading account, expressed as TensorFlow tensors.

    Attributes:
    - money: cash tensor; it is summed over axis 1 to obtain `money_value`
    - stocks: amount of shares held
    - stock_prices: prices used to value `stocks`
    - inner_state: opaque value owned by the trader; stored untouched
    - money_value: total cash (sum of `money` over axis 1 unless provided)
    - stocks_value: total value of the shares (sum of `stocks * stock_prices` over axis 1 unless provided)
    - account_value: `money_value + stocks_value` unless provided
    """

    def __init__(self, money: tf.Tensor, stocks: tf.Tensor, stock_prices: tf.Tensor, inner_state: typing.Any = None, money_value: typing.Optional[tf.Tensor] = None, stocks_value: typing.Optional[tf.Tensor] = None, account_value: typing.Optional[tf.Tensor] = None):
        """
        Wraps every tensor in a named `tf.identity` so it is easy to find in the graph.

        Any of `money_value`, `stocks_value` and `account_value` left as `None` is
        derived from the other fields.
        """
        self.money = tf.identity(money, name='money')
        self.stocks = tf.identity(stocks, name='stocks')
        self.stock_prices = tf.identity(stock_prices, name='stock_prices')
        self.inner_state = inner_state

        if money_value is not None:
            self.money_value = tf.identity(money_value, name='money_value')
        else:
            self.money_value = tf.reduce_sum(self.money, axis=1, name='money_value')

        if stocks_value is not None:
            self.stocks_value = tf.identity(stocks_value, name='stocks_value')
        else:
            self.stocks_value = tf.reduce_sum(self.stocks*self.stock_prices, axis=1, name='stocks_value')

        if account_value is not None:
            self.account_value = tf.identity(account_value, name='account_value')
        else:
            self.account_value = tf.add(self.money_value, self.stocks_value, name='account_value')

    def copy(self, **kwargs):
        """
        Returns a new `TradingState` equal to this one, with the fields in `kwargs` replaced.

        Aggregates whose inputs are replaced are recomputed instead of being copied:
        overriding `money` refreshes `money_value`, overriding `stocks` or
        `stock_prices` refreshes `stocks_value`, and any of those (or an explicit
        `money_value` / `stocks_value`) refreshes `account_value`. An aggregate that
        is passed explicitly in `kwargs` is always used as given.
        """
        args = dict(
            money = self.money,
            stocks = self.stocks,
            stock_prices = self.stock_prices,
            inner_state = self.inner_state,
            money_value = self.money_value,
            stocks_value = self.stocks_value,
            account_value = self.account_value)

        # A cached aggregate is only valid while the tensors it was derived from are
        # unchanged; resetting it to None makes __init__ derive it again.
        stale = set()
        if 'money' in kwargs:
            stale.update(['money_value', 'account_value'])
        if 'stocks' in kwargs or 'stock_prices' in kwargs:
            stale.update(['stocks_value', 'account_value'])
        if 'money_value' in kwargs or 'stocks_value' in kwargs:
            stale.add('account_value')
        for key in stale:
            args[key] = None

        # Explicit overrides always win, including explicitly supplied aggregates.
        args.update(kwargs)
        return TradingState(**args)

In [None]:
# Indices into the last (feature) axis of the stock-history tensors.
# NOTE(review): the data pipeline must emit the features in exactly this order — confirm against the loader.
FEATURE_LOW    = 0
FEATURE_HIGH   = 1
FEATURE_OPEN   = 2
FEATURE_CLOSE  = 3
FEATURE_VOLUME = 4

# Indices into the last axis of the trader's response tensor.
RESPONSE_BUY_PRICE   = 0
RESPONSE_BUY_AMOUNT  = 1
RESPONSE_SELL_LOW    = 2
RESPONSE_SELL_HIGH   = 3

# Indices into axis 1 of the money tensor: column 0 is what the simulator spends from,
# column 1 is added to it when the day is rolled over.
MONEY_TODAY = 0
MONEY_TOMORROW = 1

# Flat fee charged per executed order, weighted by the (soft) execution kernel.
SELL_TRANSACTION_COST = 5 # 5 USD per transaction
BUY_TRANSACTION_COST = 5  # 5 USD per transaction

def trader_builder(historic_window_size, trainable_variables_node):    
    """
    Creates the trader's trainable variables and returns the function that builds the trader network.

    Arguments:
    - historic_window_size: number of historic days the trader looks at (first dimension of the coefficient matrices)
    - trainable_variables_node: context manager entered while the variables are created (the caller passes a `tf.name_scope`)

    Returns: (build_trader, first_inner_state)
    - build_trader: function that adds one trader evaluation to the graph; every call shares the variables created here
    - first_inner_state: value to be used as `inner_state` on the first call
    """
    with trainable_variables_node:
        # One coefficient per (day in the window, price feature); zero-initialised
        coef_buy_price  = tf.Variable(tf.zeros([historic_window_size, 4]), name="buy_coefs")
        coef_sell_price = tf.Variable(tf.zeros([historic_window_size, 4]), name="sell_coefs")
        # [0] is the bias of the buy price, [1] is the bias of the high sell price
        coef_bias = tf.Variable([-1.,+1.], name="biases")
        
    # Dummy value: this trader returns its inner state unchanged
    first_inner_state = tf.constant([1,2,3])
        
    def build_trader(state: TradingState, historic_data: tf.Tensor, inner_state: typing.Any) -> typing.Tuple[tf.Tensor, typing.Any]:
        """
        Builds the trader network on tensorflow

        Arguments:
        - state: Trading state coming from the previous day (not used by this trader)
        - historic_data: tensor[minibatch, time, company] -> features indexed by the FEATURE_* constants.
          The last feature is dropped before normalization.
        - inner_state: Opaque trader state; returned unchanged

        Returns: (response, inner_state), where response is
        tensor[minibatch, company] -> [buy_price, buy_amount, sell_low_price, sell_high_price]
        - buy_price: The price for which you want to buy stocks
        - buy_amount: Fraction of the cash available to be used to buy stocks from this company (If the price target is reached).
          Fixed at 0.1 by this trader.
        - sell_low_price: If the stock price reaches less than this amount, all of the owned stocks will be sold
          (Worst case: prices are crashing, minimize losses). This trader sets it so low that it is effectively disabled.
        - sell_high_price: If the stock price reaches more than this amount, all of the owned stocks will be sold
          (Ideal case, sell when the price is high)
        """
        with tf.name_scope('normalize_input'):
            # Reference price for each day: the open of the first day, then the close of the previous day
            normalize_factor = tf.concat(
                [
                    tf.expand_dims(historic_data[:, 0, :, FEATURE_OPEN ], 1),
                    historic_data[:, :-1, :, FEATURE_CLOSE],
                ],
                axis = 1,
                name = 'factor'
            )
            # 100 * log(price / reference price)
            normalized_history = tf.multiply(
                100., 
                tf.log( historic_data[:,:,:,:-1] / normalize_factor[:, :, :, tf.newaxis]), 
                name='normalized_history'
            )
        
        with tf.name_scope('buy_price'):
            # Linear model: bias + sum over (time, feature) of history * coefficient
            buy_price = coef_bias[0] + tf.reduce_sum(normalized_history * coef_buy_price[tf.newaxis, :, tf.newaxis, :], axis=[1,3])
        with tf.name_scope('buy_amount'):
            buy_amount = tf.fill(tf.shape(buy_price), value=0.1)
        with tf.name_scope('sell_low_price'):
            sell_low_price = tf.fill(tf.shape(buy_price), value=-9999.)  # Never!
        with tf.name_scope('sell_high_price'):
            sell_high_price = coef_bias[1] + tf.reduce_sum(normalized_history * coef_sell_price[tf.newaxis, :, tf.newaxis, :], axis=[1,3])

        with tf.name_scope('denormalize_prices'):
            def denorm(x, name=None):
                # Inverse of the normalization above, relative to the last close of the window
                return tf.multiply(tf.exp(x / 100), historic_data[:, -1, :, FEATURE_CLOSE], name = name)
            buy_price = denorm(buy_price, 'buy_price')
            sell_low_price = denorm(sell_low_price, 'sell_low_price')
            sell_high_price = denorm(sell_high_price, 'sell_high_price')
            
        with tf.name_scope('result'):
            # The stacking order must match the RESPONSE_* constants
            result = tf.stack([buy_price, buy_amount, sell_low_price, sell_high_price], axis=2)
            return (result, inner_state)
        
    return build_trader, first_inner_state

In [None]:
def build_env_step(build_trader, state: TradingState, historic_data: tf.Tensor, next_day_data: tf.Tensor) -> typing.Dict[str, typing.Any]:
    """
    Builds a single-day trading environment.

    Arguments:
    - build_trader: function called as build_trader(state=, historic_data=, inner_state=) returning (response, new_inner_state)
    - state: Trading state coming from the previous day
    - historic_data: tensor[minibatch, time, company] -> features indexed by the FEATURE_* constants
    - next_day_data: tensor[minibatch, company] -> features indexed by the FEATURE_* constants

    Returns: keyword arguments for `TradingState` describing the state at the end of
    the next trading day (keys: money, stocks, stock_prices, stocks_value, inner_state).

    Order execution is computed with `softops` kernels.
    NOTE(review): these are presumably differentiable 0..1 relaxations of the hard comparisons — confirm in `softops`.
    """
    with tf.name_scope("trader"):
        trader, new_inner_state = build_trader(state = state, historic_data = historic_data, inner_state = state.inner_state)

    with tf.name_scope("simulator"):
        softness = 1

        with tf.name_scope("slicing"):
            buy_price       = trader[:,:, RESPONSE_BUY_PRICE]
            buy_amount      = trader[:,:, RESPONSE_BUY_AMOUNT]
            sell_low_price  = trader[:,:, RESPONSE_SELL_LOW]
            sell_high_price = trader[:,:, RESPONSE_SELL_HIGH]
            current_money   = state.money[:, MONEY_TODAY]
            current_stocks  = state.stocks
            eod_stock_prices = next_day_data[:, :, FEATURE_CLOSE]

        # Executes SELL transactions if prices reaches BELOW a threshold (minimize losses)
        # (If the day opens below the threshold for LOW SELL, use the open price)
        with tf.name_scope("sell_low"):
            sell_low_price = tf.identity(
                sell_low_price,
                name='order_price')
            sell_low_price = tf.minimum(
                next_day_data[:, :, FEATURE_OPEN],
                sell_low_price,
                name = 'actual_price')
            sell_low_amount = tf.identity(
                current_stocks,
                name = 'order_amount')
            # Executed when the day's low reaches the order price and there is something to sell
            sell_low_kernel = tf.multiply(
                softops.gte(sell_low_price, next_day_data[:, :, FEATURE_LOW], percent = True, softness = softness),
                softops.positive(sell_low_amount),
                name = 'order_executed')

            sell_low_stock_bought = tf.zeros(tf.shape(sell_low_kernel), name='stock_bought')
            sell_low_stock_sold = tf.multiply(sell_low_kernel, sell_low_amount, name='stock_sold')
            money_earned_sell_low = tf.reduce_sum(sell_low_stock_sold * sell_low_price, axis=1, name='money_earned')
            money_spent_sell_low = tf.reduce_sum(sell_low_kernel * SELL_TRANSACTION_COST, axis=1, name='money_spent')

        # Executes SELL transactions if prices reaches ABOVE a threshold (Maximize gains)
        with tf.name_scope("sell_high"):
            sell_high_price = tf.identity(
                sell_high_price,
                name='order_price')
            sell_high_price = tf.identity(
                sell_high_price,
                name='actual_price')
            # Only what was not already sold by the low-sell order is available
            sell_high_amount = tf.identity(
                current_stocks - sell_low_stock_sold,
                name = 'order_amount')
            # Executed when the day's high reaches the order price and there is something to sell
            sell_high_kernel = tf.multiply(
                softops.lte(sell_high_price, next_day_data[:, :, FEATURE_HIGH], percent = True, softness = softness),
                softops.positive(sell_high_amount),
                name = 'order_executed')

            sell_high_stock_bought = tf.zeros(tf.shape(sell_high_kernel), name='stock_bought')
            sell_high_stock_sold = tf.multiply(sell_high_kernel, sell_high_amount, name='stock_sold')
            money_earned_sell_high = tf.reduce_sum(sell_high_stock_sold * sell_high_price, axis=1, name='money_earned')
            money_spent_sell_high = tf.reduce_sum(sell_high_kernel * SELL_TRANSACTION_COST, axis=1, name='money_spent')

        # Executes BUY transactions if the day's low reaches the order price (price at or BELOW the threshold)
        # (Normalize `buy_amount` to be in amount of stocks, instead of fraction of my money)
        with tf.name_scope("buy"):
            buy_price = tf.identity(
                buy_price,
                name = 'order_price')
            buy_price = tf.identity(
                buy_price,
                name = 'actual_price')
            buy_amount = softops.floor(
                buy_amount * current_money[:, tf.newaxis] / buy_price,
                name = 'order_amount')
            buy_kernel = tf.multiply(
                softops.gte(buy_price, next_day_data[:, :, FEATURE_LOW], percent = True, softness = softness),
                softops.positive(buy_amount),
                name='kernel')

            # Op names now match their content (they were swapped: the bought amount was
            # labelled 'stock_sold' and the zero tensor 'stock_bought').
            buy_stock_bought = tf.multiply(buy_kernel, buy_amount, name='stock_bought')
            buy_stock_sold = tf.zeros(tf.shape(buy_kernel), name='stock_sold')
            money_earned_buy = tf.zeros(tf.shape(current_money), name='money_earned')
            money_spent_buy = tf.reduce_sum(buy_stock_bought * buy_price + buy_kernel * BUY_TRANSACTION_COST, axis=1, name='money_spent')

        with tf.name_scope("update_money"):
            total_money_earned = money_earned_sell_high + money_earned_sell_low + money_earned_buy
            total_money_spent = money_spent_sell_low + money_spent_sell_high + money_spent_buy
            # Shift the money columns by one day: column 1 is merged into today's cash,
            # the remaining columns move forward and today's earnings enter at the end.
            new_money_first = tf.expand_dims(current_money - total_money_spent + state.money[:, MONEY_TOMORROW], axis = 1)
            new_money_middle = state.money[:, 2:]
            new_money_last = tf.expand_dims(total_money_earned, axis = 1)

            next_money = tf.concat([
                new_money_first,
                new_money_middle,
                new_money_last
            ], axis = 1)

            with tf.name_scope("money_earned"):
                tf.summary.histogram('values', total_money_earned)
                tf.summary.scalar('mean', tf.reduce_mean(total_money_earned))
            with tf.name_scope("money_spent"):
                tf.summary.histogram('values', total_money_spent)
                tf.summary.scalar('mean', tf.reduce_mean(total_money_spent))
            with tf.name_scope("money_delta"):
                tf.summary.histogram('values', total_money_earned - total_money_spent)
                tf.summary.scalar('mean', tf.reduce_mean(total_money_earned - total_money_spent))
            with tf.name_scope("money_final"):
                tf.summary.histogram('values', tf.reduce_sum(next_money, axis=1))
                tf.summary.scalar('mean', tf.reduce_mean(tf.reduce_sum(next_money, axis=1)))

        with tf.name_scope("update_stocks"):
            total_stocks_bought = sell_low_stock_bought + sell_high_stock_bought + buy_stock_bought
            total_stocks_sold = sell_low_stock_sold + sell_high_stock_sold + buy_stock_sold
            next_stocks = current_stocks + total_stocks_bought - total_stocks_sold
            # Holdings are valued at the close of the simulated day
            next_stocks_value = tf.reduce_sum(next_stocks * eod_stock_prices, axis=1)

#             with tf.name_scope("stocks_bought"):
#                 tf.summary.histogram('values', total_stocks_bought)
#                 tf.summary.scalar('mean', tf.reduce_mean(total_stocks_bought))
#             with tf.name_scope("stocks_sold"):
#                 tf.summary.histogram('values', total_stocks_sold)
#                 tf.summary.scalar('mean', tf.reduce_mean(total_stocks_sold))
#             with tf.name_scope("stocks_delta"):
#                 tf.summary.histogram('values', total_stocks_bought - total_stocks_sold)
#                 tf.summary.scalar('mean', tf.reduce_mean(total_stocks_bought - total_stocks_sold))
#             with tf.name_scope("stocks_final"):
#                 tf.summary.histogram('values', next_stocks)
#                 tf.summary.scalar('mean', tf.reduce_mean(next_stocks))
            with tf.name_scope("stocks_value_final"):
                tf.summary.histogram('values', next_stocks_value)
                tf.summary.scalar('mean', tf.reduce_mean(next_stocks_value))
            with tf.name_scope("stocks_value_delta"):
                tf.summary.histogram('values', next_stocks_value - state.stocks_value)
                tf.summary.scalar('mean', tf.reduce_mean(next_stocks_value - state.stocks_value))

        return dict(
            money = next_money,
            stocks = next_stocks,
            stock_prices = eod_stock_prices,
            stocks_value= next_stocks_value,
            inner_state = new_inner_state
        )
        

In [None]:
def build_env(trainable_variables_node, historic_data, initial_state, num_days, historic_window_size):
    """
    Unrolls `num_days` consecutive trading days into the graph.

    Arguments:
    - trainable_variables_node: context manager under which the trader variables are created
    - historic_data: tensor indexed as [minibatch, time, company, feature]
    - initial_state: keyword arguments for the first `TradingState` (without `inner_state`)
    - num_days: how many days to simulate
    - historic_window_size: how many days of history the trader sees before each simulated day

    Returns: list with `num_days + 1` states; element 0 is the initial state and
    element k is the state at the end of simulated day k.
    """
    build_trader, first_inner_state = trader_builder(historic_window_size, trainable_variables_node)

    with tf.name_scope('state_0'):
        state = TradingState(inner_state = first_inner_state, **initial_state)
    states = [state]

    for day in range(num_days):
        # The trader sees [day, window_end) and is evaluated on the day at `window_end`
        window_end = day + historic_window_size

        with tf.name_scope(f'day_{day}'):
            with tf.name_scope('historic_data'):
                history = historic_data[:, day:window_end, : ,:]
            with tf.name_scope('next_day_data'):
                upcoming_day = historic_data[:, window_end, : ,:]

            step_result = build_env_step(
                build_trader = build_trader,
                state = state,
                historic_data = history,
                next_day_data = upcoming_day
            )

        with tf.name_scope(f'state_{day+1}'):
            state = TradingState(**step_result)

        states.append(state)

    return states

In [None]:
def build_graph(warmup_days = 5, evaluated_days = 20, historic_window_size = 5, money_settle_time = 3, check_numerics = False) -> tf.Graph:
    """
    Builds the complete trading graph: inputs, unrolled simulation and evaluation summaries.

    Arguments:
    - warmup_days: simulated days that are excluded from the evaluation
    - evaluated_days: simulated days whose account value variation is evaluated
    - historic_window_size: days of history given to the trader before each simulated day
    - money_settle_time: number of columns of the money tensor; must be at least 2
      because the simulator reads columns MONEY_TODAY and MONEY_TOMORROW
    - check_numerics: when True, adds `check_numerics` ops for the tensors created here
      and groups them under the operation named 'check_numerics'. The checks only take
      effect when that operation is run (e.g. fetched together with the training op);
      ops added to the graph afterwards (such as gradients) are not covered.

    Returns: the new graph. Its tensors are meant to be retrieved by name, e.g.
    'inputs/stock_history:0', 'inputs/initial_money_simple:0', 'iteration:0'.

    Raises: ValueError if `money_settle_time` is smaller than 2.
    """
    if money_settle_time < 2:
        raise ValueError(f'money_settle_time must be at least 2, got {money_settle_time}')

    # The placeholder must hold the first history window plus every simulated day
    total_days = evaluated_days + warmup_days + historic_window_size

    graph = tf.Graph()
    with graph.as_default():
        trainable_variables_node = tf.name_scope("trainable_parameters")
        # Step counter (used as the optimizer's global step); it must never be optimized
        iteration = tf.Variable(0, name='iteration', dtype=tf.int32, trainable=False)

        with tf.name_scope("inputs"):
            stock_history = tf.placeholder(tf.float32, shape=(None, total_days, None, 5), name="stock_history")

            minibatch_size = tf.shape(stock_history)[0]
            num_companies = tf.shape(stock_history)[2]
            default_initial_stocks = tf.zeros([minibatch_size, num_companies], name = 'default_initial_stocks')
            initial_stocks = tf.placeholder_with_default(default_initial_stocks, shape = (None, None), name = 'initial_stocks')

            # By default all the money is in the first column and nothing is pending
            initial_money_simple = tf.placeholder_with_default(10000., shape = (), name = 'initial_money_simple')
            default_initial_money = tf.concat(
                [
                    initial_money_simple * tf.ones([minibatch_size, 1]),
                    tf.zeros([minibatch_size, money_settle_time-1])
                ],
                axis = 1,
                name = 'default_initial_money')
            initial_money = tf.placeholder_with_default(default_initial_money, shape = (None, money_settle_time), name = 'initial_money')

            with tf.name_scope("initial_state"):
                initial_state = dict(
                    money = initial_money,
                    stocks = initial_stocks,
                    # Close of the last day of the first history window
                    stock_prices = stock_history[:, historic_window_size - 1, :, FEATURE_CLOSE],
                )

        with tf.name_scope("trading"):
            all_states = build_env(trainable_variables_node, stock_history, initial_state, warmup_days + evaluated_days, historic_window_size)
            initial_account_value = all_states[0].account_value

        with tf.name_scope("evaluation"):
            with tf.name_scope("account_values"):
                # evaluated_days + 1 values, so that there are evaluated_days variations
                account_values = tf.stack([
                    all_states[i].account_value
                    for i in range(warmup_days, warmup_days + evaluated_days + 1)
                ], axis = 1, name = 'values')
            with tf.name_scope("daily_variation"):
                daily_variation = tf.div(account_values[:, 1:], account_values[:, :-1], name = "values")
            with tf.name_scope("normalized_daily_variation"):
                normalized_daily_variation = tf.multiply(100., tf.log(daily_variation), name = "values")

            with tf.name_scope("summaries"):
                with tf.name_scope("daily_variation"):
                    tf.summary.histogram('values', tf.clip_by_value(normalized_daily_variation, -10, +10, name='values'))
                    tf.summary.scalar('mean', tf.reduce_mean(normalized_daily_variation, name='mean'))
                with tf.name_scope("final_account_value"):
                    tf.summary.histogram('values', tf.clip_by_value(all_states[-1].account_value, 0, 2*initial_account_value, name='values'))
                    tf.summary.scalar('mean', tf.reduce_mean(all_states[-1].account_value, name='mean'))
                with tf.name_scope("final_money"):
                    tf.summary.histogram('values', tf.clip_by_value(all_states[-1].money_value, 0, 2*initial_account_value, name='values'))
                    tf.summary.scalar('mean', tf.reduce_mean(all_states[-1].money_value, name='mean'))
                with tf.name_scope("final_stocks_value"):
                    tf.summary.histogram('values', tf.clip_by_value(all_states[-1].stocks_value, 0, 2*initial_account_value, name='values'))
                    tf.summary.scalar('mean', tf.reduce_mean(all_states[-1].stocks_value, name='mean'))

        if check_numerics:
            # add_check_numerics_ops() only RETURNS an op that triggers the checks; give it
            # a stable name so callers can fetch it with graph.get_operation_by_name().
            tf.group(tf.add_check_numerics_ops(), name='check_numerics')
    return graph

In [None]:
# Build a standalone copy of the graph (numeric checks included) just for inspection.
# `tensorboard` here is the project-local helper module — presumably renders the graph inline; confirm.
graph = build_graph(check_numerics=True)
tensorboard.show_graph(graph)

In [None]:
# Locations of the raw price CSVs, relative to the notebook's working directory.
DATASET_DIR = "dataset/dataset-2017-10-11"
STOCK_DIR = f"{DATASET_DIR}/Stocks"
ETF_DIR = f"{DATASET_DIR}/ETFs"

# Ticker symbols taken from the 'Ticker symbol' column of the tab-separated S&P 500 listing.
sp500 = pd.read_csv('dataset/s&p500.tsv', sep='\t')['Ticker symbol']

def concat_datasets(datasets):
    """
    Stacks per-symbol frames into a single frame.

    `datasets` maps a symbol to its DataFrame. Every frame gets a `Symbol` column
    holding its key (the input frames are not modified) and the results are
    concatenated in the mapping's iteration order, keeping each frame's own index.
    """
    labelled_frames = []
    for symbol, frame in datasets.items():
        labelled_frames.append(frame.assign(Symbol = symbol))
    return pd.concat(labelled_frames)

def dataset_to_timeseries(df, days):
    """
    Concatenates `days` sequential values to create a larger feature array.

    Every output row is a sliding window of `days` consecutive rows of `df`.
    The columns are `Date` followed by, for each suffix k in 0..days-1, the
    features `Low.k, High.k, Open.k, Close.k, Volume.k` — the same order as the
    FEATURE_* indices, because consumers address the features by position.

    Suffix 0 is the LAST row of the window and suffix k is the row k positions
    before it; `Date` is the date of the last row of the window.
    NOTE(review): if `df` is sorted by ascending date, suffix 0 is the most recent
    day, so a consumer that reads the suffixes as a forward time axis sees time
    reversed — confirm the intended orientation against the CSV ordering.

    Arguments:
    - df: frame with the columns Date, Low, High, Open, Close and Volume
    - days: window length (at least 1)

    Returns: DataFrame with `len(df) - days + 1` rows (no rows if `df` is shorter
    than `days`).

    Raises: ValueError if `days` is smaller than 1.
    """
    if days < 1:
        raise ValueError(f'days must be at least 1, got {days}')

    # Must match FEATURE_LOW, FEATURE_HIGH, FEATURE_OPEN, FEATURE_CLOSE, FEATURE_VOLUME
    feature_columns = ['Low', 'High', 'Open', 'Close', 'Volume']
    num_windows = max(len(df) + 1 - days, 0)

    newest = days - 1  # offset of the last row inside each window
    columns = ['Date']
    data = {'Date': df['Date'].values[newest:newest + num_windows]}
    for lag in range(days):
        start = newest - lag
        for col in feature_columns:
            name = f'{col}.{lag}'
            columns.append(name)
            data[name] = df[col].values[start:start + num_windows]

    return pd.DataFrame(data, columns=columns)

def split_dataset_by_date(dataset, test_size = 0.2):
    """
    Splits `dataset` into shuffled train and test frames so that no value of
    `Date` appears in both.

    The distinct dates are partitioned with `train_test_split`; each frame then
    receives every row whose `Date` belongs to its partition, in random order
    and with a fresh 0..n-1 index.

    NOTE(review): if the rows are overlapping multi-day windows, neighbouring
    dates share most of their content, so a random split by date may still leak
    information between train and test — confirm this is acceptable.

    Returns: (train_dataset, test_dataset)
    """
    unique_dates = sorted(set(dataset.Date))
    train_dates, test_dates = train_test_split(unique_dates, test_size=test_size)

    def shuffled_rows_for(dates):
        selected = dataset[dataset.Date.isin(set(dates))]
        return selected.sample(frac=1).reset_index(drop=True)

    train_dataset = shuffled_rows_for(train_dates)
    test_dataset = shuffled_rows_for(test_dates)
    return (train_dataset, test_dataset)

def extract_all_data(symbols, feature_days=30, min_date='1990-01-01'):
    """
    Loads the price history of `symbols` from STOCK_DIR and turns it into samples.

    A file belongs to a symbol when the part of its name before the first '.'
    equals the symbol, ignoring case. Rows dated before `min_date` are dropped,
    each symbol is converted with `dataset_to_timeseries(..., feature_days)`, and
    the combined samples are split by date.

    Files that cannot be read or parsed are skipped and reported in a single
    warning instead of aborting the whole load.

    Returns: (train_samples, test_samples)

    Raises: ValueError if no file could be loaded for any of the symbols.
    """
    symbols = set(symbols)
    failed_csvs = []
    timeseries_feature_data = {}
    for filename in sorted(os.listdir(STOCK_DIR)):
        symbol = filename.split('.')[0].upper()
        if symbol not in symbols:
            continue

        try:
            raw = pd.read_csv(f"{STOCK_DIR}/{filename}")
        except (OSError, ValueError):
            # pandas reports empty and malformed files with ValueError subclasses
            failed_csvs.append(filename)
            continue

        # Dates are compared as strings, which orders ISO formatted (YYYY-MM-DD) dates correctly
        recent = raw[raw.Date >= min_date]
        timeseries_feature_data[symbol] = dataset_to_timeseries(recent, feature_days)

    if failed_csvs:
        logging.warning(f'Failed to read {len(failed_csvs)} CSV files: {failed_csvs}')

    if not timeseries_feature_data:
        raise ValueError(f'No stock data found in {STOCK_DIR} for symbols {sorted(symbols)}')

    all_samples = concat_datasets(timeseries_feature_data)
    train_samples, test_samples = split_dataset_by_date(all_samples)
    return train_samples, test_samples

def create_minibatch(dataset, minibatch_size, num_companies):
    """
    Draws a random minibatch from `dataset`.

    `dataset` must have the date in the column `Date`, which must also be the
    first column, the symbol as the last column, and in between a multiple of 5
    feature columns (5 features per day).

    For every element of the minibatch a row of `dataset` is picked uniformly at
    random (so dates with more rows are more likely) and `num_companies` rows of
    that same date are sampled with replacement.

    Returns: array of shape [minibatch_size, days, num_companies, 5]
    """
    # Picked by position, so the result does not depend on the index labels
    all_dates = dataset['Date'].tolist()

    minibatch = []
    for _ in range(minibatch_size):
        date = random.choice(all_dates)
        dataset_at_date = dataset[dataset.Date == date]

        samples = dataset_at_date.sample(n = num_companies, replace = True)
        # Drop the first (Date) and last (Symbol) columns.
        # `.values` replaces `DataFrame.as_matrix`, which was removed in pandas 1.0.
        features = samples.iloc[:, 1:-1].values
        features = features.reshape([num_companies, -1, 5])  # [company, day, feature]
        features = features.transpose((1,0,2))               # [day, company, feature]
        minibatch.append(features)
    return np.stack(minibatch)
    
def minibatch_producer(symbols, feature_days=30, min_date='2013-01-01', minibatch_size=10, num_companies=10):
    """
    Loads the data for `symbols` once and returns a function that draws minibatches from it.

    The returned `produce(train=True, minibatch_size=..., num_companies=...)` samples
    from the train split when `train` is true and from the test split otherwise; its
    size arguments default to the values given here.
    """
    train_samples, test_samples = extract_all_data(symbols, feature_days=feature_days, min_date=min_date)

    def produce(train=True, minibatch_size=minibatch_size, num_companies=num_companies):
        if train:
            source = train_samples
        else:
            source = test_samples
        return create_minibatch(dataset=source, minibatch_size=minibatch_size, num_companies=num_companies)

    return produce

# Symbols to trade; the trailing comment keeps `sp500` at hand as the alternative selection.
companies = ['GOOG', 'AMZN', 'NFLX', 'TSLA', 'FB', 'AAPL', 'INTC', 'QCOM', 'DIS', 'NVDA'] #sp500
# Reads and splits the data right away; the returned function only samples from it.
next_minibatch = minibatch_producer(companies)

In [None]:
# Training cell: optimizes the trader forever, evaluating on test data and saving a
# checkpoint about once per second. Interrupt the kernel to stop; a final checkpoint
# is written on the way out.
#
# Numeric checks are added below (after the optimizer, so that the gradients are
# covered too), hence the graph itself is built without them.
with build_graph(check_numerics = False).as_default() as training_graph:
    INITIAL_MONEY = 50000
    EVALUATION_PERIOD_SECONDS = 1

    checkpoint_file = os.path.abspath(f'checkpoint/trader.ckpt')
    tensorboard_dir = os.path.abspath('tensorboard_logs')
    tensorboard_run = datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
    tensorboard_run_dir = f'{tensorboard_dir}/{tensorboard_run}'
    os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)
    os.makedirs(os.path.dirname(tensorboard_run_dir), exist_ok=True)

    input_stock_history = training_graph.get_tensor_by_name('inputs/stock_history:0')
    input_initial_money = training_graph.get_tensor_by_name('inputs/initial_money_simple:0')

    iteration = training_graph.get_tensor_by_name('iteration:0')

    buy_coefs = training_graph.get_tensor_by_name('trading/trainable_parameters/buy_coefs:0')
    sell_coefs = training_graph.get_tensor_by_name('trading/trainable_parameters/sell_coefs:0')

    # 'values' is the daily variation clipped to [-10, +10]; 'mean' is computed on the unclipped values
    evaluation_normalized_profit = training_graph.get_tensor_by_name('evaluation/summaries/daily_variation/values:0')
    evaluation_normalized_profit_mean = training_graph.get_tensor_by_name('evaluation/summaries/daily_variation/mean:0')
    all_summaries = tf.summary.merge_all()

    optimizer = tf.train.MomentumOptimizer(learning_rate=0.0005, momentum=0.9, use_nesterov=True)
    training_op = optimizer.minimize(-evaluation_normalized_profit_mean, global_step = iteration)

    # add_check_numerics_ops() only returns an op; the checks happen when that op is run.
    # It must be created BEFORE the Saver, otherwise running it would also execute the
    # Saver's restore ops on every step.
    check_numerics_op = tf.add_check_numerics_ops()

    saver = tf.train.Saver()
    with tf.summary.FileWriter(tensorboard_run_dir, training_graph) as tensorboard_writer:
        display(tensorboard.Server.of(tensorboard_dir).badge(run=tensorboard_run))

        with tf.Session() as session:
            tf.global_variables_initializer().run()
            # To resume a previous run, restore the variables here:
            #saver.restore(session, checkpoint_file)

            # Forces an evaluation on the first iteration
            last_checkout = time.time() - EVALUATION_PERIOD_SECONDS
            try:
                while True:
                    now = time.time()
                    if (now - last_checkout) >= EVALUATION_PERIOD_SECONDS:
                        last_checkout = now
                        saver.save(session, checkpoint_file)

                        test_params = {
                            input_stock_history: next_minibatch(train=False, minibatch_size=100, num_companies=10),
                            input_initial_money: INITIAL_MONEY
                        }
                        result = session.run([iteration, evaluation_normalized_profit, all_summaries], feed_dict=test_params)
                        i = result[0]
                        normalized_evaluation_values = result[1]
                        summaries = result[2]

                        print(f'Epoch [{i}] -- trader_evaluation: {normalized_evaluation_values.mean():.2f}±{normalized_evaluation_values.std():.2f} %/day')
                        tensorboard_writer.add_summary(summaries, i)

                    train_params = {
                        input_stock_history: next_minibatch(train=True, minibatch_size=1, num_companies=10),
                        input_initial_money: INITIAL_MONEY
                    }
                    # Fetching the check op makes the step fail as soon as a NaN/Inf shows up
                    session.run([training_op, check_numerics_op], feed_dict=train_params)
            except KeyboardInterrupt:
                saver.save(session, checkpoint_file)
                logging.info(f'Training interrupted; state saved to {checkpoint_file}')