Rough 'demo' of the v0.2 hypersearch functionality (as of 02/12/19). Imports modified to work off a single file. Open this in colab.research.google.com or modify for a local run. For Colab, go to Edit -> Notebook settings and set GPU as the backend to test.

In [0]:
#@title installs
import os
os.chdir('/content/')
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzf ta-lib-0.4.0-src.tar.gz
os.chdir('/content/ta-lib')
! ./configure --prefix=/usr
! make
! make install
os.chdir('/content/')
!pip install python-box
!pip install ta-lib
!pip install gdax

In [0]:
#@title Unzip data
#@markdown Upload bitcoin-historical-data.zip from 
#@markdown https://www.kaggle.com/mczielinski/bitcoin-historical-data
#@markdown to the colab 'Files' directory
!unzip /content/bitcoin-historical-data.zip

In [0]:
#@title utils

import numpy as np
from enum import Enum


class ScoreMode(Enum):
    """Different ways we might consider scoring our runs. This is for BO's sake, not for our RL agent -
    ie helps us decide which hyper combos to pursue."""
    MEAN = 1  # mean of all episodes
    LAST = 2  # final episode (the one w/o killing)
    POS = 3  # max # positive tests
    CONSECUTIVE_POS = 4  # max # *consecutive* positives
    TOTAL = 5  # sum of all episodes
    MIX = 6  # mean of every episode but the last, plus the last episode


MODE = ScoreMode.MEAN


def calculate_score(scores, mode=None):
    """Collapse a run's per-episode scores into the single number the hyper-search optimizes.

    Episodes that scored exactly 0 are counted as -1 before aggregating. The input sequence is left
    untouched (the penalty is applied to a copy), so callers can still persist the raw values.

    :param scores: sequence of per-episode scores
    :param mode: a ScoreMode member; defaults to the module-level MODE
    :return: the aggregated score (float for MEAN/LAST/MIX/TOTAL, int count for POS/CONSECUTIVE_POS)
    :raises ValueError: if `mode` is not a known ScoreMode
    """
    mode = MODE if mode is None else mode
    adjusted = [-1. if s == 0. else s for s in scores]

    if mode == ScoreMode.MEAN:
        return np.mean(adjusted)
    if mode == ScoreMode.LAST:
        return adjusted[-1]
    if mode == ScoreMode.MIX:
        return np.mean(adjusted[:-1]) + adjusted[-1]
    if mode == ScoreMode.POS:
        return sum(1 for s in adjusted if s > 0)
    if mode == ScoreMode.TOTAL:
        return sum(adjusted)
    if mode == ScoreMode.CONSECUTIVE_POS:
        best = streak = 0
        for s in adjusted:
            streak = streak + 1 if s > 0 else 0
            # Update on every element so a streak that runs to the end of the list is counted too.
            best = max(best, streak)
        return best
    raise ValueError(f'Unknown score mode: {mode!r}')


def add_common_args(parser):
    """Register the command-line options shared by the project's scripts on `parser`.

    :param parser: an argparse-style parser exposing `add_argument`
    """
    option_specs = (
        (
            ('-g', '--gpu-split'),
            dict(type=float, default=1, help="Num ways we'll split the GPU (how many tabs you running?)"),
        ),
        (
            ('--autoencode',),
            dict(action="store_true", help="If you're running out of GPU memory, try --autoencode which scales things down"),
        ),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)


# Commit that still contains the functionality stubbed out during the refactor.
last_good_commit = '6a6e49c'


def raise_refactor():
    """Signal that a code path was removed in the refactor and must be restored from git.

    :raises NotImplementedError: always, naming the commit to restore from
    """
    # NotImplementedError is the exception class; `NotImplemented` is a non-callable constant, so
    # `raise NotImplemented(...)` would fail with a TypeError instead of this message.
    raise NotImplementedError(f'Restore from {last_good_commit}')

In [0]:
#@title data
data_file = "coinbaseUSD_1-min_data_2014-12-01_to_2018-11-11.csv" #@param {type:"string"}

import time, json, re, pdb
from os import path
from enum import Enum
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy import text
# from utils import raise_refactor, last_good_commit
from sklearn.preprocessing import robust_scale
import os

# From connecting source file, `import engine` and run `engine.connect()`. Need each connection to be separate
# (see https://stackoverflow.com/questions/3724900/python-ssl-problem-with-multiprocessing)
# config_json = json.load(open(os.path.dirname(__file__) + '/../config.json'))
# DB = config_json['DB_HISTORY'].split('/')[-1]
# engine_runs = create_engine(config_json['DB_RUNS'])

# Decide which exchange you want to trade on (significant even in training). Pros & cons; Kraken's API provides more
# details than GDAX (bid/ask spread, VWAP, etc) which means predicting its next price-action is easier for RL. It
# also has a lower minimum trade (.002 BTC vs GDAX's .01 BTC), which gives it more wiggle room. However, its API is
# very unstable and slow, so when you actually go live your bot will be suffering. GDAX's API is rock-solid. Look
# into the API stability, it may change by the time you're using this. If Kraken is solid, use it instead.
class Exchange(Enum):
    """Exchanges this code knows about; each member's value is the exchange's lowercase name."""
    GDAX = 'gdax'
    KRAKEN = 'kraken'
# Exchange used throughout this file; BitcoinEnv reads it to pick the minimum trade size and the fee.
EXCHANGE = Exchange.KRAKEN

# see {last_good_commit} for imputes (ffill, bfill, zero),
# alex database

def setup_runs_table():
    """Create the `runs` table if it does not already exist.

    Run this function once during project setup (see README). Or just copy/paste the SQL into your runs database.

    NOTE(review): this relies on a module-level SQLAlchemy engine named `engine_runs`. In this single-file
    version the line that creates it is commented out, so it must be defined before calling this function.
    """
    create_sql = text("""
        create table if not exists runs
        (
            id uuid not null,
            hypers jsonb not null,
            returns double precision[],
            signals double precision[],
            prices double precision[],
            uniques double precision[]
        );
    """)
    # begin() commits the DDL on success and always hands the connection back, instead of leaking it.
    with engine_runs.begin() as conn_runs:
        conn_runs.execute(create_sql)

class Data(object):
    """Price-history table plus helpers for slicing it into (episode, step) windows.

    The constructor reads the CSV named by the module-level `data_file` from `/content/`, converts the
    price/volume columns to percent changes, robust-scales every column except the target, and appends zeroed
    `cash`/`value` columns. The result is kept in `self.df`.
    """

    def __init__(self, ep_len=5000, window=300, arbitrage=False, indicators=None):
        """
        :param ep_len: number of rows an episode advances by (also used as the episode stride)
        :param window: number of consecutive rows returned as one observation
        :param arbitrage: stored only; not used in this version (see TODO below)
        :param indicators: stored only; not used in this version. Defaults to a new empty dict
        """
        self.ep_len = ep_len
        self.window = window
        self.arbitrage = arbitrage
        # A fresh dict per instance; a `{}` default would be shared by every Data object.
        self.indicators = {} if indicators is None else indicators

        self.ep_stride = ep_len  # disjoint
        # self.ep_stride = 100  # overlap; shift each episode by x seconds.
        # TODO overlapping stride would cause test/train overlap. Tweak it so train can overlap data, but test gets silo'd

        col_renames = {
            'Timestamp': 'timestamp',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Close': 'close',
            'Volume_(BTC)': 'volume_btc',
            'Volume_(Currency)': 'volume',
            'Weighted_Price': 'vwap'
        }

        filenames = {
            # 'bitstamp': 'bitstampUSD_1-min_data_2012-01-01_to_2018-06-27.csv',
            'coinbase': data_file,
            # 'coincheck': 'coincheckJPY_1-min_data_2014-10-31_to_2018-06-27.csv'
        }
        primary_table = 'coinbase'
        self.target = f"{primary_table}_close"

        df = None
        for table, filename in filenames.items():
            df_ = pd.read_csv('/content/'+filename)
            # Prefix every column with its table name so several exchanges can be joined side by side.
            col_renames_ = {k: f"{table}_{v}" for k, v in col_renames.items()}
            df_ = df_.rename(columns=col_renames_)
            ts = f"{table}_timestamp"
            df_[ts] = pd.to_datetime(df_[ts], unit='s')
            df_ = df_.set_index(ts)
            df = df_ if df is None else df.join(df_)

        # too quiet before 2015, time waste. copy() to avoid pandas errors
        df = df.loc['2015':].copy()

        df['month'] = df.index.month
        df['day'] = df.index.day
        df['hour'] = df.index.hour

        # TODO drop null rows? (inner join?)
        # TODO arbitrage
        # TODO indicators

        diff_cols = [
            f"{table}_{k}" for k in
            'open high low close volume_btc volume vwap'.split(' ')
            for table in filenames
        ]
        # Percent change per row; infinities (division by a zero previous value) are forward-filled.
        df[diff_cols] = df[diff_cols].pct_change()\
            .replace([np.inf, -np.inf], np.nan)\
            .ffill()  # .bfill()?
        df = df.iloc[1:]  # the first row has no previous row to diff against
        target = df[self.target]  # don't scale price changes; we use that in raw form later
        df = pd.DataFrame(
            robust_scale(df.values, quantile_range=(.1, 100-.1)),
            columns=df.columns, index=df.index
        )
        df[self.target] = target

        df['cash'], df['value'] = 0., 0.

        self.df = df

    def offset(self, ep, step):
        """Return the row position in `self.df` where the window for (ep, step) starts."""
        return ep * self.ep_stride + step

    def has_more(self, ep):
        """Return True while the window starting two episodes after `ep` still fits inside `self.df`."""
        return self.offset(ep + 2, 0) + self.window < self.df.shape[0]

    def get_data(self, ep, step):
        """Return `(X, y)`: the `window` rows starting at (ep, step), and the single row right after them."""
        offset = self.offset(ep, step)
        X = self.df.iloc[offset:offset+self.window]
        y = self.df.iloc[offset+self.window]
        return X, y

    def get_prices(self, ep, step):
        """Return the target column from the end of the (ep, step) window up to the end of episode `ep`.

        The end bound is relative to the episode's own start. The previous absolute bound (`self.ep_len`)
        only worked for episode 0 and yielded an empty series for every later episode.
        """
        start = self.offset(ep, step) + self.window
        end = self.offset(ep, 0) + self.ep_len
        return self.df.iloc[start:end][self.target]

    def reset_cash_val(self):
        """Zero the `cash` and `value` columns for every row."""
        self.df['cash'] = 0.
        self.df['value'] = 0.

    def set_cash_val(self, ep, step, cash, value):
        """Write `cash` and `value` into the row at (ep, step).

        :raises IndexError: if the row position is outside `self.df`
        """
        offset = self.offset(ep, step)
        # Single positional assignment on the frame itself; chained `df.cash.iloc[...] = ...` can end up
        # writing to a temporary copy instead of `self.df`.
        columns = self.df.columns
        self.df.iloc[offset, columns.get_loc('cash')] = cash
        self.df.iloc[offset, columns.get_loc('value')] = value

    def fetch_more(self):
        """Not available in this version; always raises via raise_refactor()."""
        raise_refactor()


In [0]:
#@title btc_env

"""BTC trading environment. Trains on BTC price history to learn to buy/sell/hold.

This is an environment tailored towards TensorForce, not OpenAI Gym. Gym environments are
a standard used by many projects (Baselines, Coach, etc) and so would make sense to use; and TForce is compatible with
Gym envs. It's just that there's hoops to go through converting a Gym env to TForce, and it was ugly code. I actually
had it that way, you can search through Git if you want the Gym env; but one day I decided "I'm not having success with
any of these other projects, TForce is the best - I'm just gonna stick to that" and this approach was cleaner.

I actually do want to try NervanaSystems/Coach, that one's new since I started developing. Will require converting this
env back to Gym format. Anyone wanna give it a go?
"""

import random, time, requests, pdb, gdax, math, pickle, os, shutil, copy
from sklearn.model_selection import TimeSeriesSplit
from enum import Enum
import numpy as np
import pandas as pd
import talib.abstract as tlib
from box import Box
from tensorforce.environments import Environment
from tensorforce.execution import Runner
from sklearn import preprocessing
# from data.data import Data, Exchange, EXCHANGE
# from utils import raise_refactor


class Mode(Enum):
    """Run modes of the environment. The member values double as keys into BitcoinEnv's accumulators
    (`self.acc[self.mode.value]`), which in this file are only set up for 'train' and 'test'."""
    TRAIN = 'train'
    TEST = 'test'
    LIVE = 'live'
    TEST_LIVE = 'test_live'

# See 6fc4ed2 for Scaling states/rewards


class BitcoinEnv(Environment):
    """TensorForce environment that replays BTC price history.

    Each step the agent picks one of three discrete actions (sell 2% of held value, hold, buy with 2% of
    cash). The reward is 0 on every step except the terminal one, where it is the episode's return relative
    to simply holding (see get_return()).
    """
    EPISODE_LEN = 1000

    def __init__(self, hypers, cli_args=None):
        """Build the data source, the train/test accumulators and the state/action specs.

        :param hypers: post-processed hyperparameters; `custom.arbitrage` and `custom.net.step_window` are read
        :param cli_args: stored on `self.cli_args`; not otherwise used in this class
        """
        self.hypers = h = Box(hypers)
        # A fresh dict per instance rather than a shared mutable default.
        self.cli_args = {} if cli_args is None else cli_args

        # reset()/execute() index the accumulators by mode; train_and_test() switches it as it goes.
        self.mode = Mode.TRAIN

        # cash/val start @ about $3.5k each. You should increase/decrease depending on how much you'll put into your
        # exchange accounts to trade with. Presumably the agent will learn to work with what you've got (cash/value
        # are state inputs); but starting capital does effect the learning process.
        self.start_cash, self.start_value = 5., 5.  # .4, .4

        # We have these "accumulator" objects, which collect values over steps, over episodes, etc. Easier to keep
        # same-named variables separate this way.
        acc = dict(
            ep=dict(
                i=-1,  # +1 in reset, makes 0
                returns=[],
                uniques=[],
            ),
            step=dict(),  # setup in reset()
        )
        self.acc = Box(train=copy.deepcopy(acc), test=copy.deepcopy(acc))
        # The data window must match the state shape declared below, so both come from step_window.
        self.data = Data(
            ep_len=self.EPISODE_LEN,
            window=h.custom.net.step_window,
            arbitrage=h.custom.arbitrage,
            indicators={}
        )

        # gdax min order size = .01btc; kraken = .002btc
        self.min_trade = {Exchange.GDAX: .01, Exchange.KRAKEN: .002}[EXCHANGE]
        self.update_btc_price()

        # Action space
        # see {last_good_commit} for action_types other than 'single_discrete'
        # In single_discrete, we allow buy2%, sell2%, hold (and nothing else)
        self.actions_ = dict(type='int', shape=(), num_actions=3)

        # Observation space
        # width = step-window (number of time-steps)
        # height = nothing (1)
        # channels = features/inputs (price actions, OHCLV, etc).
        self.cols_ = self.data.df.shape[1]
        shape = (h.custom.net.step_window, 1, self.cols_)
        self.states_ = dict(type='float', shape=shape)

    def __str__(self): return 'BitcoinEnv'

    def close(self): pass

    @property
    def states(self): return self.states_

    @property
    def actions(self): return self.actions_

    # We don't want random-seeding for reproducibility! We _want_ two runs to give different results, because we only
    # trust the hyper combo which consistently gives positive results.
    def seed(self, seed=None): return

    def update_btc_price(self):
        """Set `self.btc_price`. Fixed at 8000 here; the live price lookup is disabled in this version."""
        self.btc_price = 8000

    def xform_data(self, df):
        """Not available in this version; always raises via raise_refactor()."""
        # TODO here was autoencoder, talib indicators, price-anchoring
        raise_refactor()

    def get_next_state(self):
        """Return the current window of data for the active mode, with a singleton middle axis inserted."""
        acc = self.acc[self.mode.value]
        X, _ = self.data.get_data(acc.ep.i, acc.step.i)
        return X.values[:, np.newaxis, :]  # height, width(nothing), depth

    def reset(self):
        """Start a new episode for the active mode and return its first state."""
        acc = self.acc[self.mode.value]
        acc.step.i = 0
        acc.step.cash, acc.step.value = self.start_cash, self.start_value
        acc.step.hold_value = self.start_value
        acc.step.totals = Box(
            trade=[self.start_cash + self.start_value],
            hold=[self.start_cash + self.start_value]
        )
        acc.step.signals = []
        if self.mode == Mode.TEST:
            # Test on the episode right after the last one trained on.
            acc.ep.i = self.acc.train.ep.i + 1
        elif self.mode == Mode.TRAIN:
            acc.ep.i += 1

        self.data.reset_cash_val()
        self.data.set_cash_val(acc.ep.i, acc.step.i, 0., 0.)
        return self.get_next_state()

    def execute(self, action):
        """Apply one action and advance one step.

        :param action: 0 = sell 2% of held value, 1 = hold, 2 = buy with 2% of cash
        :return: `(next_state, terminal, reward)`; reward is 0 except on the terminal step
        """
        acc = self.acc[self.mode.value]
        totals = acc.step.totals

        act_pct = {
            0: -.02,
            1: 0,
            2: .02
        }[action]
        act_btc = act_pct * (acc.step.cash if act_pct > 0 else acc.step.value)

        fee = {
            Exchange.GDAX: 0.0025,  # https://support.gdax.com/customer/en/portal/articles/2425097-what-are-the-fees-on-gdax-
            Exchange.KRAKEN: 0.0026  # https://www.kraken.com/en-us/help/fees
        }[EXCHANGE]

        # Perform the trade. If the agent can't afford even the minimum trade we let the balance dip into
        # negative here, and the episode is killed below. Trades smaller than the minimum are dropped (0).
        if act_pct > 0:
            if acc.step.cash < self.min_trade:
                act_btc = -(self.start_cash + self.start_value)
            elif act_btc < self.min_trade:
                act_btc = 0
            else:
                acc.step.value += act_btc - act_btc*fee
            # abs() so the unaffordable case (negative act_btc) drives cash negative, mirroring the sell
            # branch; subtracting the negative amount would have *added* cash instead.
            acc.step.cash -= abs(act_btc)

        elif act_pct < 0:
            if acc.step.value < self.min_trade:
                act_btc = -(self.start_cash + self.start_value)
            elif abs(act_btc) < self.min_trade:
                act_btc = 0
            else:
                acc.step.cash += abs(act_btc) - abs(act_btc)*fee
            acc.step.value -= abs(act_btc)

        acc.step.signals.append(float(act_btc))  # clipped signal
        # acc.step.signals.append(np.sign(act_pct))  # indicates an attempted trade

        # next delta. [1,2,2].pct_change() == [NaN, 1, 0]
        _, y = self.data.get_data(acc.ep.i, acc.step.i)  # TODO verify
        pct_change = y[self.data.target]

        acc.step.value += pct_change * acc.step.value
        total_now = acc.step.value + acc.step.cash
        totals.trade.append(total_now)

        # calculate what the reward would be "if I held", to calculate the actual reward's _advantage_ over holding
        hold_before = acc.step.hold_value
        acc.step.hold_value += pct_change * hold_before
        totals.hold.append(acc.step.hold_value + self.start_cash)

        reward = 0

        acc.step.i += 1

        self.data.set_cash_val(
            acc.ep.i, acc.step.i,
            acc.step.cash/self.start_cash,
            acc.step.value/self.start_value
        )
        next_state = self.get_next_state()

        # The episode ends when it runs out of steps or when either balance went negative.
        terminal = bool(
            acc.step.i + 1 >= self.EPISODE_LEN
            or acc.step.value < 0
            or acc.step.cash < 0
        )
        if terminal and self.mode in (Mode.TRAIN, Mode.TEST):
            # We're done.
            acc.step.signals.append(0)  # Add one last signal (to match length)
            reward = self.get_return()
            if np.unique(acc.step.signals).shape[0] == 1:
                reward = -(self.start_cash + self.start_value)  # slam if you don't do anything

        if terminal and self.mode in (Mode.LIVE, Mode.TEST_LIVE):
            raise_refactor()

        return next_state, terminal, reward

    def get_return(self, adv=True):
        """Return the episode's fractional gain so far.

        :param adv: if True, return the trade gain minus the buy-and-hold gain; otherwise the trade gain alone
        """
        acc = self.acc[self.mode.value]
        totals = acc.step.totals
        trade = (totals.trade[-1] / totals.trade[0] - 1)
        hold = (totals.hold[-1] / totals.hold[0] - 1)
        return trade - hold if adv else trade

    def episode_finished(self, runner):
        """Record and print the results of a finished test episode; does nothing in TRAIN mode.

        :param runner: unused
        :return: always True
        """
        if self.mode == Mode.TRAIN: return True

        acc = self.acc.test
        totals = acc.step.totals
        signals = np.array(acc.step.signals)
        n_uniques = np.unique(signals).shape[0]
        ret = self.get_return()
        hold_ret = totals.hold[-1] / totals.hold[0] - 1

        acc.ep.returns.append(float(ret))
        acc.ep.uniques.append(n_uniques)

        # Print (limit to note-worthy)
        lt_0 = (signals < 0).sum()
        eq_0 = (signals == 0).sum()
        gt_0 = (signals > 0).sum()
        completion = int(acc.ep.i * self.data.ep_stride / self.data.df.shape[0] * 100)
        steps = f"\tSteps: {acc.step.i}"

        fm = '%.3f'
        print(f"{completion}%{steps}\tTrade: {fm%ret}\tHold: {fm%hold_ret}\tTrades:\t{lt_0}[<0]\t{eq_0}[=0]\t{gt_0}[>0]")
        return True

    def run_deterministic(self, runner, print_results=True):
        """Play one full episode with the runner's agent acting deterministically and independently.

        :param runner: object exposing `.agent.act(state, deterministic=..., independent=...)`
        :param print_results: if True, call episode_finished() once the episode ends
        """
        next_state, terminal = self.reset(), False
        while not terminal:
            next_state, terminal, reward = self.execute(runner.agent.act(next_state, deterministic=True, independent=True))
        if print_results: self.episode_finished(None)

    def train_and_test(self, agent):
        """Alternate training runs and a single deterministic test episode until the data runs out.

        :param agent: TensorForce agent to train
        """
        runner = Runner(agent=agent, environment=self)
        train_steps = 20000  # TODO something self.data.df.shape[0]... self.EPISODE_LEN...

        try:
            while self.data.has_more(self.acc.train.ep.i):
                self.mode = Mode.TRAIN
                # max_episode_timesteps not required, since we kill on (cash|value)<0 or max_repeats
                runner.run(timesteps=train_steps)
                self.mode = Mode.TEST
                self.run_deterministic(runner, print_results=True)
        except IndexError:
            # FIXME data.has_more() issues: running past the end of the data is treated as "done", but say so
            # rather than ending silently.
            print('Ran past the end of the data, stopping training')
        except KeyboardInterrupt:
            # Lets us kill training with Ctrl-C and skip straight to the final test. This is useful in case you're
            # keeping an eye on terminal and see "there! right there, stop you found it!" (where early_stop & n_steps
            # are the more methodical approaches)
            print('Keyboard interupt, killing training')

    def run_live(self, agent, test=True):
        """Not available in this version; always raises via raise_refactor()."""
        raise_refactor()


In [0]:
#@title hypersearch

import argparse, json, math, time, pdb, os, copy, uuid
from pprint import pprint
from box import Box
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from sqlalchemy.sql import text
from tensorforce import TensorForceError
from tensorforce.agents import agents as agents_dict
from tensorforce.core.networks import layer as TForceLayers
from tensorforce.core.networks.network import LayeredNetwork

from sqlalchemy.dialects import postgresql as psql
from hyperopt import fmin, tpe, hp, Trials
from hyperopt.pyll.base import scope

# from btc_env import BitcoinEnv
# import utils
# from data import data


def network_spec(hypers):
    """Assemble the list of layer dicts (TForce network specification, see their docs) described by
    `hypers['net']`: `depth_mid` conv2d layers, a flatten, then `depth_post` dense layers.
    """
    net = Box(hypers['net'])
    batch_norm = {"type": "tf_layer", "layer": "batch_normalization"}
    layers = []

    def add_dense(s):
        common = {
            'size': s,
            'l2_regularization': net.l2,
            'l1_regularization': net.l1
        }
        if net.batch_norm:
            # linear -> batch-norm -> activation, optionally followed by dropout
            layers.extend([
                {'type': 'linear', **common},
                batch_norm,
                {'type': 'nonlinearity','name': net.activation},
            ])
            # FIXME dense dropout bug https://github.com/reinforceio/tensorforce/issues/317
            if net.dropout:
                layers.append({'type': 'dropout', 'rate': net.dropout})
        else:
            layers.append({'type': 'dense', 'activation': net.activation, **common})

    # Mid-layer: a stack of identical conv layers
    layers.extend(
        {
            'size': net.width,
            'window': (net.kernel_size, 1),
            'stride': (net.stride, 1),
            'type': 'conv2d',
            # 'bias': net.bias,
            'l2_regularization': net.l2,
            'l1_regularization': net.l1
        }
        for _ in range(net.depth_mid)
    )
    layers.append({'type': 'flatten'})

    # Post Dense layers: with `funnel`, the n-th layer is 1/n the size of the first
    fc_dim = net.width * (net.step_window / (net.depth_mid * net.stride)) if net.flat_dim else net.width * 4
    for divisor in range(1, net.depth_post + 1):
        layer_size = fc_dim / divisor if net.funnel else fc_dim
        add_dense(int(layer_size))

    return layers


# Custom hyperopt expressions. `space` below calls scope.two_to_the(...) and scope.ten_to_the_neg(...), so
# these must be registered before `space` is built. Registration is guarded so that re-running this cell in
# the same session does not try to register the same symbol twice (NOTE(review): hyperopt is understood to
# reject re-defining an existing scope symbol - confirm against the installed version).
if not hasattr(scope, 'two_to_the'):
    @scope.define
    def two_to_the(x):
        """Return 2 raised to int(x)."""
        return 2**int(x)

if not hasattr(scope, 'ten_to_the_neg'):
    @scope.define
    def ten_to_the_neg(x):
        """Return 10 raised to -int(x)."""
        return 10**-int(x)

# @scope.define
# def min_threshold(x, thresh, fallback):
#     """Returns x or `fallback` if it doesn't meet the threshold. Note, if you want to turn a hyper "off" below,
#     set it to "outside the threshold", rather than 0.
#     """
#     return x if (x and x > thresh) else fallback

# @scope.define
# def min_ten_neg(x, thresh, fallback):
#     """Returns 10**-x, or `fallback` if it doesn't meet the threshold. Note, if you want to turn a hyper "off" below,
#     set it to "outside the threshold", rather than 0.
#     """
#     x = 10**-x
#     return x if (x and x > thresh) else fallback

def post_process(hypers):
    """Return a deep copy of `hypers` with the derived agent settings filled in.

    On the copy's `ppo_agent`: `update_mode.frequency` becomes ceil(batch_size / frequency), the memory
    capacity is set, the chosen `baseline_stuff` dict is merged into the agent, and - when a baseline mode is
    set - the baseline optimizer copies the step optimizer's learning rate/type and gets its own network spec.
    """
    processed = copy.deepcopy(hypers)  # don't modify original
    agent = processed['ppo_agent']
    custom = processed['custom']

    update_mode = agent['update_mode']
    update_mode['frequency'] = math.ceil(update_mode['batch_size'] / update_mode['frequency'])
    # agent['memory']['capacity'] = BitcoinEnv.EPISODE_LEN * o['batch_size']
    agent['memory']['capacity'] = BitcoinEnv.EPISODE_LEN * MAX_BATCH_SIZE + 1

    # Hoist the selected baseline settings to the agent's top level.
    agent.update(agent.pop('baseline_stuff'))
    if not agent['baseline_mode']:
        return processed

    step_optimizer = agent['step_optimizer']
    baseline_optimizer = agent['baseline_optimizer']['optimizer']
    # o['num_steps'] = agent['optimization_steps']
    baseline_optimizer['learning_rate'] = step_optimizer['learning_rate']
    baseline_optimizer['type'] = step_optimizer['type']

    agent['baseline']['network'] = network_spec(custom)
    # if main['gae_lambda']: main['gae_lambda'] = main['discount']
    return processed


# Most hypers come directly from tensorforce/tensorforce/agents/ppo_agent.py, see that for documentation
# Note: Name this something other than "hypers" (eg "space"), easy conflicts with other methods
# Search space, built up section by section and merged into a single `ppo_agent` dict further down.
space = {}
space['agent'] = {
    # 'states_preprocessing': None,
    # 'actions_exploration': None,
    # 'reward_preprocessing': None,

    # I'm pretty sure we don't want to experiment any less than .99 for non-terminal reward-types (which are 1.0).
    # .99^500 ~= .6%, so loses value sooner than makes sense for our trading horizon. A trade now could affect
    # something 2-5k steps later. So .999 is more like it (5k steps ~= .6%)
    'discount': 1.,  # hp.uniform('discount', .9, .99),
}

# Upper bound of the batch_size search range; post_process() also uses it to size the agent's memory.
MAX_BATCH_SIZE = 15
space['memory_model'] = {
    'update_mode': {
        'unit': 'episodes',
        'batch_size': scope.int(hp.quniform('batch_size', 1, MAX_BATCH_SIZE, 1)),  # 5 FIXME
        # Sampled as a divisor: post_process() replaces it with ceil(batch_size / frequency).
        'frequency': scope.int(hp.quniform('frequency', 1, 3, 1)),  # t-shirt sizes, reverse order
    },

    'memory': {
        'type': 'latest',
        'include_next_states': False,
        # Placeholder; post_process() overwrites it with BitcoinEnv.EPISODE_LEN * MAX_BATCH_SIZE + 1.
        'capacity': None,  # 5000  # BitcoinEnv.EPISODE_LEN * MAX_BATCH_SIZE,  # hp.uniform('capacity', 2000, 20000, 500)
    }
}

space['distribution_model'] = {
    # 'distributions': None,
    'entropy_regularization': hp.choice('entropy_regularization', [None, .01]), # scope.min_ten_neg(hp.uniform('entropy_regularization', 0., 5.), 1e-4, .01),
    # 'variable_noise': TODO
}

space['pg_model'] = {
    # Either no baseline at all, or a full baseline configuration. post_process() merges whichever dict is
    # chosen into the agent's top level and removes the 'baseline_stuff' key.
    'baseline_stuff': hp.choice('baseline_stuff', [
        {'baseline_mode': None},
        {
            'baseline': {'type': 'custom'},
            'baseline_mode': 'states',
            'baseline_optimizer': {
                'type': 'multi_step',
                # Consider having baseline_optimizer learning hypers independent of the main learning hypers.
                # At least with PPO, it seems the step_optimizer learning hypers function quite differently than
                # expected; where baseline_optimizer's function more as-expected. TODO Investigate.
                'num_steps': scope.int(hp.quniform('num_steps', 1, 20, 1)),  # 5 FIXME
                'optimizer': {}  # see post_process()
            },
            'gae_lambda': hp.choice('gae_lambda', [1., None]),
            # scope.min_threshold(hp.uniform('gae_lambda', .8, 1.), .9, None)
        }
    ])
}
space['pg_prob_ration_model'] = {
    'likelihood_ratio_clipping': .2,  # scope.min_threshold(hp.uniform('likelihood_ratio_clipping', 0., 1.), .05, None),
}

space['ppo_model'] = {
    # Doesn't seem to matter; consider removing
    'step_optimizer': {
        'type': 'adam',  # hp.choice('type', ['nadam', 'adam']),
        # Requires the custom `ten_to_the_neg` expression to be registered on hyperopt's `scope`.
        'learning_rate': scope.ten_to_the_neg(hp.uniform('learning_rate', 2., 5.)),
    },

    'optimization_steps': scope.int(hp.quniform('optimization_steps', 1, 50, 1)),  # 5 FIXME

    'subsampling_fraction': .1,  # hp.uniform('subsampling_fraction', 0.,  1.),
}

# Flatten the per-section dicts into the single kwargs dict for the PPO agent.
ppo_agent = {
    **space['agent'],
    **space['memory_model'],
    **space['distribution_model'],
    **space['pg_model'],
    **space['pg_prob_ration_model'],
    **space['ppo_model']
}

# From here on `space` is the actual search space handed to hyperopt; the section dicts above are discarded.
space = {
    'ppo_agent': ppo_agent, # 'vpg_agent': ppo_agent, 'trpo_agent': ppo_agent,
    # TODO dqn, ddpg (hierarchical hyperopt)
}

# Hypers consumed by this project's own code (environment and network builder) rather than by the agent.
space['custom'] = {
    'agent': 'ppo_agent',

    # Use a handful of TA-Lib technical indicators (SMA, EMA, RSI, etc). Which indicators used and for what time-frame
    # not optimally chosen at all; just figured "if some randos are better than nothing, there's something there and
    # I'll revisit". Help wanted.

    # Currently disabling indicators in general. A good CNN should "see" those automatically in the window, right?
    # If I'm wrong, experiment with these (see commit 6fc4ed2)
    # TODO indicators overhaul
    'indicators_count': 0,
    'indicators_window': 0,

    # This is special. "Risk arbitrage" is the idea of watching two exchanges for the same
    # instrument's price. Let's say BTC is $10k in GDAX and $9k in Kraken. Well, Kraken is a smaller / less popular
    # exchange, so it tends to play "follow the leader". Ie, Kraken will likely try to get to $10k
    # to match GDAX (oversimplifying obviously). This is called "risk arbitrage" ("arbitrage"
    # by itself is slightly different, not useful for us). Presumably that's golden info for the neural net:
    # "Kraken < GDAX? Buy in Kraken!". It's not a guarantee, so this is a hyper in hypersearch.py.
    # Incidentally I have found it detrimental, I think due to imperfect time-phase alignment (arbitrage code in
    # data.py) which makes it hard for the net to follow.
    # Note: not valuable if GDAX is main (ie, not valuable if the bigger exchange is the main, only
    # if the smaller exchange (eg Kraken) is main)
    'arbitrage': False,  # see 6fc4ed2

    # single = one action (-$x to +$x). multi = two actions: (buy|sell|hold) and (how much?). all_or_none = buy/sell
    # w/ all the cash or value owned
    'action_type': 'single_discrete',  # hp.choice('action_type', ['single_discrete', 'single_continuous', 'multi']),

    # Should rewards be as-is (PNL), or "how much better than holding" (advantage)? if `sharpe` then we discount 1.0
    # and calculate sharpe score at episode-terminal.
    # See 6fc4ed2 for handling Sharpe rewards
    'reward_type': 'sharpe',  # hp.choice('reward_type', ['raw', 'advantage', 'sharpe']),

}
# Network hypers; network_spec() turns these into the TForce layer list.
space['custom']['net'] = {
    # Conv / LSTM layers
    'depth_mid': scope.int(hp.quniform('depth_mid', 1, 4, 1)),

    # Dense layers
    'depth_post': scope.int(hp.quniform('depth_post', 1, 3, 1)),

    # Network width, in broad-strokes of 2**x (2, 4, 8, 16, 32, 64, 128, 256, 512, ..) just so you get a feel for
    # small-vs-large. Later you'll want to fine-tune.
    # Requires the custom `two_to_the` expression to be registered on hyperopt's `scope`.
    'width': scope.two_to_the(hp.quniform('width', 4, 6, 1)),

    'batch_norm': hp.choice('batch_norm', [True, False]),

    # Whether to expand-in and shrink-out the neural network. You know the look, narrower near the inputs, gets wider
    # in the hidden layers, narrower again on the outputs.
    'funnel': True,  # hp.choice('funnel', [True, False]),

    # Is the first FC layer the same size as the last flattened-conv? Or is it something much smaller,
    # like depth_mid*4?
    # NOTE(review): the hyperopt label here is 'funnel', not 'flat_dim' - looks like a copy/paste slip, but
    # renaming it would change the keys stored in previously saved trials; confirm before changing.
    'flat_dim': hp.choice('funnel', [True, False]),

    # tanh vs "the relu family" (relu, selu, crelu, elu, *lu). Broad-strokes here by just pitting tanh v relu; then,
    # if relu wins you can fine-tune "which type of relu" later.
    'activation': hp.choice('activation', ['tanh', 'relu']),

    # Regularization: Dropout, L1, L2. You'd be surprised (or not) how important is the proper combo of these. The RL
    # papers just roll L2 (.001) and ignore the other two; but that hasn't jived for me. Below is the best combo I've
    # gotten so far, and I'll update as I go.
    # 'dropout': scope.min_threshold(hp.uniform('dropout', 0., .5), .1, None),
    # 'l2': scope.min_ten_neg(hp.uniform('l2', 0., 7.), 1e-6, 0.),
    # 'l1': scope.min_ten_neg(hp.uniform('l1', 0., 7.), 1e-6, 0.),
    'dropout': None,
    'l2': 0.,
    'l1': 0.,

    # LSTM at {last_good_commit}

    # T-shirt size window-sizes, smaller # = more destructive. See comments in build_net_spec()
    # NOTE(review): the hyperopt label is 'window' while the key is 'kernel_size'; same caveat as 'flat_dim'.
    'kernel_size': hp.choice('window', [3, 5]),

    # How many ways to divide a window? 1=no-overlap, 2=half-overlap (smaller # = more destructive). See comments
    # in build_net_spec()
    'stride': 2,

    # Size of the window to look at w/ the CNN (ie, width of the image). Would like to have more than 400 "pixels" here,
    # but it causes memory issues the way PPO's MemoryModel batches things. This is made up for via indicators
    'step_window': 300,  # scope.int(hp.quniform('step_window', 200, 500, 50)),
}

# TODO restore get_winner() from git & fix-up

def main():
    """Run (or resume) the hyperopt search over `space`, training and testing one PPO agent per trial.

    Trials are loaded from and saved back to `./trial.pickle`, so each invocation adds 20 more evaluations
    on top of whatever was run before.
    """
    # CLI parsing is disabled in this single-file version; the value is only stored by BitcoinEnv.
    args = []

    # Specify the "loss" function (which we'll maximize) as a single rl_hsearch instantiate-and-run
    def loss_fn(hypers):
        """Train and test one agent built from `hypers`; return the negated score (hyperopt minimizes)."""
        processed = post_process(hypers)
        network = network_spec(processed['custom'])

        agent_kwargs = processed['ppo_agent']
        ## GPU split
        gpu_split = 1
        if gpu_split != 1:
            fraction = .9 / gpu_split if gpu_split > 1 else gpu_split
            session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=fraction))
            agent_kwargs['execution'] = {'type': 'single', 'session_config': session_config, 'distributed_spec': None}

        pprint(processed)
        pprint(network)

        env = BitcoinEnv(processed, args)
        agent = agents_dict['ppo_agent'](
            states=env.states,
            actions=env.actions,
            network=network,
            **agent_kwargs
        )

        try:
            env.train_and_test(agent)

            acc = env.acc.test
            adv_avg = calculate_score(acc.ep.returns)
            print(hypers, f"\nScore={adv_avg}\n\n")

            # Saving to the runs database is optional: in this single-file version no `engine_runs` is
            # configured (the original `data.engine_runs` lookup raised NameError and killed every trial).
            engine = globals().get('engine_runs')
            if engine is None:
                print('No `engine_runs` configured, skipping save to the runs table')
            else:
                df = pd.DataFrame([dict(
                    id=uuid.uuid4(),
                    hypers=json.dumps(hypers),
                    returns=list(acc.ep.returns),
                    uniques=list(acc.ep.uniques),
                    prices=list(env.data.get_prices(acc.ep.i, 0)),
                    signals=list(acc.step.signals),
                )]).set_index('id')
                dtype = {
                    'hypers': psql.JSONB,
                    **{k: psql.ARRAY(psql.DOUBLE_PRECISION) for k in ['returns', 'signals', 'prices', 'uniques']},
                }
                with engine.connect() as conn:
                    df.to_sql('runs', conn, if_exists='append', index_label='id', dtype=dtype)

            # TODO restore save_model() from git
        finally:
            # Release the agent/environment even when a trial fails part-way.
            agent.close()
            env.close()
        return -adv_avg  # maximize

        # TODO restore fetching between runs so can pick up where left off, or
        # get updates from other servers

    # set initial max_eval, attempt to load a saved trials object from pickle, if that fails start fresh.
    # grab how many trials were previously run and add max_evals to it for the next run.
    # this allows the hyper parameter search to resume where it left off last.
    # TODO save trials to SQL table and restore from there instead of local pickle.
    max_evals = 20
    try:
        # NOTE: unpickling executes code from the file; only load a trial.pickle you created yourself.
        with open('./trial.pickle', 'rb') as f:
            trials = pickle.load(f)
        max_evals += len(trials.trials)
    except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError):
        # Missing, truncated or incompatible file: start a fresh search.
        trials = Trials()

    best = fmin(loss_fn, space=space, algo=tpe.suggest, max_evals=max_evals, trials=trials)
    print('Best hypers found:', best)

    with open('./trial.pickle', 'wb') as f:
        pickle.dump(trials, f)


if __name__ == '__main__':
    main()
