From e0345c8f1672e74c818b5c965f4416f6c303982e Mon Sep 17 00:00:00 2001
From: Tyler Renelle
Date: Fri, 9 Feb 2018 12:21:41 -0800
Subject: [PATCH] Don't early-terminate episodes, then we can fit batch_size &
 remove autoencoder. Switch from abs advantage to Sharpe & cumulative return.
 Conv step_window back-indexing

---
 README.md                |   1 +
 btc_env.py               | 165 +++++++++++++++++++--------------------
 data/data.py             |   6 +-
 hypersearch.py           |  37 +++++----
 visualize/client/App.jsx |  11 ++-
 visualize/server.py      |   8 +-
 6 files changed, 111 insertions(+), 117 deletions(-)

diff --git a/README.md b/README.md
index 92ce9e3..87a33b2 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,7 @@ This project is a [TensorForce](https://github.com/reinforceio/tensorforce)-base
 
 - [Sutton & Barto](http://amzn.to/2EWvnVf): de-facto textbook on RL basics
 - [CS 294](http://rll.berkeley.edu/deeprlcourse/): the modern deep-learning spin on ^.
+- [Machine Learning for Trading](https://www.udacity.com/course/machine-learning-for-trading--ud501): teaches you algo-trading, stock stuff, and applied RL.
 
 This project goes with Episode 26+ of [Machine Learning Guide](http://ocdevel.com/podcasts/machine-learning). Those episodes are tutorial for this project; including an intro to Deep RL, hyperparameter decisions, etc.
 
diff --git a/btc_env.py b/btc_env.py
index e9cd7ea..2ccbfea 100755
--- a/btc_env.py
+++ b/btc_env.py
@@ -10,7 +10,7 @@
 env back to Gym format. Anyone wanna give it a go?
 """
 
-import random, time, requests, pdb, gdax
+import random, time, requests, pdb, gdax, math
 from enum import Enum
 import numpy as np
 import pandas as pd
@@ -59,12 +59,10 @@ class Scaler(object):
     def __init__(self):
         self.scalers = {
             self.REWARD: RobustScaler(quantile_range=(5., 95.)),
-            self.SERIES: RobustScaler(quantile_range=(5., 95.)),
             self.STATIONARY: RobustScaler(quantile_range=(5., 95.))
         }
         self.data = {
             self.REWARD: [],
-            self.SERIES: [],
             self.STATIONARY: []
         }
         self.done = False
@@ -103,12 +101,10 @@ def transform(self, input, kind, force=False):
 # keep this globally around for all runs forever
 scalers = {}
 
-# We don't want random-seeding for reproducibilityy! We _want_ two runs to give different results, because we only
-# trust the hyper combo which consistently gives positive results!
-ALLOW_SEED = False
-
 
 class BitcoinEnv(Environment):
+    EPISODE_LEN = 5000
+
     def __init__(self, hypers, name='ppo_agent'):
         """Initialize hyperparameters (done here instead of __init__ since OpenAI-Gym controls instantiation)"""
         self.hypers = Box(hypers)
@@ -118,7 +114,7 @@ def __init__(self, hypers, name='ppo_agent'):
         # cash/val start @ about $3.5k each. You should increase/decrease depending on how much you'll put into your
         # exchange accounts to trade with. Presumably the agent will learn to work with what you've got (cash/value
         # are state inputs); but starting capital does effect the learning process.
-        self.start_cash, self.start_value = .3, .3
+        self.start_cash, self.start_value = 1., 1.
 
         # We have these "accumulator" objects, which collect values over steps, over episodes, etc. Easier to keep
         # same-named variables separate this way.
@@ -126,10 +122,11 @@ def __init__(self, hypers, name='ppo_agent'):
             episode=dict(
                 i=0,
                 total_steps=0,
-                advantages=[],
+                sharpes=[],
+                returns=[],
                 uniques=[]
             ),
-            step=dict(i=0),  # setup in reset()
+            step=dict(),  # setup in reset()
             tests=dict(
                 i=0,
                 n_tests=0
@@ -150,8 +147,8 @@ def __init__(self, hypers, name='ppo_agent'):
         # Our data is too high-dimensional for the way MemoryModel handles batched episodes.
        # Reduce it (don't like this)
         all_data = data.db_to_dataframe(self.conn, arbitrage=self.hypers.arbitrage)
-        self.all_observations, self.all_prices = self._xform_data(all_data)
-        self.all_prices_diff = self._diff(self.all_prices, percent=True)
+        self.all_observations, self.all_prices = self.xform_data(all_data)
+        self.all_prices_diff = self.diff(self.all_prices, percent=True)
 
         # Calculate a possible reward to be used as an average for repeat-punishing
         self.possible_reward = self.start_value * np.median([p for p in self.all_prices_diff if p > 0])
@@ -193,13 +190,9 @@ def states(self): return self.states_
     @property
     def actions(self): return self.actions_
 
-    def seed(self, seed=None):
-        if not ALLOW_SEED: return
-        # self.np_random, seed = seeding.np_random(seed)
-        # return [seed]
-        random.seed(seed)
-        np.random.seed(seed)
-        tf.set_random_seed(seed)
+    # We don't want random-seeding for reproducibility! We _want_ two runs to give different results, because we only
+    # trust the hyper combo which consistently gives positive results.
+    def seed(self, seed=None): return
 
     def update_btc_price(self):
         try:
@@ -207,7 +200,7 @@ def update_btc_price(self):
         except:
             self.btc_price = self.btc_price or 8000
 
-    def _diff(self, arr, percent=False):
+    def diff(self, arr, percent=False):
         series = pd.Series(arr)
         diff = series.pct_change() if percent else series.diff()
         diff.iloc[0] = 0  # always NaN, nothing to compare to
@@ -219,14 +212,14 @@ def _diff(self, arr, percent=False):
         # then forward-fill the NaNs.
         return diff.replace([np.inf, -np.inf], np.nan).ffill().bfill().values
 
-    def _xform_data(self, df):
+    def xform_data(self, df):
         columns = []
         use_indicators = self.hypers.indicators and self.hypers.indicators > 100
         tables_ = data.get_tables(self.hypers.arbitrage)
         percent = self.hypers.pct_change
         for table in tables_:
             name, cols, ohlcv = table['name'], table['cols'], table.get('ohlcv', {})
-            columns += [self._diff(df[f'{name}_{k}'], percent) for k in cols]
+            columns += [self.diff(df[f'{name}_{k}'], percent) for k in cols]
 
             # Add extra indicator columns
             if ohlcv and use_indicators:
@@ -236,10 +229,10 @@ def _xform_data(self, df):
                     ind[k] = df[f"{name}_{v}"]
                 columns += [
                     # TODO this is my naive approach, I'm not a TA expert. Could use a second pair of eyes
-                    self._diff(SMA(ind, timeperiod=self.hypers.indicators), percent),
-                    self._diff(EMA(ind, timeperiod=self.hypers.indicators), percent),
-                    self._diff(RSI(ind, timeperiod=self.hypers.indicators), percent),
-                    self._diff(ATR(ind, timeperiod=self.hypers.indicators), percent),
+                    self.diff(SMA(ind, timeperiod=self.hypers.indicators), percent),
+                    self.diff(EMA(ind, timeperiod=self.hypers.indicators), percent),
+                    self.diff(RSI(ind, timeperiod=self.hypers.indicators), percent),
+                    self.diff(ATR(ind, timeperiod=self.hypers.indicators), percent),
                 ]
 
         states = np.column_stack(columns)
@@ -258,18 +251,17 @@ def _xform_data(self, df):
         # Currently we're reducing the dimensionality of our states (OHLCV + indicators + arbitrage => 5 or 6 weights)
         # because TensorForce's memory branch changed Policy Gradient models' batching from timesteps to episodes.
        # This takes of way too much GPU RAM for us, so we had to cut back in quite a few areas (num steps to train
-        # per episode, episode batch_size, and especially this:)
-        ae = AutoEncoder()
-        states = ae.fit_transform_tied(states)
+        # per episode, episode batch_size, and especially this:
+        # ae = AutoEncoder()
+        # states = ae.fit_transform_tied(states)
 
         return states, prices
 
-    def use_dataset(self, mode, no_kill=False):
+    def use_dataset(self, mode, full_set=False):
         """Fetches, transforms, and stores the portion of data you'll be working with (ie, 80% train data, 20% test
         data, or the live database). Make sure to call this before reset()!
         """
         self.mode = mode
-        self.no_kill = no_kill
         if mode in (Mode.LIVE, Mode.TEST_LIVE):
             self.conn = data.engine_live.connect()
             # Work with 6000 timesteps up until the present (play w/ diff numbers, depends on LSTM)
@@ -285,25 +277,24 @@ def use_dataset(self, mode, no_kill=False):
             split = .9  # Using 90% training data.
             n_train, n_test = int(row_ct * split), int(row_ct * (1 - split))
             if mode == mode.TEST:
-                limit, offset = n_test, n_train
-                if no_kill is False:
-                    limit = 50000  # he's not likely to get past that, so save some RAM (=time)
+                offset = n_train
+                limit = 30000 if full_set else 8000  # should be `n_test` in full_set, getting idx errors
             else:
                 # Grab a random window from the 90% training data. The random bit is important so the agent
                 # sees a variety of data. The window-size bit is a hack: as long as the agent doesn't die (doesn't cause
                 # `terminal=True`), PPO's MemoryModel can keep filling up until it crashes TensorFlow. This ensures
                 # there's a stopping point (limit). I'd rather see how far he can get w/o dying, figure out a solution.
-                limit = 6000
-                offset = random.randint(0, n_train - limit)
+                limit = self.EPISODE_LEN
+                offset_start = 0 if not self.conv2d else self.hypers.step_window + 1
+                offset = random.randint(offset_start, n_train - self.EPISODE_LEN)
 
-        # self.observations, self.prices = self._xform_data(df)
-        # self.prices_diff = self._diff(self.prices, percent=True)
-        self.observations = self.all_observations[offset:offset+limit]
+        self.offset, self.limit = offset, limit
         self.prices = self.all_prices[offset:offset+limit]
         self.prices_diff = self.all_prices_diff[offset:offset+limit]
 
-    def _get_next_state(self, i, cash, value, repeats):
-        series = self.observations[i]
+    def get_next_state(self, i, cash, value, repeats):
+        i = i + self.offset
+        series = self.all_observations[i]
         stationary = [cash, value, repeats]
         if self.hypers.scale:
             # series already scaled in self._xform_data()
@@ -315,27 +306,24 @@ def _get_next_state(self, i, cash, value, repeats):
             # Take note of the +1 here. LSTM uses a single index [i], which grabs the list's end. Conv uses a window,
             # [-something:i], which _excludes_ the list's end (due to Python indexing). Without this +1, conv would
             # have a 1-step-behind delayed response.
-            window = self.observations[i - self.hypers.step_window + 1:i + 1]
+            window = self.all_observations[i - self.hypers.step_window + 1:i + 1]
             series = np.expand_dims(window, axis=1)
         return dict(series=series, stationary=stationary)
 
     def reset(self):
         step_acc, ep_acc = self.acc.step, self.acc.episode
-        # Cash & value are the real scores - how much we end up with at the end of an episode
+        step_acc.i = 0
         step_acc.cash, step_acc.value = self.start_cash, self.start_value
-        # But for our purposes, we care more about "how much better is what we made than if we held". We're training
-        # a trading bot, not an investing bot.
-        # So we compare these at the end, calling it "advantage"
-        step_acc.hold = Box(value=self.start_cash, cash=self.start_value)
-        start_timestep = 1
-        if self.conv2d:
-            # for conv2d, start at the end of the first window (grab a full window)
-            start_timestep = self.hypers.step_window
-        step_acc.i = start_timestep
-        step_acc.signals = [0] * start_timestep
+        step_acc.hold_value = self.start_value
+        step_acc.totals = Box(
+            trade=[self.start_cash + self.start_value],
+            hold=[self.start_cash + self.start_value]
+        )
+        step_acc.signals = []
         step_acc.repeats = 0
         ep_acc.i += 1
-        return self._get_next_state(start_timestep, self.start_cash, self.start_value, 0.)
+        return self.get_next_state(0, self.start_cash, self.start_value, 0.)
 
     def execute(self, actions):
         if self.hypers.single_action:
@@ -360,32 +348,35 @@ def execute(self, actions):
         }[EXCHANGE]
         reward = 0
         abs_sig = abs(signal)
-        before = Box(cash=step_acc.cash, value=step_acc.value, total=step_acc.cash+step_acc.value)
+        total_before = step_acc.cash + step_acc.value
 
         # Perform the trade. In training mode, we'll let it dip into negative here, but then kill and punish below.
         # In testing/live, we'll just block the trade if they can't afford it
-        if signal > 0 and not (self.no_kill and abs_sig > step_acc.cash):
+        if signal > 0 and abs_sig <= step_acc.cash:
             step_acc.value += abs_sig - abs_sig*fee
             step_acc.cash -= abs_sig
-        elif signal < 0 and not (self.no_kill and abs_sig > step_acc.value):
+        elif signal < 0 and abs_sig <= step_acc.value:
             step_acc.cash += abs_sig - abs_sig*fee
             step_acc.value -= abs_sig
 
         # next delta. [1,2,2].pct_change() == [NaN, 1, 0]
-        diff_loc = step_acc.i + 1
-        pct_change = self.prices_diff[diff_loc]
+        pct_change = self.prices_diff[step_acc.i + 1]
+
         step_acc.value += pct_change * step_acc.value
-        total = step_acc.value + step_acc.cash
-        reward += total - before.total
+        total_now = step_acc.value + step_acc.cash
+        step_acc.totals.trade.append(total_now)
+        # Reward is in dollar-change. As we build a great portfolio, the reward should get bigger and bigger (and
+        # the agent should notice this)
+        reward += (total_now - total_before)
 
         # calculate what the reward would be "if I held", to calculate the actual reward's _advantage_ over holding
-        before = step_acc.hold
-        before.value += pct_change * before.value
+        step_acc.hold_value += pct_change * step_acc.hold_value
+        step_acc.totals.hold.append(step_acc.hold_value + self.start_cash)
 
         # Collect repeated same-action count (homogeneous actions punished below)
         recent_actions = np.array(step_acc.signals[-step_acc.repeats:])
         if np.any(recent_actions > 0) and np.any(recent_actions < 0) and np.any(recent_actions == 0):
             step_acc.repeats = 0  # reset repeat counter
-        else:
+        elif self.hypers.punish_repeats < self.EPISODE_LEN:
             step_acc.repeats += 1
         # by the time we hit punish_repeats, we're doubling punishments / canceling rewards. Note: we don't want to
         # multiply by `reward` here because repeats are often 0, which means 0 penalty.
        # Hence `possible_reward`
@@ -396,16 +387,11 @@ def execute(self, actions):
         step_acc.i += 1
         ep_acc.total_steps += 1
 
-        next_state = self._get_next_state(step_acc.i, step_acc.cash, step_acc.value, step_acc.repeats)
+        next_state = self.get_next_state(step_acc.i, step_acc.cash, step_acc.value, step_acc.repeats)
         if self.hypers.scale:
             reward = self.scaler.transform([reward], Scaler.REWARD)[0]
 
-        terminal = int(step_acc.i + 1 >= len(self.observations))
-        # Kill and punish if (a) agent ran out of money; (b) is doing nothing for way too long
-        # The repeats bit isn't just for punishment, but because training can get stuck too long on losers
-        if not self.no_kill and (step_acc.cash < 0 or step_acc.value < 0 or step_acc.repeats >= self.hypers.punish_repeats):
-            reward -= 1.  # Big penalty. BTC, like $12k
-            terminal = True
+        terminal = int(step_acc.i + 1 >= self.limit)
         if terminal and self.mode in (Mode.TRAIN, Mode.TEST):
             # We're done.
             step_acc.signals.append(0)  # Add one last signal (to match length)
@@ -455,8 +441,8 @@ def execute(self, actions):
                     time.sleep(20)
                 self.last_timestamp = new_timestamp
                 self.df = pd.concat([self.df.iloc[-1000:], new_data], axis=0)  # shed some used data, add new
-                self.observations, self.prices = self._xform_data(self.df)
-                self.prices_diff = self._diff(self.prices, percent=True)
+                self.observations, self.prices = self.xform_data(self.df)
+                self.prices_diff = self.diff(self.prices, percent=True)
                 step_acc.i = self.df.shape[0] - n_new - 1
 
                 if live:
@@ -475,22 +461,31 @@ def execute(self, actions):
 
     def episode_finished(self, runner):
         step_acc, ep_acc, test_acc = self.acc.step, self.acc.episode, self.acc.tests
         signals = step_acc.signals
-
-        advantage = ((step_acc.cash + step_acc.value) - (self.start_cash + self.start_value)) - \
-                    ((step_acc.hold.value + step_acc.hold.cash) - (self.start_cash + self.start_value))
-        # per step average advantage, then bring it to a reasonable number (up from ~.0001)
-        advantage = advantage / step_acc.i * 10000
-        if advantage == 0.: advantage = -.01  # no HODLing!
-        self.acc.episode.advantages.append(advantage)
+        totals = step_acc.totals
         n_uniques = float(len(np.unique(signals)))
-        self.acc.episode.uniques.append(n_uniques)
+
+        # Calculate the Sharpe ratio.
+        diff = (pd.Series(totals.trade).pct_change() - pd.Series(totals.hold).pct_change())[1:]
+        mean, std, sharpe = diff.mean(), diff.std(), 0
+        if (std, mean) != (0, 0):
+            # Usually Sharpe has `sqrt(num_trades)` in front (or `num_trading_days`?). Experimenting being creative w/
+            # trade-diversity, etc.
+            # Give Sharpe some extra info
+            # breadth = math.sqrt(np.uniques(signals))
+            breadth = np.std([np.sign(x) for x in signals])  # get signal direction, amount not as important (and adds complications)
+            sharpe = breadth * (mean / std)
+
+        cumm_ret = (totals.trade[-1] / totals.trade[0] - 1) - (totals.hold[-1] / totals.hold[0] - 1)
+
+        ep_acc.sharpes.append(float(sharpe))
+        ep_acc.returns.append(float(cumm_ret))
+        ep_acc.uniques.append(n_uniques)
 
         # Print (limit to note-worthy)
         lt_0 = len([s for s in signals if s < 0])
         eq_0 = len([s for s in signals if s == 0])
         gt_0 = len([s for s in signals if s > 0])
         completion = int(test_acc.i / test_acc.n_tests * 100)
-        print(f"{completion}%\tSteps: {step_acc.i}\tAdvantage: {'%.3f'%advantage}\tTrades:\t{lt_0}[<0]\t{eq_0}[=0]\t{gt_0}[>0]")
+        print(f"{completion}%\tSteps: {step_acc.i}\tSharpe: {'%.3f'%sharpe}\tReturn: {'%.3f'%cumm_ret}\tTrades:\t{lt_0}[<0]\t{eq_0}[=0]\t{gt_0}[>0]")
         return True
 
     def run_deterministic(self, runner, print_results=True):
@@ -515,8 +510,8 @@ def train_and_test(self, agent, n_steps, n_tests, early_stop):
                 self.use_dataset(Mode.TEST)
                 self.run_deterministic(runner, print_results=True)
                 if early_stop > 0:
-                    advantages = np.array(self.acc.episode.advantages[-early_stop:])
-                    if test_acc.i >= early_stop and np.all(advantages > 0):
+                    sharpes = np.array(self.acc.episode.sharpes[-early_stop:])
+                    if test_acc.i >= early_stop and np.all(sharpes > 0):
                         test_acc.i = n_tests
                 test_acc.i += 1
         except KeyboardInterrupt:
@@ -527,7 +522,7 @@ def train_and_test(self, agent, n_steps, n_tests, early_stop):
 
         # On last "how would it have done IRL?" run, without getting in the way (no killing on repeats, 0-balance)
         print('Running no-kill test-set')
-        self.use_dataset(Mode.TEST, no_kill=True)
+        self.use_dataset(Mode.TEST, full_set=True)
         self.run_deterministic(runner, print_results=True)
 
     def run_live(self, agent, test=True):
@@ -544,5 +539,5 @@ def run_live(self, agent, test=True):
         print(f'Starting total: {self.start_cash + self.start_value}')
 
         runner = Runner(agent=agent, environment=self)
-        self.use_dataset(Mode.TEST_LIVE if test else Mode.LIVE, no_kill=True)
+        self.use_dataset(Mode.TEST_LIVE if test else Mode.LIVE)
         self.run_deterministic(runner, print_results=True)
diff --git a/data/data.py b/data/data.py
index 1ee5f40..8e11574 100644
--- a/data/data.py
+++ b/data/data.py
@@ -298,9 +298,9 @@ def setup_runs_table():
         (
             id serial not null,
             hypers jsonb not null,
-            advantage_avg double precision not null,
-            advantages double precision[],
-            actions double precision[],
+            sharpes double precision[],
+            returns double precision[],
+            signals double precision[],
             prices double precision[],
             uniques double precision[],
             flag varchar(16),
diff --git a/hypersearch.py b/hypersearch.py
index 94ce97e..0d2c820 100755
--- a/hypersearch.py
+++ b/hypersearch.py
@@ -95,7 +95,7 @@ def build_net_spec(hypers):
 
         # This is just my hunch from CNNs I've seen; the filter sizes are much smaller than the downstream denses
         # (like 32-64-64 -> 512-256). If anyone has better intuition...
-        size = max([8, int(net.width // 5)])
+        size = max([32, int(net.width // 4)])
         # if i == 0: size = int(size / 2)  # Most convs have their first layer smaller... right? just the first, or what?
        arr.append({
            'size': size,
@@ -223,7 +223,7 @@ def hydrate_baseline(x, flat):
     }
     hypers['memory_model'] = {
         'update_mode.unit': 'episodes',
-        'update_mode.batch_size': 8,  # {
+        'update_mode.batch_size': 4,  # {
        #     'type': 'bounded',
        #     'vals': [1, 10],
        #     'guess': 10,
@@ -231,14 +231,14 @@ def hydrate_baseline(x, flat):
        #     'pre': round
        # },
         'update_mode.frequency': {
             'type': 'bounded',
-            'vals': [1, 8],
-            'guess': 8,
+            'vals': [1, 4],
+            'guess': 4,
             'pre': round
         },
         'memory.type': 'latest',
         'memory.include_next_states': False,
-        'memory.capacity': 100000,  # { TODO does this matter?
+        'memory.capacity': BitcoinEnv.EPISODE_LEN * 4,  # {
        #     'type': 'bounded',
        #     'vals': [2000, 20000],
        #     'guess': 5000
@@ -320,7 +320,7 @@ def hydrate_baseline(x, flat):
     'indicators': {
         'type': 'bounded',
         'vals': [0, 600],
-        'guess': 600,
+        'guess': 300,
         'pre': int,
         'hydrate': min_threshold(100, False)
     },
@@ -328,7 +328,7 @@ def hydrate_baseline(x, flat):
     'net.depth_mid': {
         'type': 'bounded',
         'vals': [1, 3],
-        'guess': 2,
+        'guess': 3,
         'pre': round
     },
     # Dense layers
@@ -384,7 +384,6 @@ def hydrate_baseline(x, flat):
         'hydrate': min_ten_neg(1e-6, 0.)
     },
-
 
     # Instead of using absolute price diffs, use percent-change.
     'pct_change': {
         'type': 'bool',
@@ -406,8 +405,8 @@ def hydrate_baseline(x, flat):
     # spanking. I didn't raise no investor, I raised a TRADER
     'punish_repeats': {
         'type': 'bounded',
-        'vals': [1000, 5000],
-        'guess': 5000,
+        'vals': [1000, BitcoinEnv.EPISODE_LEN * 1.5],  # more than ep len means don't punish
+        'guess': 1000,
         'pre': int
     },
@@ -582,27 +581,27 @@ def execute(self, actions):
         env.train_and_test(agent, self.cli_args.n_steps, self.cli_args.n_tests, -1)
 
         step_acc, ep_acc = env.acc.step, env.acc.episode
-        adv_avg = utils.calculate_score(ep_acc.advantages)
+        adv_avg = utils.calculate_score(ep_acc.sharpes)
         print(flat, f"\nAdvantage={adv_avg}\n\n")
 
         sql = """
-          insert into runs (hypers, advantage_avg, advantages, uniques, prices, actions, agent, flag)
-          values (:hypers, :advantage_avg, :advantages, :uniques, :prices, :actions, :agent, :flag)
+          insert into runs (hypers, sharpes, returns, uniques, prices, signals, agent, flag)
+          values (:hypers, :sharpes, :returns, :uniques, :prices, :signals, :agent, :flag)
           returning id;
         """
         row = self.conn_runs.execute(
             text(sql),
             hypers=json.dumps(flat),
-            advantage_avg=adv_avg,
-            advantages=list(ep_acc.advantages),
+            sharpes=list(ep_acc.sharpes),
+            returns=list(ep_acc.returns),
             uniques=list(ep_acc.uniques),
             prices=list(env.prices),
-            actions=list(step_acc.signals),
+            signals=list(step_acc.signals),
             agent=self.agent,
             flag=self.cli_args.net_type
         ).fetchone()
-        if ep_acc.advantages[-1] > 0:
+        if ep_acc.sharpes[-1] > 0:
             _id = str(row[0])
             directory = os.path.join(os.getcwd(), "saves", _id)
             filestar = os.path.join(directory, _id)
@@ -762,13 +761,13 @@ def loss_fn(params):
     # Every iteration, re-fetch from the database & pre-train new model.
    # Acts same as saving/loading a model to disk,
     # but this allows to distribute across servers easily
     conn_runs = data.engine_runs.connect()
-    sql = "select hypers, advantages, advantage_avg from runs where flag=:f"
+    sql = "select hypers, sharpes from runs where flag=:f"
     runs = conn_runs.execute(text(sql), f=args.net_type).fetchall()
     conn_runs.close()
     X, Y = [], []
     for run in runs:
         X.append(hypers2vec(run.hypers))
-        Y.append([utils.calculate_score(run.advantages)])
+        Y.append([utils.calculate_score(run.sharpes)])
     boost_model = print_feature_importances(X, Y, feat_names)
 
     if args.guess != -1:
diff --git a/visualize/client/App.jsx b/visualize/client/App.jsx
index 500b410..58c163f 100644
--- a/visualize/client/App.jsx
+++ b/visualize/client/App.jsx
@@ -23,11 +23,10 @@ class App extends Component {
   componentDidMount() {
     fetch('http://localhost:5000').then(res => res.json()).then(data => {
       data.forEach(d => {
-        d.reward_avg = d.advantage_avg
         d.hypers = _.transform(d.hypers, (m,v,k) => {
           m[k.replace(/\./g, '_')] = typeof v == 'boolean' ? ~~v : v;
         });
-        d.unique_sigs = _.uniq(d.actions).length;
+        d.unique_sigs = _.uniq(d.signals).length;
       });
       this.forceRerender = true;
       this.setState({data});
@@ -146,7 +145,7 @@ class App extends Component {
     svg.select('g').remove(); // start clean
     let g = svg.append("g").attr("transform", "translate(" + margin.left + "," + margin.top + ")");
 
-    let rewards = data.map(d => d.advantages.map((v,i) => {
+    let rewards = data.map(d => d.sharpes.map((v,i) => {
      let y = v; // just human
      // let y = (d.rewards_agent[i] + v)/2; // human-agent average
      // y = _.clamp(y, -100, 100); // clamp so we don't break the graph
@@ -268,11 +267,11 @@ class App extends Component {
   mountSignals = () => {
     const {id} = this.clickedDatum;
-    fetch(`http://localhost:5000/actions/${id}`).then(res => res.json()).then(this._mountSignals);
+    fetch(`http://localhost:5000/signals/${id}`).then(res => res.json()).then(this._mountSignals);
   };
 
   _mountSignals = (data) => {
-    let {actions, prices} = data;
+    let {signals, prices} = data;
     let svg = d3.select("svg#signals");
     svg.select('g').remove(); // start fresh
 
@@ -325,7 +324,7 @@ class App extends Component {
       .enter()
       .append("circle")
       .classed('dot', true)
-      .style('fill', (d,i) => actions[i] < 0 ? 'red' : actions[i] > 0 ? 'green' : 'rgba(0,0,0,0)')
+      .style('fill', (d,i) => signals[i] < 0 ? 'red' : signals[i] > 0 ? 'green' : 'rgba(0,0,0,0)')
       .attr("r", 1)
       .attr("cx", (d,i) => x(i))
      .attr("cy", d => y(d));
diff --git a/visualize/server.py b/visualize/server.py
index cbddfa9..d9c69ca 100644
--- a/visualize/server.py
+++ b/visualize/server.py
@@ -15,9 +15,9 @@ def get_runs():
     rows = []
     conn = engine_runs.connect()
     # TODO prices/actions in separate route
-    for row in conn.execute('select id, hypers, advantage_avg, advantages, uniques from runs').fetchall():
+    for row in conn.execute('select id, hypers, sharpes, returns, uniques from runs').fetchall():
         row = dict(row.items())
-        row['advantage_avg'] = utils.calculate_score(row['advantages'])
+        row['reward_avg'] = utils.calculate_score(row['sharpes'])
         rows.append(row)
     conn.close()
 
@@ -25,10 +25,10 @@ def get_runs():
     return jsonify(rows)
 
 
-@app.route("/actions/<run_id>")
+@app.route("/signals/<run_id>")
 def get_actions(run_id):
     conn = engine_runs.connect()
-    query = 'select actions, prices from runs where id=:run_id'
+    query = 'select signals, prices from runs where id=:run_id'
     row = conn.execute(text(query), run_id=run_id).fetchone()
     conn.close()
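
Reviewer note (not part of the patch): below is a minimal standalone sketch of the new episode scoring that `episode_finished()` introduces above -- a Sharpe-style ratio of trading vs. a buy-and-hold baseline, weighted by signal "breadth", plus the excess cumulative return. It assumes only numpy/pandas; the function name and the toy inputs are illustrative, not from the repo, and the `std > 0` guard is slightly stricter than the patch's `(std, mean) != (0, 0)` check so the division can't blow up.

import numpy as np
import pandas as pd


def score_episode(trade_totals, hold_totals, signals):
    """Return (sharpe, cumulative_return) for one episode.

    trade_totals / hold_totals: total portfolio value (cash + value) at each step.
    signals: the raw trade signal emitted at each step.
    """
    trade, hold = pd.Series(trade_totals), pd.Series(hold_totals)

    # Per-step excess return of trading over holding; drop the leading NaN from pct_change()
    diff = (trade.pct_change() - hold.pct_change()).iloc[1:]
    mean, std = diff.mean(), diff.std()

    sharpe = 0.
    if std > 0:  # guard the division (the patch checks `(std, mean) != (0, 0)`)
        # "breadth" weights the ratio by how varied the signal directions were
        breadth = np.std(np.sign(signals))
        sharpe = breadth * (mean / std)

    # Excess cumulative return: trading's total return minus holding's total return
    cum_ret = (trade.iloc[-1] / trade.iloc[0] - 1) - (hold.iloc[-1] / hold.iloc[0] - 1)
    return float(sharpe), float(cum_ret)


if __name__ == '__main__':
    # Toy numbers only: trading slightly outperforms buy-and-hold here
    print(score_episode(
        trade_totals=[2.0, 2.1, 2.05, 2.3],
        hold_totals=[2.0, 2.05, 2.0, 2.2],
        signals=[0, 40, -40, 40],
    ))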