In [1]:
import sys
sys.path.insert(0, '../')
import gc
import logging
logging.basicConfig(level=logging.DEBUG)
from time import time
import os
import numpy as np
np.random.seed(42)

from cryptotrader.envs.driver import Apocalipse, get_historical
from cryptotrader.envs.utils import make_env, get_dfs_from_db
from cryptotrader.utils import convert_to, array_normalize
from cryptotrader.models import cn_models

import chainer as cn
import pandas as pd
from bokeh.io import output_notebook
from jupyterthemes import jtplot
output_notebook()
jtplot.style()
%matplotlib inline
# %load_ext line_profiler

In [2]:
seed = 42
epilson = 1e-8
outdir = './logs'
data_dir = './data/'
save_dir = './save/'

# Env params
freq = 30
obs_steps = 50
tax = 0.0025
init_fiat = 100.0
init_crypto = 0.0
test = True

In [3]:
def convert_and_clean(x):
    x = x.apply(convert_to.decimal)
    f = x.rolling(30, center=True, min_periods=1).mean().apply(convert_to.decimal)
    x = x.apply(lambda x: x if x.is_finite() else np.nan)
    return x.combine_first(f)    

In [4]:
# Setup train environment
train_env = Apocalipse(name='train_env', seed=seed)
# Set environment options
train_env.set_freq(freq)
train_env.set_obs_steps(obs_steps)

symbols = ['usdtbtc', 'usdteth', 'usdtetc', 'usdtxrp', 'usdtltc']
for i, symbol in enumerate(symbols):
    df = pd.read_csv(data_dir + symbol + '_fev_may.csv')
    df['date'] = df.date.apply(pd.to_datetime)
    df = df.set_index('date')
    for col in df.columns:
        df[col] = convert_and_clean(df[col])
    train_env.add_df(df=df, symbol=symbol)
    train_env.add_symbol(symbol)
    train_env.set_init_crypto(init_crypto, symbol)
    train_env.set_tax(tax, symbol)

gc.collect()
train_env.set_init_fiat(init_fiat)

# Clean pools
train_env._reset_status()
train_env.clear_dfs()
gc.collect()

train_env.set_training_stage(True)
train_env.set_observation_space()
train_env.set_action_space()
train_env.reset(reset_funds=True, reset_results=True, reset_global_step=True);

# Setup test environment
test_env = Apocalipse(name='test_env', seed=seed)
# Set environment options
test_env.set_freq(freq)
test_env.set_obs_steps(obs_steps)

symbols = ['usdtbtc', 'usdteth', 'usdtetc', 'usdtxrp', 'usdtltc']
for i, symbol in enumerate(symbols):
    df = pd.read_csv(data_dir + symbol + '_may.csv')
    df['date'] = df.date.apply(pd.to_datetime)
    df = df.set_index('date')
    for col in df.columns:
        df[col] = convert_and_clean(df[col])
    test_env.add_df(df=df, symbol=symbol)
    test_env.add_symbol(symbol)
    test_env.set_init_crypto(init_crypto, symbol)
    test_env.set_tax(tax, symbol)

gc.collect()
test_env.set_init_fiat(init_fiat)

# Clean pools
test_env._reset_status()
test_env.clear_dfs()
gc.collect()

test_env.set_training_stage(False)
test_env.set_observation_space()
test_env.set_action_space()
test_env.reset(reset_funds=True, reset_results=True, reset_global_step=True);

INFO:Cryptocoin arbiter agent logging file:[train_env Apocalipse initialization]
Apocalipse Initialized!
ONLINE MODE: False


INFO:Cryptocoin arbiter agent logging file:[<function Apocalipse.set_action_space at 0x7f3d654f2d90>]
Setting environment with 6 symbols.

INFO:Cryptocoin arbiter agent logging file:[test_env Apocalipse initialization]
Apocalipse Initialized!
ONLINE MODE: False


INFO:Cryptocoin arbiter agent logging file:[<function Apocalipse.set_action_space at 0x7f3d654f2d90>]
Setting environment with 6 symbols.



In [5]:
eiie = cn_models.EIIE(train_env.get_step_obs_all().shape, 128, 512)

In [None]:
optimizer = cn.optimizers.Adam(alpha=3e-3)
l2_reg = cn.optimizer.WeightDecay(1e-2)
optimizer.setup(eiie)
optimizer.add_hook(l2_reg)

In [None]:
training_epochs = 1000000
test_interval = 2000
test_epochs = 512
batch_size = 32

def make_batch(env, batch_size):
    obs_batch = []
    target_batch = []
    for i in range(batch_size):
        # Choose some random index
        env.step_idx = env.offset + np.random.randint(high=env.df.shape[0] - env.offset - 1, low=0)
        # Get obs and target and append it to their batches
        obs_batch.append(train_env.get_step_obs_all().values)

        target = (env.df.loc(axis=1)[:,'close'].iloc[env.step_idx + 1].values /
                  env.df.loc(axis=1)[:,'open'].iloc[env.step_idx + 1].values) - 1
        target_batch.append(target.reshape([1,-1]))

    obs_batch = cn_models.batch_states(obs_batch, np, cn_models.phi)
    target_batch = np.swapaxes(cn_models.batch_states(target_batch, np, cn_models.phi), 3, 2)

    return obs_batch, target_batch

## Training loop
t0 = 1e-8
best_score = -np.inf
train_r2_log = []
train_loss_log = []
test_r2_log = []
test_loss_log = []
for epoch in range(training_epochs):
    try:
        t1 = time()

        if epoch == 0:
            optimizer.hyperparam.alpha = 3e-3
        elif epoch == 2000:
            optimizer.hyperparam.alpha = 5e-4
        elif epoch == 6000:
            optimizer.hyperparam.alpha = 7e-5
        elif epoch == 9000:
            optimizer.hyperparam.alpha = 9e-6
        elif epoch == 12000:
            optimizer.hyperparam.alpha = 1e-6

        obs_batch, target_train = make_batch(train_env, batch_size)

        prediction_train = eiie(obs_batch)

        loss = cn.functions.mean_squared_error(prediction_train, target_train)

        eiie.cleargrads()
        loss.backward()

        optimizer.update()

        train_r2 = cn.functions.r2_score(prediction_train, target_train)

        train_loss_log.append(loss.data)
        train_r2_log.append(train_r2.data)
        
        t0 += time() - t1
        print("Training epoch %d/%d, loss: %.08f, r2: %f, samples/sec: %f" % (epoch + 1,
                                                                           training_epochs,
                                                                           loss.data,
                                                                           train_r2.data,
                                                                           (epoch + 1) * batch_size / t0), end='\r')

        if epoch % test_interval == 0 and epoch != 0:
            test_losses = []
            test_scores = []
            for _ in range(test_epochs):
                test_batch, target_test = make_batch(test_env, batch_size)

                # Forward the test data
                prediction_test = eiie(test_batch)

                # Calculate the loss
                loss_test = cn.functions.mean_squared_error(prediction_test, target_test)
                loss_test.to_cpu()
                test_losses.append(loss_test.data)

                # Calculate the accuracy
                test_r2 = cn.functions.r2_score(prediction_test, target_test)
                test_r2.to_cpu()
                test_scores.append(test_r2.data)

                test_loss_log.append(np.mean(test_losses))
                test_r2_log.append(np.mean(test_scores))
                
            if np.mean(test_scores) > best_score:
                best_score = np.mean(test_scores)
                print("\nNew best score:", best_score, end='\r')
                cn.serializers.save_npz(save_dir + 'eiie0.1.npz', eiie, compression=True)

            print('\nval loss: {:.04f}, val r2 score: {:.04f}'.format(
                np.mean(test_losses), np.mean(test_scores)))

    except KeyboardInterrupt:
        print("\nInterrupted by the user. Best score:", best_score)
        break

Training epoch 2001/1000000, loss: 0.00008754, r2: -0.020286, samples/sec: 33.8439287789
New best score: -0.033666
val loss: 0.0002, val r2 score: -0.0337
Training epoch 4001/1000000, loss: 0.00012501, r2: -0.038642, samples/sec: 24.080963
New best score: -0.032467
val loss: 0.0002, val r2 score: -0.0325
Training epoch 6001/1000000, loss: 0.00015903, r2: -0.010188, samples/sec: 24.228203
val loss: 0.0002, val r2 score: -0.0359
Training epoch 8001/1000000, loss: 0.00008065, r2: -0.016498, samples/sec: 24.416351
val loss: 0.0002, val r2 score: -0.0347
Training epoch 10001/1000000, loss: 0.00007054, r2: -0.034572, samples/sec: 24.409284
val loss: 0.0002, val r2 score: -0.0336
Training epoch 12001/1000000, loss: 0.00008389, r2: -0.035735, samples/sec: 24.380954
val loss: 0.0002, val r2 score: -0.0343
Training epoch 14001/1000000, loss: 0.00008011, r2: -0.025324, samples/sec: 24.360963
val loss: 0.0002, val r2 score: -0.0353
Training epoch 15542/1000000, loss: 0.00010362, r2: -0.024060, sam

In [None]:
act = cn_models.CashBias()

train_env._reset_status()
train_env.set_training_stage(False)
train_env.reset(reset_funds=True, reset_results=True, reset_global_step=True);

test_env._reset_status()
test_env.set_training_stage(False)
test_env.reset(reset_funds=True, reset_results=True, reset_global_step=True);

# Load best model
cn.serializers.load_npz(save_dir + 'eiie0.1.npz', obj=eiie)

In [None]:
for i in range(train_env.df.shape[0] - obs_steps - 1):
    action = act(eiie(cn_models.batch_states([train_env.get_step_obs_all().values], np, cn_models.phi)))
    action = array_normalize(np.clip(action.data.flatten(), a_min=0.0, a_max=np.inf))
    train_env.step(action)
    print("Processing obs %d/%d" % (i + 1, train_env.df.shape[0] - obs_steps - 1), end='\r')
train_env.plot_results(train_env._get_results());

In [None]:
for i in range(test_env.df.shape[0] - obs_steps - 1):
    action = act(eiie(cn_models.batch_states([test_env.get_step_obs_all().values], np, cn_models.phi)))
    action = array_normalize(np.clip(action.data.flatten(), a_min=0.0, a_max=np.inf))
    test_env.step(action)
    print("Processing obs %d/%d" % (i + 1, test_env.df.shape[0] - obs_steps - 1), end='\r')
test_env.plot_results(test_env._get_results());