## Install TensorTrade

In [None]:
!python3 -m pip install git+https://github.com/tensortrade-org/tensortrade.git

## Define global variables

In [None]:
# Reproducibility and output location.
seed = 1337
save_path = 'agents/'

# Run sizing; memory_capacity is derived from n_steps.
n_steps = 1000
n_episodes = 20
window_size = 30
memory_capacity = 10 * n_steps
total_timesteps = 500000

learning_rate = 3e-4

# Number of equal-sized partitions of the dataset used to evaluate class sparsity.
n_bins = 5

## Setup data fetching

In [None]:
from tensortrade.data.cdd import CryptoDataDownload

import numpy as np
import pandas as pd
pd.options.mode.use_inf_as_na = True

def prepare_data(df):
    """Normalise an OHLCV frame in place and return the same object.

    The ``volume`` column is cast to 64-bit integers, ``date`` is parsed,
    rows are sorted chronologically with a fresh 0..n-1 index, and ``date``
    is finally rendered back to text as ``'%Y-%m-%d %I:%M %p'``.

    Note that ``df`` itself is mutated; the return value is that same frame.
    """
    df['volume'] = np.int64(df['volume'])
    df['date'] = pd.to_datetime(df['date'])

    # Oldest row first, then renumber the rows.
    df.sort_values('date', inplace=True)
    df.reset_index(inplace=True, drop=True)

    formatted_dates = df['date'].dt.strftime('%Y-%m-%d %I:%M %p')
    df['date'] = formatted_dates
    return df

def fetch_data():
    """Fetch hourly Bitfinex BTC/USD candles and keep only the OHLCV columns.

    Returns the frame produced by ``CryptoDataDownload.fetch`` restricted to
    ``date``, ``open``, ``high``, ``low``, ``close`` and ``volume``.
    """
    ohlcv_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
    candles = CryptoDataDownload().fetch("Bitfinex", "USD", "BTC", "1h")
    return candles[ohlcv_columns]

def load_csv(filename):
    """Load ``data/<filename>`` and return it normalised by ``prepare_data``.

    The first line of the file is skipped and the ``symbol`` and
    ``volume_btc`` columns are dropped before the timestamps are expanded.
    """
    frame = pd.read_csv('data/' + filename, skiprows=1)
    frame.drop(columns=['symbol', 'volume_btc'], inplace=True)

    # Timestamps arrive as "2019-10-17 09-AM"; expand them to
    # "2019-10-17 09-00-00 AM" by splicing minutes/seconds before the AM/PM tag.
    raw_dates = frame['date']
    frame['date'] = raw_dates.str[:14] + '00-00 ' + raw_dates.str[-2:]

    return prepare_data(frame)

In [None]:
# Download the candles once and display the resulting frame.
data = fetch_data()
data

In [None]:
# Disabled alternative data source: the snippet below is wrapped in a string
# literal so it is not executed. It fetches daily candles with yfinance and
# renames the columns to the lowercase OHLCV names used by the rest of the notebook.
'''
import yfinance as yf

main_ticker = 'BTC-USD'  # TODO: replace this with your own ticker

ticker = yf.Ticker(ticker=main_ticker)

data = ticker.history(period='max', interval='1d')
data = data[['Open', 'High', 'Low', 'Close', 'Volume']]
data['Volume'] = data['Volume'].astype(int)
data = data.reset_index()
data = data.rename(columns={'Open': 'open', 
                            'High': 'high', 
                            'Low': 'low', 
                            'Close': 'close', 
                            'Volume': 'volume', 
                            'Date': 'date'})
#data = prepare_data(data)
data
'''

## Create features for the feed module

In [None]:
import os
import numpy as np
import ta as ta1
import pandas_ta as ta

import quantstats as qs
qs.extend_pandas()

def fix_dataset_inconsistencies(dataframe, fill_value=None):
    """Return a copy of *dataframe* with infinities and NaNs repaired.

    Steps:
      1. ``+/-inf`` are replaced with NaN.
      2. The first row is made NaN-free: with ``fill_value=None`` every column
         receives its own first valid value, otherwise the missing cells of
         the first row are set to ``fill_value``.
      3. Remaining holes are forward-filled; seeding the first row first means
         no back-filling is ever needed for holes in the middle.
      4. Columns that still contain NaN (i.e. columns with no valid value at
         all) are dropped.

    The input frame is not modified. An empty frame is returned unchanged.
    """
    dataframe = dataframe.replace([-np.inf, np.inf], np.nan)

    if dataframe.empty:
        return dataframe

    def first_valid_value(column):
        # Positional lookup, so it works for any index (not only 0..n-1), and
        # an all-NaN column yields NaN so it gets dropped at the end.
        valid = column.notna().to_numpy()
        return column.iloc[valid.argmax()] if valid.any() else np.nan

    if fill_value is None:
        dataframe.iloc[0, :] = dataframe.apply(first_valid_value, axis='index')
    else:
        dataframe.iloc[0, :] = dataframe.iloc[0, :].fillna(fill_value)

    return dataframe.ffill(axis='index').dropna(axis='columns')

def rsi(price: 'pd.Series[pd.Float64Dtype]', period: float) -> 'pd.Series[pd.Float64Dtype]':
    """Relative Strength Index of *price*.

    Gains and losses are the positive and negative parts of the one-step
    price change, each smoothed with an exponentially weighted mean using
    ``alpha = 1 / period``. The result is ``100 * (1 - 1 / (1 + RS))`` with
    ``RS = mean gain / mean loss``, so a purely rising series tends to 100
    and a purely falling one to 0.

    Parameters
    ----------
    price : pd.Series
        Price series.
    period : float
        Smoothing period.

    Returns
    -------
    pd.Series
        RSI values; the first element is NaN because the first change is
        undefined.
    """
    change = price.diff()
    # Gains are the positive changes, losses the magnitude of the negative ones.
    gains = np.maximum(change, 0)
    losses = np.minimum(change, 0).abs()
    alpha = 1 / period
    rs = gains.ewm(alpha=alpha).mean() / losses.ewm(alpha=alpha).mean()
    return 100 * (1 - (1 + rs) ** -1)

def macd(price: 'pd.Series[pd.Float64Dtype]', fast: float, slow: float, signal: float) -> 'pd.Series[pd.Float64Dtype]':
    """MACD histogram of *price*.

    The MACD line is the difference between the ``fast`` and ``slow``
    exponential moving averages of the price; the returned series is that
    line minus its own exponential moving average over ``signal`` periods.
    All averages use ``adjust=False``.
    """
    def smooth(series, span):
        return series.ewm(span=span, adjust=False).mean()

    macd_line = smooth(price, fast) - smooth(price, slow)
    return macd_line - smooth(macd_line, signal)

def generate_all_pandas_ta_features(data: pd.DataFrame) -> pd.DataFrame:
    """Generate all default indicators from the pandas_ta library.

    Works on a copy of *data*; every strategy category listed below is run
    through the ``ta`` accessor (excluding ``kvo``) and the enriched copy is
    returned.
    """
    frame = data.copy()

    # NOTE(review): the existing index is reinterpreted as datetimes; with a
    # plain integer index this does not reflect the 'date' column, and the
    # returned frame keeps this DatetimeIndex — confirm this is intended.
    frame.index = pd.DatetimeIndex(frame.index)

    # Let pandas_ta use every available core.
    frame.ta.cores = os.cpu_count()

    categories = (
        'candles',
        'cycles',
        'momentum',
        'overlap',
        'performance',
        'statistics',
        'trend',
        'volatility',
        'volume',
    )
    for category in categories:
        frame.ta.strategy(category, exclude=['kvo'])

    return frame

def generate_all_ta_features(data: pd.DataFrame) -> pd.DataFrame:
    """Generate all default indicators from the ta library.

    The indicators are added to a copy of *data* (the input is left alone),
    using the lowercase OHLCV column names and ``fillna=True``.
    """
    frame = data.copy()
    ohlcv = ('open', 'high', 'low', 'close', 'volume')

    # The call's return value is not used; the function returns ``frame`` itself.
    ta1.add_all_ta_features(frame, *ohlcv, fillna=True)

    return frame

def generate_all_custom_features(data: pd.DataFrame) -> pd.DataFrame:
    """Generate all custom indicators.

    Returns a frame whose first column is ``date`` followed by, in order:

    * ``prev_*``  - previous-row open/high/low/close/volume,
    * ``vol_N``   - rolling standard deviation of close over N rows,
    * ``ma_N``    - rolling mean of close over N rows,
    * ``ema_N``   - exponential moving average of close (``ta`` library),
    * ``lr_*``    - log returns of open/high/low/close (first row set to 0),
    * ``r_volume``- row-to-row change in volume (first row set to 0),
    * ``rsi_N``   - RSI of close for several periods,
    * ``macd_*``  - MACD histograms of close for three parameter sets.

    The input frame is not modified.
    """
    source = data.copy()
    close = source['close']
    rolling_windows = (5, 10, 20, 30, 50, 60, 100, 200)

    # Insertion order defines the column order of the result.
    features = {}

    for name in ('open', 'high', 'low', 'close', 'volume'):
        features['prev_{}'.format(name)] = source[name].shift(1)

    for window in rolling_windows:
        features['vol_{}'.format(window)] = close.rolling(window=window).std().abs()

    for window in rolling_windows:
        features['ma_{}'.format(window)] = close.rolling(window=window).mean()

    for window in (5, 10, 20, 60, 64, 120):
        features['ema_{}'.format(window)] = \
            ta1.trend.ema_indicator(close, window=window, fillna=True)

    for name in ('open', 'high', 'low', 'close'):
        features['lr_{}'.format(name)] = np.log(source[name]).diff().fillna(0)

    # Volume change must come from the volume column (it used to read close).
    features['r_volume'] = source['volume'].diff().fillna(0)

    for period in (5, 10, 100, 7, 28, 6, 14, 24):
        features['rsi_{}'.format(period)] = rsi(close, period=period)

    features['macd_normal'] = macd(close, fast=12, slow=26, signal=9)
    features['macd_short'] = macd(close, fast=10, slow=50, signal=5)
    # The fast span must be the shorter one; 200/100 were swapped before.
    features['macd_long'] = macd(close, fast=100, slow=200, signal=50)

    return pd.concat([source[['date']], pd.DataFrame.from_dict(features)],
                     axis='columns')

def generate_all_quantstats_features(data: pd.DataFrame) -> pd.DataFrame:
    """Generate all default indicators from the quantstats library.

    Every public name in ``qs.stats`` (except the exclusions below) is called
    with the ``close`` series indexed by date. Results that are a
    ``pd.Series`` become a new column named after the indicator; any other
    result type is ignored. Indicators raising ``InvalidIndexError`` or
    ``ValueError`` are skipped. The date index is turned back into a column
    before returning.
    """
    skipped = [
        'compare',
        'greeks',
        'information_ratio',
        'omega',
        'r2',
        'r_squared',
        'rolling_greeks',
        'warn',
    ]
    candidate_names = [
        name for name in dir(qs.stats)
        if name[0] != '_' and name not in skipped
    ]

    frame = data.copy().set_index('date')
    frame.index = pd.DatetimeIndex(frame.index)

    stats_namespace = qs.stats.__dict__
    for name in candidate_names:
        try:
            result = stats_namespace[name](frame['close'])
            if isinstance(result, pd.Series):
                column = result.to_frame(name=name)
                frame = pd.concat([frame, column], axis='columns')
        except (pd.errors.InvalidIndexError, ValueError):
            # Best effort: indicators that cannot be computed are left out.
            pass

    return frame.reset_index()

def generate_features(data: pd.DataFrame) -> pd.DataFrame:
    """Build the full feature frame from raw OHLCV data.

    Runs the four feature generators on *data*, joins their outputs side by
    side, removes duplicated column names (the first occurrence wins), drops
    the first 200 rows where long-window indicators are still warming up,
    repairs remaining NaN/inf values and finally removes rows with a
    duplicated ``date`` (the first occurrence wins).

    Returns a frame with a fresh 0..n-1 index and ``date`` as a column.
    """
    parts = [
        generate_all_pandas_ta_features(data),
        generate_all_ta_features(data),
        generate_all_custom_features(data),
        generate_all_quantstats_features(data),
    ]

    # The generators return frames with different index types (one of them a
    # DatetimeIndex), yet all describe the same rows in the same order. Give
    # them a common positional index so the column-wise concat aligns the rows
    # instead of producing the union of unrelated indexes.
    parts = [part.reset_index(drop=True) for part in parts]

    # Concatenate all features
    features = pd.concat(parts, axis='columns')

    # Remove potential column duplicates
    features = features.loc[:, ~features.columns.duplicated()]

    # A lot of indicators generate NaNs at the beginning of DataFrames, so remove them
    features = features.iloc[200:].reset_index(drop=True)

    features = fix_dataset_inconsistencies(features, fill_value=None)

    # Keep a single row per timestamp.
    features = features.set_index('date')
    features = features[~features.index.duplicated(keep='first')]
    return features.reset_index()

In [None]:
# Replace the raw candles with the engineered feature frame and display it.
data = generate_features(data)
data

## Remove features with low variance before splitting the dataset

In [None]:
from sklearn.feature_selection import VarianceThreshold
# Fit a variance filter on the features; the 'date' column is set aside for
# the fit and re-attached afterwards.
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
date = data[['date']].copy()
data = data.drop(columns=['date'])
sel.fit(data)
# NOTE(review): the filtered frame built on the next line is discarded, not
# assigned back, so `data` still holds every column after this cell. Assigning
# it would change which columns reach the later cells (including the explicit
# drop list) — confirm the intent before changing.
data[data.columns[sel.get_support(indices=True)]]
data = pd.concat([date, data], axis='columns')
data

## Drop redundant or broken features

In [None]:
# Columns removed by hand; drop() raises KeyError if either is missing.
to_drop = ['others_dlr', 'compsum']

data = data.drop(columns=to_drop)

data.shape

## Split dataset

In [None]:
from sklearn.model_selection import train_test_split

def split_data(data):
    """Split *data* chronologically into train, test and validation parts.

    The target ``y`` is the percentage change of ``close``. Without
    shuffling, the last 33% of the rows become the validation set and the
    remaining 67% are halved into train (older) and test (newer).

    Returns
    -------
    tuple
        ``(X_train, X_test, X_valid, y_train, y_test, y_valid)``.
    """
    y = data['close'].pct_change()

    X_train_test, X_valid, y_train_test, y_valid = \
        train_test_split(data, y, train_size=0.67, test_size=0.33, shuffle=False)

    X_train, X_test, y_train, y_test = \
        train_test_split(X_train_test, y_train_test, train_size=0.50, test_size=0.50, shuffle=False)

    return X_train, X_test, X_valid, y_train, y_test, y_valid

In [None]:
# Chronological train / test / validation subsets of the feature frame.
X_train, X_test, X_valid, y_train, y_test, y_valid = \
    split_data(data)

## Print basic quantstats report

In [None]:
def print_quantstats_full_report(env, data, output='dqn_quantstats'):
    """Print a quantstats report for *env*'s portfolio and save it as HTML.

    Returns are the percentage changes of the portfolio net worth, skipping
    the first ``window_size`` performance records. The HTML report is written
    to ``output + '.html'``.
    """
    history = pd.DataFrame.from_dict(env.action_scheme.portfolio.performance, orient='index')
    returns = history['net_worth'].iloc[window_size:].pct_change().iloc[1:]

    # WARNING! The dates are fake (daily steps starting at the first date of
    # `data`) and quantstats' default parameters are used!
    fake_dates = pd.date_range(start=data['date'].iloc[0], freq='1d', periods=returns.size)
    returns.index = fake_dates

    qs.reports.full(returns)
    qs.reports.html(returns, output=output + '.html')

#print_quantstats_full_report(env, data)

## Get custom dataset statistics

In [None]:
from scipy.stats import iqr

def estimate_outliers(data):
    """Return 1.5 times the interquartile range of *data*."""
    spread = iqr(data)
    return spread * 1.5

def estimate_percent_gains(data, column='close'):
    """Return the outlier threshold (1.5 x IQR) of the returns of *column*."""
    return estimate_outliers(get_returns(data, column=column))

def get_returns(data, column='close'):
    """Return the percentage change of *column* as a one-column frame.

    Missing values in the first row are set to 0 and later holes are repaired
    by ``fix_dataset_inconsistencies``.
    """
    pct_changes = data[[column]].pct_change()
    return fix_dataset_inconsistencies(pct_changes, fill_value=0)

def precalculate_ground_truths(data, column='close', threshold=None):
    """Label each row 1 when its return exceeds the threshold, else 0.

    When *threshold* is ``None`` the threshold is estimated from the returns
    themselves with ``estimate_outliers``.
    """
    returns = get_returns(data, column=column)
    if threshold is None:
        cutoff = estimate_outliers(returns)
    else:
        cutoff = threshold
    return (returns[column] > cutoff).astype(int)

def is_null(data):
    """Return whether *data* contains at least one missing value."""
    missing_mask = data.isnull().to_numpy()
    return missing_mask.any()

def is_sparse(data, column='close'):
    """Check that positive labels occur in every partition of the dataset.

    The labels from ``precalculate_ground_truths`` are cut into ``n_bins``
    consecutive chunks of equal size (the last chunk also takes the
    remainder). Returns ``True`` when every chunk contains at least one
    positive label, ``False`` otherwise.
    """
    labels = precalculate_ground_truths(data, column=column)
    total = labels.shape[0]
    width = total // n_bins
    edges = [k * width for k in range(n_bins)] + [total]
    return all(
        labels.iloc[start:stop].astype(bool).any()
        for start, stop in zip(edges, edges[1:])
    )

def is_data_predictible(data, column):
    """Return True when *data* has no missing values and passes ``is_sparse``.

    The two conditions are combined with ``and``: ``not`` binds more loosely
    than ``&``, so the former ``not a & b`` evaluated ``not (a & b)`` and
    ignored the ``is_sparse`` result whenever the data had no nulls.
    """
    return bool(not is_null(data) and is_sparse(data, column))

# Summary statistics for every column, including non-numeric ones.
data.describe(include='all')

## Evaluate outlier sparsity of the data

In [None]:
import matplotlib.pyplot as plt
# Plot the close-to-close returns, then run the null/sparsity check.
plt.plot(get_returns(data, column='close'))
plt.show()
is_data_predictible(data, 'close')

## Percentage of the dataset generating rewards (keep it between 5% and 15%, or just rely on is_data_predictible())

In [None]:
# Labels are 1 for rows whose return exceeds the outlier threshold, else 0.
ground_truths = precalculate_ground_truths(data, column='close')

# Show the labels of the first 1000 rows.
plt.plot(ground_truths.iloc[:1000])
plt.show()

# Share of rows labelled 1. The mean of a 0/1 series is that share; the former
# value_counts().pct_change() expression gave the ratio of ones to zeros instead.
percent_rewardable = str(round(100 * ground_truths.mean(), 2)) + '%'
print(percent_rewardable)

## Threshold to pass to reward scheme

In [None]:
# Combined train+test frame; only needed by the commented-out alternative below.
X_train_test = pd.concat([X_train, X_test], axis='index')
#threshold = estimate_percent_gains(X_train_test, 'close')
# The threshold handed to the reward scheme is estimated from the training subset only.
threshold = estimate_percent_gains(X_train, 'close')
threshold

## Implement basic feature engineering

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

from feature_engine.selection import SelectBySingleFeaturePerformance

In [None]:
# Small random forest used to score each feature on its own.
# NOTE(review): n_jobs=7 is hard-coded — confirm it suits the target machine.
rf = RandomForestClassifier(n_estimators=10, 
                            random_state=seed, 
                            n_jobs=7)

# Configured with ROC-AUC scoring, 3-fold cross-validation and a 0.50 threshold.
sel = SelectBySingleFeaturePerformance(variables=None, 
                                       estimator=rf, 
                                       scoring="roc_auc", 
                                       cv=3, 
                                       threshold=0.50)

# The target is the binary "return exceeds the outlier threshold" label of the training set.
sel.fit(X_train, precalculate_ground_truths(X_train, column='close'))

In [None]:
# Per-feature scores from the selector, best first.
feature_performance = pd.Series(sel.feature_performance_).sort_values(ascending=False)
feature_performance

In [None]:
# Bar chart of the per-feature scores.
feature_performance.plot.bar(figsize=(20, 5))
plt.title('Performance of ML models trained with individual features')
plt.ylabel('roc-auc')

In [None]:
# Features the selector marked for removal.
features_to_drop = sel.features_to_drop_
features_to_drop

In [None]:
# Never drop the raw OHLCV columns: create_env reads them from the unscaled frame.
to_drop = list(set(features_to_drop) - set(['open', 'high', 'low', 'close', 'volume']))
len(to_drop)

In [None]:
# Apply the same column removal to all three subsets so they stay aligned.
X_train = X_train.drop(columns=to_drop)
X_test = X_test.drop(columns=to_drop)
X_valid = X_valid.drop(columns=to_drop)

X_train.shape, X_test.shape, X_valid.shape

In [None]:
# Columns that survived feature selection.
X_train.columns.tolist()

## Normalize the dataset subsets to make the model converge faster

In [None]:
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

scaler_type = MinMaxScaler

def get_feature_scalers(X, scaler_type=scaler_type):
    """Fit one scaler per feature column of *X*.

    A fresh ``scaler_type()`` is fitted on each column other than ``date``.
    The returned list follows the column order of *X*.
    """
    feature_names = X.columns[X.columns != 'date']
    return [
        scaler_type().fit(X[name].values.reshape(-1, 1))
        for name in feature_names
    ]

def get_scaler_transforms(X, scalers):
    """Apply fitted per-column *scalers* to the feature columns of *X*.

    Parameters
    ----------
    X : pd.DataFrame
        Frame whose columns other than ``date`` are transformed.
    scalers : sequence
        One fitted scaler per feature column, in the column order of *X*;
        each must provide ``transform`` for an ``(n, 1)`` array.

    Returns
    -------
    pd.DataFrame
        The scaled feature columns (without ``date``). The frame keeps
        *X*'s index so it can be joined back to other columns of *X*
        regardless of how *X* is indexed.
    """
    feature_names = X.columns[X.columns != 'date']
    scaled_columns = [
        pd.Series(scaler.transform(X[name].values.reshape(-1, 1)).ravel(),
                  index=X.index, name=name)
        for name, scaler in zip(feature_names, scalers)
    ]
    return pd.concat(scaled_columns, axis='columns')

def normalize_data(X_train, X_test, X_valid):
    """Scale the three subsets feature by feature.

    Two families of scalers are fitted: one on train+test only and one on
    train+test+validation. Train and test are scaled with the former. The
    validation subset is scaled twice: once with the train+test scalers and
    once with the scalers that also saw the validation rows (the "leaking"
    variant). The ``date`` column is carried through unscaled as the first
    column of every result.

    Returns
    -------
    tuple
        ``(train_test_scalers, train_test_valid_scalers, X_train_scaled,
        X_test_scaled, X_valid_scaled, X_valid_scaled_leaking)``.
    """
    def with_dates(dates, scaled):
        return pd.concat([dates, scaled], axis='columns')

    seen = pd.concat([X_train, X_test], axis='index')
    everything = pd.concat([seen, X_valid], axis='index')

    seen_dates = seen[['date']]
    all_dates = everything[['date']]

    seen = seen.drop(columns=['date'])
    everything = everything.drop(columns=['date'])

    train_test_scalers = get_feature_scalers(seen, scaler_type=scaler_type)
    train_test_valid_scalers = get_feature_scalers(everything, scaler_type=scaler_type)

    seen_scaled = with_dates(
        seen_dates, get_scaler_transforms(seen, train_test_scalers))
    everything_scaled = with_dates(
        all_dates, get_scaler_transforms(everything, train_test_scalers))
    everything_scaled_leaking = with_dates(
        all_dates, get_scaler_transforms(everything, train_test_valid_scalers))

    # Cut the stacked frames back into the original subsets by row position.
    n_train = X_train.shape[0]
    n_seen = seen.shape[0]

    return (train_test_scalers,
            train_test_valid_scalers,
            seen_scaled.iloc[:n_train],
            seen_scaled.iloc[n_train:],
            everything_scaled.iloc[n_seen:],
            everything_scaled_leaking.iloc[n_seen:])

In [None]:
# Fit the scalers and produce the scaled counterparts of every subset.
train_test_scalers, train_test_valid_scalers, X_train_scaled, X_test_scaled, X_valid_scaled, X_valid_scaled_leaking = \
    normalize_data(X_train, X_test, X_valid)

## Save new feature set

In [None]:
import os
# Output paths live in the current working directory.
cwd = os.getcwd()

train_csv = os.path.join(cwd, 'train.csv')
test_csv = os.path.join(cwd, 'test.csv')
valid_csv = os.path.join(cwd, 'valid.csv')
train_scaled_csv = os.path.join(cwd, 'train_scaled.csv')
test_scaled_csv = os.path.join(cwd, 'test_scaled.csv')
valid_scaled_csv = os.path.join(cwd, 'valid_scaled.csv')
valid_scaled_leaking_csv = os.path.join(cwd, 'valid_scaled_leaking.csv')

# Saving is disabled; uncomment to write the subsets to disk.
# NOTE(review): the scaled paths previously wrote the unscaled frames; the
# disabled lines below now reference the scaled frames instead.
#X_train.to_csv(train_csv, index=False)
#X_test.to_csv(test_csv, index=False)
#X_valid.to_csv(valid_csv, index=False)
#X_train_scaled.to_csv(train_scaled_csv, index=False)
#X_test_scaled.to_csv(test_scaled_csv, index=False)
#X_valid_scaled.to_csv(valid_scaled_csv, index=False)
#X_valid_scaled_leaking.to_csv(valid_scaled_leaking_csv, index=False)

## Write a renderer

In [None]:
# Things to understand here:
# Writing a Renderer

import matplotlib.pyplot as plt

from tensortrade.env.generic import Renderer


class PositionChangeChart(Renderer):
    """Renderer plotting the price with position changes next to portfolio performance.

    Parameters
    ----------
    color : str
        Matplotlib colour of the price line.
    """

    def __init__(self, color: str = "orange"):
        # Honour the caller's colour; the argument used to be ignored.
        self.color = color

    def render(self, env, **kwargs):
        """Draw the trading chart and the performance chart for *env*.

        The left panel shows the close price from the observer's renderer
        history; a 0 -> 1 action change is marked in green (buy) and any
        other action change in red (sell). The right panel plots every
        column of the portfolio performance record.
        """
        history = pd.DataFrame(env.observer.renderer_history)

        actions = list(history.action)
        price = list(history.close)

        buy = {}
        sell = {}

        # Compare every action with the next one; a marker is placed at the
        # step before the change.
        for step, (previous, current) in enumerate(zip(actions, actions[1:])):
            if previous == current:
                continue
            if previous == 0 and current == 1:
                buy[step] = price[step]
            else:
                sell[step] = price[step]

        buy = pd.Series(buy)
        sell = pd.Series(sell)

        fig, axs = plt.subplots(1, 2, figsize=(15, 5))

        fig.suptitle("Performance")

        axs[0].plot(np.arange(len(price)), price, label="price", color=self.color)
        axs[0].scatter(buy.index, buy.values, marker="^", color="green")
        axs[0].scatter(sell.index, sell.values, marker="^", color="red")
        axs[0].set_title("Trading Chart")

        performance_df = pd.DataFrame.from_dict(env.action_scheme.portfolio.performance, orient='index')
        performance_df.plot(ax=axs[1])
        axs[1].set_title("Net Worth")

        plt.show()

## Write a reward scheme encouraging rare volatile upside trades

In [None]:
from tensortrade.env.default.rewards import TensorTradeRewardScheme


class PBR(TensorTradeRewardScheme):
    r"""A reward scheme for position-based returns.

    * Let :math:`p_t` denote the price at time t.
    * Let :math:`x_t` denote the position at time t.
    * Let :math:`R_t` denote the reward at time t.

    Then the reward is defined as,
    :math:`R_{t} = (p_{t} - p_{t-1}) \cdot x_{t}`.

    The position is -1 after action 0 and +1 after any other action.

    Parameters
    ----------
    price : `Stream`
        The price stream to use for computing rewards.
    threshold : float
        Stored on the instance; the reward computation below does not use it
        (the thresholding line is commented out).
    window_size : int
        Stored on the instance; not used by the reward computation.
    """

    registered_name = "pbr"

    def __init__(self, price: 'Stream', threshold: float = 0.02, window_size: int = 1) -> None:
        super().__init__()
        self._window_size = self.default('window_size', window_size)
        self._threshold = self.default('threshold', threshold)
        self.position = -1

        # The position stream reads this object's current position on every step.
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")
        # One-step price difference.
        r = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        #r[(r > 0.0) & (r <= self._threshold)] = 0.0

        reward = (position * r).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int) -> None:
        """Update the position: -1 for action 0, +1 otherwise."""
        self.position = -1 if action == 0 else 1

    def get_reward(self, portfolio: 'Portfolio') -> float:
        """Advance the internal feed one step and return its reward value."""
        return self.feed.next()["reward"]

    def reset(self) -> None:
        """Resets the `position` and `feed` of the reward scheme."""
        self.position = -1
        self.feed.reset()

In [None]:
class AnomalousProfit(TensorTradeRewardScheme):
    """A reward scheme whose sign depends on whether the latest change in
    net worth exceeds a precalculated percentage.

    Parameters
    ----------
    threshold : float
        The minimum percentage change to exceed in order to get a positive reward.
    window_size : int
        Stored on the instance; not used by ``get_reward``.
    """

    registered_name = "anomalous"

    def __init__(self, threshold: float = 0.02, window_size: int = 1):
        self._window_size = self.default('window_size', window_size)
        self._threshold = self.default('threshold', threshold)

    def get_reward(self, portfolio: 'Portfolio') -> float:
        """Return plus or minus the absolute net worth of the latest step.

        Parameters
        ----------
        portfolio : `Portfolio`
            The portfolio being used by the environment.

        Returns
        -------
        float
            ``+|net worth|`` when the latest percentage change in net worth
            exceeds ``threshold``, ``-|net worth|`` otherwise, and ``0.0``
            while fewer than two performance records exist.
        """
        performance = pd.DataFrame.from_dict(portfolio.performance).T
        if performance.shape[0] <= 1:
            return 0.0

        # Hint: make it cumulative.
        net_worths = performance['net_worth']
        exceeded = precalculate_ground_truths(performance,
                                              column='net_worth',
                                              threshold=self._threshold)
        # Map the 0/1 labels onto -1/+1.
        direction = 2.0 * exceeded - 1.0
        signed_worth = direction * net_worths.abs()
        return signed_worth.iloc[-1]

In [None]:
class PenalizedProfit(TensorTradeRewardScheme):
    """A reward scheme which penalizes holding too little cash and
    decays with the time spent.

    Parameters
    ----------
    cash_penalty_proportion : float
        Fraction of the net worth that the cash balance is compared against;
        any shortfall is subtracted from the net worth before the reward is
        computed.
    """

    registered_name = "penalized"

    def __init__(self, cash_penalty_proportion: float = 0.10):
        self._cash_penalty_proportion = self.default('cash_penalty_proportion',
                                                     cash_penalty_proportion)

    def get_reward(self, portfolio: 'Portfolio') -> float:
        """Return the penalized relative profit divided by the step count.

        Parameters
        ----------
        portfolio : `Portfolio`
            The portfolio being used by the environment.

        Returns
        -------
        float
            ``((net worth - cash penalty) / initial net worth - 1) / steps``,
            or ``0.0`` while fewer than two performance records exist.
        """
        performance = pd.DataFrame.from_dict(portfolio.performance).T
        current_step = performance.shape[0]
        if current_step <= 1:
            return 0.0

        initial_amount = portfolio.initial_net_worth
        net_worth = performance['net_worth'].iloc[-1]
        cash_worth = performance['bitstamp:/USD:/total'].iloc[-1]

        # Penalty is the amount by which cash falls short of its target share.
        shortfall = net_worth * self._cash_penalty_proportion - cash_worth
        penalized_worth = net_worth - max(0, shortfall)

        relative_profit = (penalized_worth / initial_amount) - 1
        return relative_profit / current_step

## Setup trading environment

In [None]:
# Environment configurations consumed by create_env. Keys:
#   window_size      - number of past samples in each observation
#   max_allowed_loss - loss fraction passed to the max-loss stopper
#   data             - unscaled frame (source of the price and renderer columns)
#   data_scaled      - scaled frame (source of the observation features)
#   random_start_pct - passed through to the environment unchanged
env_config_training = {
    "window_size": 30,  # The number of past samples we want to look at (in hours)
    "max_allowed_loss": 0.90,  # If it goes past 90% loss during the iteration, we don't want to waste time on a "loser".
    "data": X_train,  # Unscaled training subset
    "data_scaled": X_train_scaled, 
    "random_start_pct": 0.0
}

env_config_evaluation = {
    "window_size": 30,  # The number of past samples we want to look at (in hours)
    "max_allowed_loss": 0.90,  # If it goes past 90% loss during the iteration, we don't want to waste time on a "loser".
    "data": X_test,  # Unscaled test subset
    "data_scaled": X_test_scaled, 
    "random_start_pct": 0.0
}

env_config_validation = {
    "window_size": 30,  # The number of past samples we want to look at (in hours)
    "max_allowed_loss": 1.0,  # Loss limit raised to 100% (training/evaluation use 90%)
    "data": X_valid,  # Unscaled validation subset
    "data_scaled": X_valid_scaled, 
    "random_start_pct": 0.0
}

# Test and validation rows stacked into one "unseen" run.
# NOTE(review): the validation part uses the *leaking* scaling (scalers fitted
# on data that includes the validation rows) — confirm this is intended.
X_unseen = pd.concat([X_test, X_valid], axis='index')
X_unseen_scaled = pd.concat([X_test_scaled, X_valid_scaled_leaking], axis='index')

env_config_unseen = {
    "window_size": 30,  # The number of past samples we want to look at (in hours)
    "max_allowed_loss": 1.0,  # Loss limit raised to 100% (training/evaluation use 90%)
    "data": X_unseen,  # Unscaled test + validation rows
    "data_scaled": X_unseen_scaled, 
    "random_start_pct": 0.0
}

In [None]:
import tensortrade.env.default as default

from tensortrade.feed.core import DataFeed, Stream
from tensortrade.feed.core.base import NameSpace
from tensortrade.env.default.actions import BSH
from tensortrade.env.default.rewards import RiskAdjustedReturns, SimpleProfit
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.oms.orders import TradeType

def create_env(config):
    """Build a TensorTrade environment from a configuration dictionary.

    Keys read from *config*: ``data`` (unscaled frame providing ``close`` for
    the price stream and ``date``/OHLCV for the renderer), ``data_scaled``
    (frame whose columns after the first become the observation features),
    ``window_size``, ``max_allowed_loss`` and ``random_start_pct``.

    The module-level ``threshold`` is passed to the PBR reward scheme. The
    portfolio starts with 10000 USD and 0 BTC on an exchange named
    ``bitstamp`` with a 0.1% commission.
    """
    # TODO: adjust according to your commission percentage, if present
    commission = 0.001
    # Price stream driving both the exchange and the reward scheme.
    price = Stream.source(list(config['data']['close']), 
                          dtype='float').rename('USD-BTC')
    bitstamp_options = ExchangeOptions(commission=commission)
    bitstamp = Exchange('bitstamp', 
                        service=execute_order, 
                        options=bitstamp_options)(price)

    cash = Wallet(bitstamp, 10000 * USD)
    asset = Wallet(bitstamp, 0 * BTC)

    portfolio = Portfolio(USD, [cash, asset])

    # One stream per scaled column; columns[1:] skips the first column
    # (normalize_data places 'date' first in the scaled frames).
    with NameSpace('bitstamp'):
        features = [
            Stream.source(list(config['data_scaled'][c]), 
                          dtype='float').rename(c) for c in config['data_scaled'].columns[1:]
            #Stream.source(list(config['data_scaled']['lr_close']), dtype='float').rename('lr_close')
        ]

    feed = DataFeed(features)
    feed.compile()

    # Active reward scheme; the alternatives below are kept for experimentation.
    reward_scheme = PBR(price=price, threshold=threshold)

    #reward_scheme = RiskAdjustedReturns(return_algorithm='sortino',
    #                                    window_size=15)

    #reward_scheme = SimpleProfit(window_size=30)

    #reward_scheme = AnomalousProfit(threshold=threshold)

    #reward_scheme = PenalizedProfit(cash_penalty_proportion=0.1)

    #action_scheme = BSH(
    #    cash=cash,
    #    asset=asset
    #)

    # The reward scheme is attached to the action scheme; PBR defines an
    # on_action hook that updates its position.
    action_scheme = BSH(
        cash=cash,
        asset=asset
    ).attach(reward_scheme)

    # Unscaled columns plus the current action, recorded for the renderers.
    renderer_feed = DataFeed([
        Stream.source(list(config['data']['date'])).rename('date'),
        Stream.source(list(config['data']['open']), dtype='float').rename('open'),
        Stream.source(list(config['data']['high']), dtype='float').rename('high'),
        Stream.source(list(config['data']['low']), dtype='float').rename('low'),
        Stream.source(list(config['data']['close']), dtype='float').rename('close'), 
        Stream.source(list(config['data']['volume']), dtype='float').rename('volume'), 
        Stream.sensor(action_scheme, 
                      lambda s: s.action, dtype='float').rename('action')
    ])

    renderer = [
        PositionChangeChart(),
        default.renderers.PlotlyTradingChart(),
    ]

    min_periods = config['window_size']  # Minimum of window_size

    observer = default.observers.TensorTradeObserver(
        portfolio=portfolio,
        feed=feed,
        renderer_feed=renderer_feed,
        window_size=config['window_size'],
        min_periods=min_periods
    )

    stopper = default.stoppers.MaxLossStopper(
        max_allowed_loss=config['max_allowed_loss']
    )

    informer = default.informers.TensorTradeInformer()

    env = default.create(
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        feed=feed,
        renderer_feed=renderer_feed,
        renderer=renderer,
        observer=observer,
        stopper=stopper,
        informer=informer,
        min_periods=min_periods,
        random_start_pct=config['random_start_pct'],
        window_size=config['window_size']
    )

    return env

In [None]:
# Build the training environment and pull one record from its feature feed.
env = create_env(config=env_config_training)
env.observer.feed.next()

## Setup training

In [None]:
def get_optimal_batch_size(window_size=30, n_steps=1000, batch_factor=4, stride=1):
    """Return ``(n_steps - window_size - stride) // batch_factor``.

    window_size   Look-back length (number of past samples).
    n_steps       Number of samples available.
    batch_factor  Divisor applied to the usable sample count.
    stride        Time series shift into the future.
    """
    usable_samples = n_steps - window_size - stride
    return usable_samples // batch_factor

# Batch size derived from the global window_size and n_steps.
batch_size = get_optimal_batch_size(window_size=window_size, n_steps=n_steps, batch_factor=4)
batch_size

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --bind_all --logdir logs

## TODO: implement tuning

## Agent training

In [None]:
import tensortrade.agents as agents
from tensortrade.agents import A2C
from tensortrade.agents.utils.common import ModelReader, create_envs

# Wrap the training environment, build the A2C 'cnn' model from the agents
# registry and train the agent.
# NOTE(review): presumably training stops once the target reward of 19 is
# reached — confirm against the agent's fit() documentation.
envs = create_envs(env)
model = ModelReader(
    agents.agents['a2c']['model']['cnn'][0],
    output_units=[envs[0].action_space.n, 1],
    input_shape=envs[0].observation_space.shape,
    optimizer='adam',
).build_model()
agent = A2C(envs, model, preprocess=False)
agent.fit(target_reward=19)

## Implement validation

In [None]:
#env = create_env(config=env_config_unseen)

In [None]:
# Run the trained agent without rendering.
agent.play(render=False)

In [None]:
# Draw the charts of the configured renderers for the environment.
env.render()