In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import seaborn as sns
import tensorflow as tf

In [None]:
from environment import TradingEnvironment
from market_instance import new_market, make_investors

### Configuration for the synthetic market

In [None]:
# Settings handed to TradingEnvironment for the synthetic market.
# NOTE(review): presumably the number of simulated investors and the number
# of trading rounds per simulated day — confirm against TradingEnvironment.
config = dict(
    num_investors=10,
    num_rounds_per_day=3,
)

### Our own portfolio

In [None]:
# Our starting portfolio: a cash balance plus one position per ticker symbol.
# The insertion order (cash first, then the stocks) is kept as in the
# original literal.
holdings = dict(
    cash=100000.00,
    TSLA=300,
    AAPL=200,
    MSFT=400,
)

### Create the RL Environment

In [None]:
# Create a new market instance and wrap it, together with the configuration
# and our portfolio, in the trading environment.
market = new_market()
# NOTE(review): the last argument (0.002) is a magic number — presumably the
# proportional trading cost; confirm against TradingEnvironment's signature
# and consider naming it.
environment = TradingEnvironment(config, holdings, market, 0.002)

## The market in action

In [None]:
# Run 360 open/close cycles of the market, calling
# environment.let_others_trade() once per cycle.
# NOTE(review): `daily` is overwritten on every iteration, so only the value
# from the last cycle survives the loop; it is not used in the visible cells.
for _ in range(360):
    market.open()
    daily = environment.let_others_trade()
    market.close()

In [None]:
def plot(ticker):
    """Plot the opening-price history of ``ticker`` together with ``psi(t)``.

    Reads the history of ``ticker`` from the global ``market`` and draws two
    lines on the same axes: the first history column (unpacked as the opening
    price) and the stock's ``psi`` evaluated at the same time indices.

    NOTE(review): ``psi`` presumably is the stock's underlying value/trend
    function — confirm against the stock class.

    Args:
        ticker: Key of the stock in ``market.stocks``, e.g. ``'AAPL'``.
    """
    sns.set(rc={'figure.figsize': (8.0, 4.0)})
    history = market.history_for(ticker)
    stock = market.stocks[ticker]
    # The transposed history unpacks into four series; only the first one
    # (opening prices) is plotted.
    p_open, _p_close, _p_high, _p_low = np.transpose(history)
    t = np.arange(len(p_open))
    # x and y are passed by keyword: seaborn >= 0.12 made them keyword-only,
    # so the positional form raises a TypeError there. The keyword form also
    # works on older seaborn releases.
    ax = sns.lineplot(x=t, y=p_open, label='open')
    sns.lineplot(x=t, y=[stock.psi(tt) for tt in t], label='psi(t)', ax=ax)
    ax.set(title=ticker, xlabel='t')


plot('AAPL')

## Perform an Action = Re-Balance

In [None]:
# Target portfolio weights for the re-balancing action; they sum to 1.
# NOTE(review): presumably one weight per entry of `holdings`, in the same
# order (cash first) — confirm against TradingEnvironment.act.
target_weights = np.array([0.1, 0.3, 0.4, 0.2])

In [None]:
# Print the investor and the total wealth before and after acting on the
# environment with target_weights. The return value of act() is ignored here.
investor = environment.investor
print(investor, environment.total_wealth())
environment.act(target_weights)
print(investor, environment.total_wealth())

In [None]:
# The resulting weights are slightly off-target, because stocks are held in
# integer counts and trading incurs costs.
environment.normalized_holdings()

### The ANN-based Trader Model

In [None]:
# --- Problem dimensions, derived from the portfolio ---
N_STOCKS = len(holdings) - 1      # every holdings entry except 'cash'
N_OUT = 1 + N_STOCKS              # stocks and cash
# Per time step: current log returns, positions' weights, cash weight.
N_FEATURES = 2 * N_STOCKS + 1

# --- Model / training hyper-parameters ---
N_STEPS = 20                      # history length for the LSTM
N_LSTM_NEURONS = 128
BATCH_SIZE = 128

In [None]:
from models import LSTM_TraderModel

In [None]:
# Instantiate the trader model with the dimension constants N_LSTM_NEURONS,
# N_STEPS, N_FEATURES and N_OUT.
lstm_model = LSTM_TraderModel(
    n_neurons=N_LSTM_NEURONS, 
    n_steps=N_STEPS,
    n_features=N_FEATURES,
    n_out=N_OUT)

# Random dummy batch of shape (BATCH_SIZE, N_STEPS, N_FEATURES) in float32.
x = np.random.random(
    [BATCH_SIZE, N_STEPS, N_FEATURES]).astype(np.float32)

# The model is called once on the dummy batch before summary() is requested.
# NOTE(review): `y` and `test_portfolio` are not used in the visible cells.
y = lstm_model(x) # initialize the model
test_portfolio = lstm_model.portfolio(x)
lstm_model.summary()

In [None]:
# A single random sample with 5 time steps; rebinds `x` and displays it.
# NOTE(review): the model was constructed with n_steps=N_STEPS (20), but this
# input has only 5 steps — confirm the model accepts other sequence lengths.
x = np.random.random(
    [1, 5, N_FEATURES]).astype(np.float32)
x

In [None]:
# Display the result of the model's portfolio() method for the current `x`.
lstm_model.portfolio(x)

### Observation And Reward
The observation consists of the normalized holdings and the normalized log returns of the traded assets with respect to the closing prices. 

In [None]:
# Act once more and capture what act() returns: an observation and a reward.
# NOTE(review): the weights are passed as a plain list here, whereas the
# earlier act() call received a numpy array — presumably both are accepted;
# confirm.
obs, reward = environment.act([.2, .2, .3, .3])
# The "4x"/"3x" counts in the message are hard-coded; they match the current
# `holdings` (cash plus three stocks).
print("Observations: 4x normalized holdings, 3x normalized log returns:")
print(obs)
print("Reward: %s" % reward)

Observe the time lag when running the cell above: `environment.act` is a very expensive method. We may need to consider parallel execution (e.g. with Ray) here.