In [12]:
%load_ext autoreload
%autoreload 2

#standard imports
import numpy as np
import pandas as pd
import numpy as np
import pandas as pd
import sys
sys.path.append("..")

#rl book imports
import rl
from rl.markov_decision_process import MarkovDecisionProcess
from rl.markov_process import State, MarkovProcess, NonTerminal, Terminal

from typing import (Callable, Dict, Generic, Iterator, Iterable, List,
                    Mapping, Optional, Sequence, Tuple, TypeVar, overload)

from rl.distribution import Categorical, Distribution, Constant, Choose
from rl.policy import Policy
from rl.monte_carlo import epsilon_greedy_policy, greedy_policy_from_qvf, glie_mc_control
from rl.function_approx import LinearFunctionApprox, AdamGradient


#custom imports 
import utils as u
import data as dat
import mdp_agent as ag
import baseline_policies as bp
import q_plots as qp
import backtest as btest

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Finite Horizon Trading MDP for mean reverting residuals. 

The goal of this part is to see whether an RL agent can learn an optimal trading rule on a simple mean-reverting residual process.

In [15]:
# Build the train/test data sets with the project's simulation helper.
# NOTE(review): the meaning of N is not visible here — presumably the number of simulated series; confirm in data.py.
train, test = dat.build_simulated_train_test(N=10)
# Plot the training data with the project's plotly helper.
u.plot_plotly_multiple(train)

The control algorithms available to us are:
- `glie_mc_control` (p. 352)
- `glie_sarsa` (p. 358)
- `q_learning`, which also needs `policy_from_q: PolicyFromQType` (p. 365)
- `q_learning_experience_replay`, which also needs `policy_from_q: PolicyFromQType` (p. 393)
- `least_squares_policy_iteration`, which takes `initial_target_policy: DeterministicPolicy[S, A]` and `transitions: Iterable[TransitionStep[S, A]]` as arguments

#### Baseline trading policies

In [13]:
# Baseline: a fixed threshold rule, evaluated with the project's Backtester.
trader = ag.Trading(train,test)
# Entry/exit levels for the long and short sides of the rule.
# NOTE(review): the levels sit around 100, presumably the level the simulated residual reverts to — confirm against the data.
threshold_policy = bp.ThresholdTradingPolicy(enter_long = 99.95, exit_long=99.99, enter_short = 100.05, exit_short = 100.025)
bt = btest.Backtester(trader, threshold_policy)
bt.summary()

### 1.1 Using glie_mc_control

Train

In [31]:
# Trading object built from the train/test data; it is passed to glie_mc_control as the MDP.
mdp_trading = ag.Trading(train,test)

# Start-state source requested for the "train" split; handed to glie_mc_control as its second argument.
# NOTE(review): the exact return type (distribution vs. iterable) is not visible here — confirm in mdp_agent.py.
states = mdp_trading.generate_start_state("train")

#phi(s,a)
def generate_features(pos,act):
    """Return the two phi(s, a) feature functions tied to one (pos, act) pair.

    Each returned callable takes x = (state, action) and is non-zero only
    when x[0].state["position"] == pos and x[1] == act:

    * the first evaluates to 1 (intercept term);
    * the second evaluates to x[0].state["Spot"] (slope with respect to spot).

    In every other case both evaluate to 0. For now this is a purely linear
    representation.
    """

    def is_active(x):
        # True only for the (position, action) pair this feature pair covers.
        return x[0].state["position"] == pos and x[1] == act

    def intercept(x):
        return 1 if is_active(x) else 0

    def spot_slope(x):
        return x[0].state["Spot"] if is_active(x) else 0

    return [intercept, spot_slope]

# Assemble the phi(s, a) feature list: one (intercept, spot-slope) pair for
# every position/action combination, positions in the outer loop.
ffs = []
for pos in (-1, 0, 1):
    for a in (-1, 0, 1):
        ffs.extend(generate_features(pos, a))

# Initial linear approximation of Q built on those features.
approx_0 = LinearFunctionApprox.create(feature_functions=ffs)


def epsilon_as_func_of_episodes(k):
    """Epsilon schedule handed to glie_mc_control: 1/k for argument k."""
    return 1 / k


# Fourth positional argument of glie_mc_control (presumably the discount factor).
gamma = 0.9

# Lazily evaluated: nothing is trained until `qvfs` is iterated.
qvfs = glie_mc_control(mdp_trading, states, approx_0, gamma, epsilon_as_func_of_episodes)

Get the Q-value function after `num_iter` iterations

In [39]:
# Number of successive Q-function approximations to take from `qvfs`.
num_iter = 100
final_qvf = None

# zip() asks range() for its next value first, so after num_iter items it
# stops WITHOUT pulling another element from `qvfs`. The previous
# enumerate/break form fetched one extra approximation and discarded it,
# which costs extra work in the generator and silently drops an iterate if
# this cell is run again on the same generator.
for i, qvf in zip(range(num_iter), qvfs):
    final_qvf = qvf

# Fail here with a clear message rather than later inside the plotting or
# backtesting code when nothing could be drawn from the generator.
if final_qvf is None:
    raise RuntimeError(
        "No Q-value function was obtained: `qvfs` is exhausted or num_iter <= 0. "
        "Re-run the training cell to recreate the generator."
    )

Visualize Q

In [42]:
# Inspect the learned Q-value function against the test data using the project's QAnalyzer.
qanalysis = qp.QAnalyzer(test,final_qvf)
qanalysis.plot_snapshot()

Backtest

In [40]:
# Greedy policy derived from final_qvf; the lambda offers the same three actions (-1, 0, 1) whatever state it receives.
trading_policy  = greedy_policy_from_qvf(final_qvf, lambda x : [-1,0,1])
# NOTE: this rebinds `bt`, which the baseline-policy cell also assigns.
bt = btest.Backtester(mdp_trading,trading_policy)
bt.summary()