In [4]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import utils_project_mehdi as u
import numpy as np
import pandas as pd
import sys
sys.path.append("..")
import rl
from rl.markov_decision_process import MarkovDecisionProcess
from rl.markov_process import State, MarkovProcess, NonTerminal, Terminal

from typing import (Callable, Dict, Generic, Iterator, Iterable, List,
                    Mapping, Optional, Sequence, Tuple, TypeVar, overload)

from rl.distribution import Categorical, Distribution, Constant, Choose
from rl.policy import Policy
from rl.monte_carlo import epsilon_greedy_policy, greedy_policy_from_qvf, glie_mc_control
from rl.function_approx import LinearFunctionApprox, AdamGradient

## 1. Finite Horizon Trading MDP for mean reverting residuals. 

The goal of this part is to see whether an RL agent can learn an optimal trading rule on a simple mean-reverting residual process.

In [5]:
def generate_ou_process(sigma, mu, kappa, start_date, end_date, S0=100, rng=None):
    """
    Simulates an Ornstein-Uhlenbeck process on the business days of a date range.

    The path is built with an Euler scheme using a time step of 1/252 year:
        X[t] = X[t-1] + kappa * (mu - X[t-1]) * dt + sigma * dW,   dW ~ N(0, dt)

    Parameters:
    - sigma: Volatility of the process.
    - mu: Long-term mean level to which the process reverts.
    - kappa: Rate of reversion to the mean.
    - start_date: Start date of the simulation as a string (YYYY-MM-DD).
    - end_date: End date of the simulation as a string (YYYY-MM-DD).
    - S0: Initial value of the process, default is 100.
    - rng: Optional random source exposing `normal(loc, scale)`, e.g.
      `np.random.default_rng(seed)`, to make the path reproducible.
      When None (default) NumPy's global random state is used.

    Returns:
    - DataFrame indexed by business dates with a single column 'Value' holding
      the level of the process (not its returns). The frame is empty when the
      date range contains no business day.
    """
    dates = pd.date_range(start=start_date, end=end_date, freq='B')  # 'B' for business days
    n = len(dates)
    prices = np.zeros(n)

    # An empty range has no first element to seed with S0: return an empty frame
    # instead of failing on prices[0].
    if n == 0:
        return pd.DataFrame({'Value': prices}, index=dates)

    draw_normal = np.random.normal if rng is None else rng.normal
    dt = 1/252  # assuming 252 trading days in a year
    sqrt_dt = np.sqrt(dt)  # loop-invariant standard deviation of dW

    prices[0] = S0
    for t in range(1, n):
        dW = draw_normal(0, sqrt_dt)  # increment of Wiener process
        prices[t] = prices[t-1] + kappa * (mu - prices[t-1]) * dt + sigma * dW

    return pd.DataFrame({
        'Value': prices
    }, index=dates)

def build_simulated_train_test(start='2019-01-01', end='2023-12-31', N = 100):
    """
    Simulates N training paths and one test path of the same OU process.

    Every path is drawn with sigma=0.1, mu=100, kappa=7 over the business days
    between `start` and `end`.

    Returns:
    - (train, test): a list of N DataFrames and a single DataFrame.
    """
    def simulate_path():
        # One independent draw of the process over [start, end].
        return generate_ou_process(sigma=0.1, mu=100, kappa=7, start_date=start, end_date=end)

    # Training paths are drawn first, the test path last.
    training_paths = [simulate_path() for _ in range(N)]
    test_path = simulate_path()
    return training_paths, test_path

# Simulate 10 training paths plus one separate test path.
train, test = build_simulated_train_test(N=10)

In [6]:
# Visualise the simulated training paths with the project plotting helper.
u.plot_plotly_multiple(train)

We need to create :
- ` mdp: MarkovDecisionProcess[S, A]`
- `states: NTStateDistribution[S]`
- `approx_0: QValueFunctionApprox[S, A]`

These are the inputs required by the control/prediction algorithms available in the codebase.
Some of the algorithms we have are:
- `glie_mc_control` (p. 352)
- `glie_sarsa` (p. 358)
- `q_learning`, which also needs `policy_from_q: PolicyFromQType` (p. 365)
- `q_learning_experience_replay`, which also needs `policy_from_q: PolicyFromQType` (p. 393)
- `least_squares_policy_iteration`, which takes `initial_target_policy: DeterministicPolicy[S, A]` and `transitions: Iterable[TransitionStep[S, A]]` as arguments


In [7]:
class ThresholdTradingPolicy(Policy[Dict,int]):
    """
    Implements the policy that consists in buying/selling at given thresholds.

    Actions are +1 (buy), -1 (sell) and 0 (hold). The rule depends on the
    current position stored in the state:
    - flat  (0):  buy when Spot <= enter_long, sell when Spot >= enter_short
    - long  (+1): sell when Spot >= exit_long
    - short (-1): buy when Spot <= exit_short
    """

    def __init__(self, enter_long, exit_long, enter_short, exit_short):
        """
        Setting the thresholds for trading decisions

        - enter_long: spot level at or below which a flat position goes long
        - exit_long: spot level at or above which a long position is closed
        - enter_short: spot level at or above which a flat position goes short
        - exit_short: spot level at or below which a short position is closed
        """
        self.enter_long = enter_long
        self.enter_short = enter_short
        self.exit_long = exit_long
        self.exit_short = exit_short

    def act(self, state: NonTerminal[Dict])->Distribution[int]:
        """
        Returns the (deterministic) action for `state`, wrapped in a Constant.

        Reads the "Spot" and "position" entries of the state dictionary.
        """
        St = state.state["Spot"] #current spot, corresponds to "t-1" if "t" is the time at the end of the step
        pos = state.state["position"] #is +1  -1 or 0

        action = 0 #hold unless one of the rules below fires

        if pos == 0:
            # If both entry conditions hold (overlapping thresholds), going long
            # takes precedence.
            if St <= self.enter_long:
                action = 1 #enter long
            elif St >= self.enter_short:
                action = -1 #enter short
        elif pos == -1:
            if St <= self.exit_short:
                action = 1  #buy back to exit short
        elif pos == 1:
            # The exit rule must use exit_long, not enter_long: comparing against
            # the entry level would close the long as soon as the spot recovers
            # to it, and leave exit_long unused.
            if St >= self.exit_long:
                action = -1 #sell to exit long

        return Constant(action)

In [8]:
def generate_initial_state_from_data(df):
    """
    Generates the initial state dictionary from a price dataframe.

    Parameters:
    - df: DataFrame whose first column holds the price process and whose index
      holds the dates.

    Returns:
    - NonTerminal state wrapping a dict with the first price ("Spot"), a flat
      position (0), the first index value ("date") and the dataframe ("data").
    """
    # Positional (row 0, column 0) lookup; df.iloc[0][0] would go through the
    # deprecated integer-as-position fallback of Series.__getitem__.
    S0 = df.iloc[0, 0]
    t = df.index[0]
    return NonTerminal(
        {
            "Spot" : S0,
            "position" : 0,
            "date" : t,
            "data" : df
        }
    )

In [9]:
class Trading(MarkovDecisionProcess[Dict,int]):
    """
    Trading MDP over pre-generated price paths.

    - train is a list of dataframes representing price processes we want to trade
    - test is a dataframe in which we want to evaluate the policy
    - a state is a dictionary
        {
            "Spot" : price S_t
            "position" : short / flat / long (-1, 0 or 1)
            "date" : current time step
            "data" : dataframe with price process
        }
    - actions : sell (-1), hold (0) or buy (+1)
    """

    def __init__(self,train,test):
        self.train = train
        self.test = test

    def actions(self, state):
        """The same three actions are available in every state."""
        return [-1,0,1] #sell hold buy

    def step(self, state, action)->Distribution[Tuple[State[Dict],float]]:
        """
        Moves one date forward along the state's price path.

        The reward is the simple return of the spot over the step, signed by
        the position held when entering the step. The action only affects the
        position carried into the next state.

        Returns a Constant distribution over (next_state, reward).
        """
        #get information about current state
        S_t_1 = state.state["Spot"] #current spot, corresponds to "t-1" if "t" is the time at the end of the step
        t_1 = state.state["date"]
        data = state.state["data"]
        pos = state.state["position"] #is +1  -1 or 0

        #Fetch next spot value and compute the return
        # presumably returns the date following t_1 in `data` and whether it is
        # the last one -- confirm against utils_project_mehdi.get_next
        t, is_last = u.get_next(t_1, data)
        S_t = data.loc[t].iloc[0] #first column of the row, by explicit position
        # Simple return relative to the price at the start of the step, so that
        # (1 + r).cumprod() compounds correctly.
        r =  pos*(S_t - S_t_1)/S_t_1

        #build next state
        next_state = {
            "Spot" :  S_t,
            "position" : np.sign(pos+action), #clipped to -1 / 0 / +1
            "date" : t,
            "data" : data
        }
        if is_last:
            next_state = Terminal(next_state)
        else:
            next_state = NonTerminal(next_state)
        return Constant((next_state,r))


    def generate_start_state(self,which = "train"):
        """
        Generates the initial distribution of the state.

        - which="train": uniform choice over the first state of each training path
        - which="test": the first state of this MDP's own test path

        Raises ValueError for any other value of `which`.
        """
        if which == "train":
            return Choose( [generate_initial_state_from_data(train_) for train_ in self.train] )
        elif which == "test":
            # Use the instance's test set, not whatever global `test` happens to
            # exist in the notebook namespace.
            return Constant(generate_initial_state_from_data(self.test))
        raise ValueError(f"which must be 'train' or 'test', got {which!r}")

          

In [10]:
class Backtester():
    """
    This class is to visualize the backtest of a given trading policy
    """

    def __init__(self, trading, policy):
        self.trading = trading #MDP
        self.policy = policy #policy for the MDP

    def get_returns(self):
        """
        Runs the policy on the MDP's test start state and collects the rewards.

        Stores (and returns) a DataFrame with a single 'Reward' column indexed
        by 'Date', where the date is the one carried by each step's state.
        """
        start_states = self.trading.generate_start_state("test") # we take the test set data of the trading policy
        sequence = self.trading.simulate_actions(start_states, self.policy)

        # One (reward, date) record per simulated step.
        records = [(x.reward, x.state.state["date"]) for x in sequence]

        df = pd.DataFrame(records, columns=['Reward', 'Date'])
        df['Date'] = pd.to_datetime(df['Date'])
        self.returns = df.set_index('Date')
        return self.returns


    def summary(self):
        """
        main summary of the backtest: plots the compounded rewards with the
        annualised (252 days) Sharpe ratio in the title
        """
        rewards = self.get_returns()['Reward']
        # Select the column by label; indexing the aggregated Series with [0]
        # relies on a deprecated positional fallback.
        sharpe = np.sqrt(252)*rewards.mean()/rewards.std()
        u.plot_plotly((1+self.returns).cumprod(),title=f"Sharpe Ratio {round(sharpe,2)}")

In [11]:
# Backtest a hand-picked threshold policy on the test path.
bt = Backtester( Trading(train,test),  ThresholdTradingPolicy(enter_long = 99.95, exit_long=99.99, enter_short = 100.05, exit_short = 100.025))
# summary() runs the simulation and plots the compounded rewards, with the Sharpe ratio in the title.
bt.summary()

In [12]:
# Trading MDP built from the simulated training paths and the test path.
mdp_trading = Trading(train,test)

# Distribution over episode start states: one candidate per training path.
states = mdp_trading.generate_start_state("train")

# Feature functions phi(s, a) of the linear Q-value approximation.

ffs=[ # x is a (state, action) pair: x[0].state is the state dict, x[1] the action
        lambda x: 1,  # constant (bias) feature
        lambda x: x[0].state["Spot"],  # current spot
        lambda x: x[1],  # action
        lambda x: x[0].state["position"]*x[1]*x[0].state["Spot"],  # position * action * spot interaction
    ]


# Initial Q-value function approximation built from the features above.
approx_0  = LinearFunctionApprox.create(feature_functions=ffs)


# Epsilon as a function of the episode count k, decaying as 1/k.
epsilon_as_func_of_episodes = lambda k: 1/k

# Value passed as the `gamma` argument of glie_mc_control (presumably the discount factor).
gamma = 0.9


# NOTE(review): glie_mc_control presumably returns a lazy iterator of Q-value
# approximations; nothing is consumed from `qvfs` in this cell -- confirm.
qvfs = glie_mc_control(
    mdp_trading,
    states,
    approx_0,
    gamma,
    epsilon_as_func_of_episodes
)

## 2. Challenging the agent : introducing non stationarities

The goal of this part is to see whether the RL agent can learn to adapt rapidly to non-stationarities in the data.

Add features to the state space: fitted momentum over a lookback window.

## 3. Train the agent on real data

In [15]:
# Plot the test path with the project plotting helper.
u.plot_plotly(test)

In [71]:
class Train_Test_Builder():
    """
    Prepare a price dataframe into [[episodes], test] where test is the start
    of the trading sample.

    Constructor arguments:
    - data : dataframe indexed by date
    - lookback : number of business days kept before start_trading in the test
      set (minimum history that needs to be available to the agent)
    - start_trading : date at which we want to start the trading (start of
      OOS). When None, the date located at 70% of the index is used.

    Attributes created by the build methods:
    - test : slice of data from `lookback` business days before start_trading
    - train : list of training episodes sampled by u.sample_dataframes
    """

    def __init__(self,data, lookback=30, start_trading = None):
        self.data = data
        self.lookback = lookback
        if start_trading is not None:
            self.start_trading = start_trading
        else: #use 70%/30% split
            index_at_70_percent = int(len(data) * 0.7)
            date_at_70_percent = data.index[index_at_70_percent]
            self.start_trading = date_at_70_percent

    def build_test(self):
        """
        Sets self.test to the rows of data starting `lookback` business days
        before start_trading.
        """
        start_trading = pd.to_datetime(self.start_trading)  # Convert start_date to datetime if it's a string
        lookback_date = start_trading - pd.offsets.BDay(self.lookback)
        self.test = self.data.loc[lookback_date:]  # label-based row slice

    # Original (misspelled) name kept so existing callers keep working.
    buil_test = build_test

    def build_train(self,N,length_episode):
        """
        Sets self.train by delegating to u.sample_dataframes with N and
        length_episode.
        """
        # presumably samples N episodes of `length_episode` rows located before
        # start_trading -- confirm against utils_project_mehdi.sample_dataframes
        self.train = u.sample_dataframes(self.data,self.start_trading,N,length_episode,uniform=True)

    def build_train_test(self,N,length_episode):
        """Builds self.test, then self.train."""
        self.build_test()
        self.build_train(N,length_episode)

In [74]:
# Build the split from the `test` dataframe: N=100 training episodes with
# length_episode=80, plus the test slice.
data_builder = Train_Test_Builder(test)
data_builder.build_train_test(100,80)
# Display the resulting test slice.
data_builder.test

Unnamed: 0,Value
2022-05-19,99.958194
2022-05-20,99.961464
2022-05-23,99.969744
2022-05-24,99.959556
2022-05-25,99.977642
...,...
2023-12-25,99.981937
2023-12-26,99.976612
2023-12-27,99.977073
2023-12-28,99.966653
