In [38]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import utils_project as u
import numpy as np
import pandas as pd
from rl.markov_decision_process import MarkovDecisionProcess
from rl.markov_process import State, MarkovProcess, NonTerminal, Terminal

from typing import (Callable, Dict, Generic, Iterator, Iterable, List,
                    Mapping, Optional, Sequence, Tuple, TypeVar, overload)

from rl.distribution import Categorical, Distribution, Constant


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Finite Horizon Trading MDP for mean reverting residuals. 

The goal of this part is to see whether an RL agent can learn an optimal trading rule on a simple mean-reverting residual process.

### 1.1 Trading Ornstein Uhlenbeck Process

In [57]:
def generate_ou_process(sigma, mu, kappa, start_date, end_date, S0=100, seed=None):
    """
    Simulate the level of an Ornstein-Uhlenbeck process on business days.

    The path follows the Euler discretisation
        X_t = X_{t-1} + kappa * (mu - X_{t-1}) * dt + sigma * dW,
    with dt = 1/252 and dW ~ N(0, dt).

    Parameters:
    - sigma: Volatility of the process.
    - mu: Long-term mean level to which the process reverts.
    - kappa: Rate of reversion to the mean.
    - start_date: Start date of the simulation as a string (YYYY-MM-DD).
    - end_date: End date of the simulation as a string (YYYY-MM-DD).
    - S0: Initial value of the process, default is 100.
    - seed: Optional seed. When given, the shocks come from a dedicated
      `np.random.default_rng(seed)` so the path is reproducible; when None
      (default) the global NumPy random state is used, as before.

    Returns:
    - DataFrame with index as dates and a column 'Value' representing the evolution
      of the process (levels, not returns). The frame is empty when the date range
      contains no business day.
    """
    dates = pd.date_range(start=start_date, end=end_date, freq='B')  # 'B' for business days
    n = len(dates)
    if n == 0:
        # Nothing to simulate (e.g. the range only covers a weekend); the
        # original code failed here on `prices[0] = S0`.
        return pd.DataFrame({'Value': np.zeros(0)}, index=dates)

    dt = 1 / 252  # assuming 252 trading days in a year
    draw_normal = np.random.normal if seed is None else np.random.default_rng(seed).normal
    # One Wiener increment per step, drawn up front so sqrt(dt) is computed once.
    shocks = draw_normal(0, np.sqrt(dt), n - 1)

    prices = np.empty(n)
    prices[0] = S0
    for t in range(1, n):
        prices[t] = prices[t-1] + kappa * (mu - prices[t-1]) * dt + sigma * shocks[t-1]

    return pd.DataFrame({
        'Value': prices
    }, index=dates)

# Fixed seed so that Restart & Run All reproduces the same path.
df = generate_ou_process(sigma=0.1, mu=100, kappa=7, start_date='2019-01-01', end_date='2023-12-31', seed=42)

In [58]:
# Plot the simulated path as a visual sanity check.
# NOTE(review): plot_plotly is defined in utils_project and is not visible here —
# presumably it renders a Plotly figure of the frame; confirm against the helper.
u.plot_plotly(df)

In [46]:
# Value of the process on a given business day. Select the column by label:
# `Series[0]` on a non-integer index relies on a deprecated positional fallback.
df.loc[pd.to_datetime("2023-12-25"), "Value"]

100.02374692885797

In [None]:
class Trading(MarkovDecisionProcess[Dict, int]):
    """
    Finite-horizon trading MDP over a pre-generated price path.

    - train is a list of dataframes representing price processes we want to trade
    - test is a dataframe in which we want to evaluate the policy
    - a state is a dictionary
        {
            "Spot" : price S_t
            "position" : current position (-1 short, 0 flat, +1 long)
            "date" : current time step
            "data" : dataframe with price process
        }

    - actions : sell (-1), hold (0) or buy (+1); the action is added to the
      current position, which is kept within [-1, +1]

    """

    def __init__(self, train, test):
        self.train = train
        self.test = test

    def actions(self, state: NonTerminal[Dict]) -> Iterable[int]:
        """Return the trades allowed from `state`.

        A trade is -1 (sell), 0 (hold) or +1 (buy); only trades that keep the
        resulting position inside [-1, +1] are offered.
        """
        pos = state.state["position"]
        return [a for a in (-1, 0, 1) if abs(pos + a) <= 1]

    def step(self, state: NonTerminal[Dict], action: int) -> Distribution[Tuple[State[Dict], float]]:
        """Advance one time step and return the (next state, reward) distribution.

        The transition is deterministic given the price path stored in the
        state, so the result is a `Constant` distribution. The reward is the
        simple return of the spot over the step, signed by the position held
        *before* the action is applied.
        """

        # Get information about the current state.
        S_t_1 = state.state["Spot"]  # current spot, corresponds to "t-1" if "t" is the time at the end of the step
        # NOTE(review): t_1 is read but never used, and get_next below is only given
        # `data`. The helper is not visible here — confirm whether it needs the
        # current date to locate the next row.
        t_1 = state.state["date"]
        data = state.state["data"]
        pos = state.state["position"]  # is +1, -1 or 0

        # Fetch the next spot value and compute the return.
        t, is_last = u.get_next(data)
        S_t = data.loc[t].iloc[0]  # first column, explicitly positional
        # Simple return is measured against the price at which the position was held.
        r = pos * (S_t - S_t_1) / S_t_1

        # Build the next state.
        next_state = {
            "Spot": S_t,
            "position": pos + action,
            "date": t,
            "data": data
        }

        if is_last:
            next_state = Terminal(next_state)
        else:
            next_state = NonTerminal(next_state)

        # The MDP interface expects (next_state, reward) pairs; the reward was
        # previously computed and then dropped.
        return Constant((next_state, r))
            

## 2. Challenging the agent : introducing non stationarities

The goal of this part is to see whether the agent can learn to adapt rapidly to non-stationarities in the data.

Add features to the state space: fitted momentum over a lookback window.

## 3. Train the agent on real data

## 4. Backtest results