In [5]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import utils_project_mehdi as u
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append("..")
import rl
from rl.markov_decision_process import MarkovDecisionProcess
from rl.markov_process import State, MarkovProcess, NonTerminal, Terminal

from typing import (Callable, Dict, Generic, Iterator, Iterable, List,
                    Mapping, Optional, Sequence, Tuple, TypeVar, overload)

from rl.distribution import Categorical, Distribution, Constant, Choose
from rl.policy import Policy
from rl.monte_carlo import epsilon_greedy_policy, greedy_policy_from_qvf, glie_mc_control
from rl.function_approx import LinearFunctionApprox, AdamGradient

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Finite Horizon Trading MDP for mean reverting residuals. 

The goal of this part is to see wether an RL agent can an optimal trading rule using a simple mean reverting residual process

In [6]:
def generate_ou_process(sigma, mu, kappa, start_date, end_date, S0=100):
    """
    Generates a DataFrame with returns of an Ornstein-Uhlenbeck process over specific dates.

    Parameters:
    - sigma: Volatility of the process.
    - mu: Long-term mean level to which the process reverts.
    - kappa: Rate of reversion to the mean.
    - start_date: Start date of the simulation as a string (YYYY-MM-DD).
    - end_date: End date of the simulation as a string (YYYY-MM-DD).
    - S0: Initial value of the process, default is 100.

    Returns:
    - DataFrame with index as dates and a column 'Value' representing the evolution of the process.
    """
    dates = pd.date_range(start=start_date, end=end_date, freq='B')  # 'B' for business days
    n = len(dates)
    prices = np.zeros(n)
    prices[0] = S0
    dt = 1/252  # assuming 252 trading days in a year

    for t in range(1, n):
        dW = np.random.normal(0, np.sqrt(dt))  # increment of Wiener process
        prices[t] = prices[t-1] + kappa * (mu - prices[t-1]) * dt + sigma * dW

    return pd.DataFrame({
        'Value': prices
    }, index=dates)

def build_simulated_train_test(start='2019-01-01', end='2023-12-31', N = 100):
    #train
    train = []
    for _ in range(N):
        df = generate_ou_process(sigma=0.1, mu=100, kappa=7, start_date=start, end_date=end)
        train.append(df)

    #test
    df = generate_ou_process(sigma=0.1, mu=100, kappa=7, start_date=start, end_date=end)
    return train, df    

train, test = build_simulated_train_test(N=10)

In [7]:
u.plot_plotly_multiple(train)

We need to create :
- ` mdp: MarkovDecisionProcess[S, A]`
- `states: NTStateDistribution[S]`
- `approx_0: QValueFunctionApprox[S, A]`

As input of the control/prediction algorithms we have in the codebase. 
Some algorithms we have are :
- `glie_mc_control` (p. 352)
- `glie_sarsa` (p. 358)
-  `q_learning` but needs also `policy_from_q: PolicyFromQType`(p.365)
- `q_learning_experience_replay`but needs also `policy_from_q: PolicyFromQType`(p.393)
- `least_squares_policy_iteration` but takes in argument `initial_target_policy: DeterministicPolicy[S, A]` and `transitions: Iterable[TransitionStep[S, A]]`


In [8]:
class ThresholdTradingPolicy(Policy[Dict,int]):
    """
    Implements the policy that consists in buying/selling at given thresholds
    """

    def __init__(self, enter_long, exit_long, enter_short, exit_short):
        """
        Setting the thresolds for trading decisions
        """
        self.enter_long = enter_long
        self.enter_short = enter_short
        self.exit_long = exit_long
        self.exit_short = exit_short

    def act(self, state: NonTerminal[Dict])->Distribution[int]:
        St = state.state["Spot"] #current spot, correponds to "t-1" if "t" is the time at the end of the step
        t = state.state["date"]
        pos = state.state["position"] #is +1  -1 or 0 

        action = 0

        if St >= self.enter_short and pos == 0:
            action = -1 #enter short

        if St <= self.exit_short and pos == -1:
            action = 1  #buy back to exit short

        if St <= self.enter_long and pos == 0:
            action = 1 #enter long

        if St >= self.enter_long and pos == 1:
            action = -1 #sell to exit long

        return Constant(action)

In [9]:
def generate_initial_state_from_data(df):
    """
    generates the initial state dictionnary from the dataframes
    """
    S0 = df.iloc[0][0]
    t = df.index[0]
    pos = 0
    return NonTerminal(
        {
            "Spot" : S0,
            "position" : 0,
            "date" : t,
            "data" : df
        }
    )

In [10]:
class Trading(MarkovDecisionProcess[Dict,int]):
    """
    - train is a list of dataframes representing price processes we want to trade
    - test is a dataframe in which we want to evaluate the policy
    - a state is a dictionnary
        {
            "Spot" : price S_t
            "position" : long/short (-1 or 1)
            "date" : current time step
            "data" : dataframe with price process
        }
    - actions : hold, buy or sell
    """

    def __init__(self,train,test):
        self.train = train
        self.test = test
    
    def actions(self, state):
        return [-1,0,1] #short hold buy
  
    def step(self, state, action)->Distribution[Tuple[State[Dict],float]]:
        #get information about current state
        S_t_1 = state.state["Spot"] #current spot, correponds to "t-1" if "t" is the time at the end of the step
        t_1 = state.state["date"]
        data = state.state["data"]
        pos = state.state["position"] #is +1  -1 or 0 

        #Fetch next spot value and compute the return
        t, is_last = u.get_next(t_1, data)
        S_t = data.loc[t][0]
        r =  pos*(S_t - S_t_1)/S_t

        #build next state
        next_state = {
            "Spot" :  S_t,
            "position" : np.sign(pos+action),
            "date" : t,
            "data" : data
        }
        if is_last:
            next_state = Terminal(next_state)
        else:
            next_state = NonTerminal(next_state)
        return Constant((next_state,r))

    
    def generate_start_state(self,which = "train"):
        """
        Generates the initial distribution of the state from the available training data
        """
        if which == "train":
            return Choose( [generate_initial_state_from_data(train_) for train_ in self.train] )
        elif which == "test":
            return Constant(generate_initial_state_from_data(test))  

          

In [11]:
class Backtester():
    """
    This class is to visualize the backtest of a given trading policy
    """

    def __init__(self, trading, policy):
        self.trading = trading #MDP  
        self.policy = policy #policy for the MDP

    def get_returns(self):

        # show progress of the backtest
        print("about to generate start states")

        start_states = self.trading.generate_start_state("test") # we take the test set data of the trading policy
        sequence = self.trading.simulate_actions(start_states, self.policy)

        print("got sequence")

        bt = [] #will be used to build the backtest dataframe

        # Loop through each element in the sequence

        print("about to loop through the sequence")

        for x in sequence:
            bt.append([x.reward, x.state.state["date"]])

        print("looped through the sequence")

        print("Creating dataframe")
        df = pd.DataFrame(bt, columns=['Reward', 'Date'])
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)
        self.returns = df


    def summary(self):
        """
        main summary of the backtest
        """
        print("about to get returns")
        self.get_returns()
        print("got returns")
        sharpe = (np.sqrt(252)*self.returns.mean()/self.returns.std())[0]
        u.plot_plotly((1+self.returns).cumprod(),title=f"Sharpe Ratio {round(sharpe,2)}")

In [12]:
print(type(ThresholdTradingPolicy(enter_long = 99.95, exit_long=99.99, enter_short = 100.05, exit_short = 100.025)))

<class '__main__.ThresholdTradingPolicy'>


In [13]:
bt = Backtester( Trading(train,test),  ThresholdTradingPolicy(enter_long = 99.95, exit_long=99.99, enter_short = 100.05, exit_short = 100.025))
bt.summary()

about to get returns
about to generate start states
got sequence
about to loop through the sequence
looped through the sequence
Creating dataframe
got returns


In [18]:
mdp_trading = Trading(train,test)

states = mdp_trading.generate_start_state("train")

ffs=[ #x is state/ action
        lambda x: 1,
        lambda x: x[0].state["Spot"],
        lambda x: x[1],
        lambda x: x[1]*x[0].state["Spot"],
    ]


approx_0  = LinearFunctionApprox.create(feature_functions=ffs)


epsilon_as_func_of_episodes = lambda k: 1/k

gamma = 0.9


qvfs = glie_mc_control(
    mdp_trading,
    states,
    approx_0,
    gamma,
    epsilon_as_func_of_episodes
)

# Plotting

In [19]:
mdp_trading.actions(states)

[-1, 0, 1]

In [20]:
print(type(qvfs))

# first, get the plicy from the qvfs
actionsnew = mdp_trading.actions(states)
print(type(actionsnew))
# get the last element of the qvfs
actionsnew = lambda x :mdp_trading.actions(states)

# x = 0
# def get_last_element(generator):
#     last_element = None
#     for element in generator:
#         last_element = element
#         x += 1
#     return last_element

# last_element = get_last_element(qvfs)
# print(x)

def get_first_element(generator):
    for element in generator:
        return element

first_element = get_first_element(qvfs)

# print(last_element)  # Output will be 9, the last element of the generator

greedy_policy = greedy_policy_from_qvf(first_element, actionsnew)


# last_qvf = qvfs[-1]
# last_qvf_values = last_qvf.values[-1]
# print(last_qvf_values)

# greedy_policy = greedy_policy_from_qvf(last_qvf, actions)

# print(type(greedy_policy))
# print(greedy_policy)

<class 'generator'>
<class 'list'>


In [23]:
print(type(greedy_policy.act(state)))

AttributeError: 'Choose' object has no attribute 'state'

In [22]:
greedy_policy.act(state)

NameError: name 'state' is not defined

In [68]:
# generate backtest from the policy
bt = Backtester( Trading(train,test), greedy_policy)

In [24]:
state = mdp_trading.generate_start_state("test").sample()
print(state)

greedy_policy.act(state)

NonTerminal(state={'Spot': 100.0, 'position': 0, 'date': Timestamp('2019-01-01 00:00:00'), 'data':                  Value
2019-01-01  100.000000
2019-01-02  100.001229
2019-01-03   99.999657
2019-01-04  100.002545
2019-01-07  100.001656
...                ...
2023-12-25  100.016958
2023-12-26  100.016790
2023-12-27  100.011095
2023-12-28  100.007360
2023-12-29  100.012390

[1304 rows x 1 columns]})


Constant(value=-1)

In [25]:
bt.summary()

about to get returns
about to generate start states
got sequence
about to loop through the sequence
looped through the sequence
Creating dataframe
got returns


In [26]:
# plot the q value as a function of the spot price for action 1 and position 0

qvals = []
spotPrices =[i for i in range(0, 100)]
for i in range(0, 100):
    state = NonTerminal({
        "Spot" : i,
        "position" : 0,
        "date" : pd.to_datetime("2023-01-01"),
        "data" : test
    })
    qvals.append(greedy_policy.act(state).value)


In [27]:
import plotly.graph_objs as go
# import plotly.offline as pyo

trace = go.Scatter(
    x = spotPrices,
    y = qvals,
    mode = 'lines+markers',
    name = 'Q Values'
)
layout = go.Layout(
    title = 'Q Values vs Spot Prices',
    xaxis = dict(title = 'Spot Prices'),
    yaxis = dict(title = 'Q Values')
)
fig = go.Figure(data=[trace], layout=layout)
fig.show()

In [28]:
# get q value for a given state and action
state = NonTerminal({
    "Spot" : 100,
    "position" : 0,
    "date" : pd.to_datetime("2023-01-01"),
    "data" : test
})
action = 1
greedy_policy.act(state).value

-1

In [1]:
import plotly.graph_objects as go
import numpy as np

# Example time series data
time = np.linspace(0, 10, 100)  # Replace with your actual time series data
y_values = np.sin(time)  # Example line plot, replace with your actual data

# Example value function (modify according to your actual function)
def value_function(y):
    return 1 - abs(y)  # Example function

# Generate heatmap data based on the value function
y_range = np.linspace(-1, 1, 100)
heatmap_z = np.array([[value_function(y) for y in y_range] for _ in time])

# Custom colorscale (white to red, adjust as needed)
colorscale = [
    [0, "white"],  # Values at 0 will be white
    [0.5, "red"],  # Values at 0.5 will be red
    [1, "white"]   # Values at 1 will be white again
]

# Create the heatmap
heatmap = go.Heatmap(
    z=heatmap_z,
    x=time,
    y=y_range,
    colorscale=colorscale,
    showscale=False  # Hide the heatmap color scale
)

# Create the line plot
line_plot = go.Scatter(x=time, y=y_values, mode='lines', name='Line Plot')

# Create the figure and add the heatmap and line plot
fig = go.Figure(data=[heatmap, line_plot])

# Customize layout
fig.update_layout(title='Line Plot with Colored Background')

# Show the figure
fig.show()

## 2. Challenging the agent : introducing non stationarities

The goal of this part here is to see if the AI agent can learn to rapidly adapt to non stationarities in the data

Add features of the state space : fitted momentum in a lookback window.

## 3. Train the agent on real data