# Duelling Deep Q-Networks on Crude Oil hourly data

In this notebook a recurrent DQN model is trained on the crude oil hourly dataset, enriched with the ATR and session volume data.

In [None]:
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from gym_anytrading.agents.DDQN_agent import DDQNTradingAgent
from gym_anytrading.envs.future_env import FuturesEnv
from utils import MetricLogger

In [None]:
# Use one shared seed for both PyTorch and NumPy so that repeated runs of the
# notebook draw the same random numbers.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
# Location of the CSV file that is loaded into ``cl_df`` below.
data_path = 'data/CL1!_adj.csv'
# Each run gets its own output directory, named after the moment this cell ran.
save_dir = Path("models/ddqn_cl_checkpoints").joinpath(
    datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
)

In [None]:
# Load the dataset: the first CSV column becomes the index and pandas is asked
# to parse it as dates.
cl_df = pd.read_csv(filepath_or_buffer=data_path, parse_dates=True, index_col=0)
# Bare expression so the notebook shows the frame as the cell output.
cl_df

## Preprocessing

The preprocessing phase consists of the following steps:

- addition of cyclical features for the time-related variables
- standardization of the data

In [None]:
def add_cyclical_features(df):
    """Return a copy of ``df`` extended with cyclical time features.

    The index of ``df`` is converted with ``pd.to_datetime`` (format
    ``'%Y-%m-%d %H:%M:%S'``) and six columns are appended, in this order:
    ``hour_sin``, ``hour_cos`` (period 24), ``day_sin``, ``day_cos``
    (day of month, period 30), ``month_sin``, ``month_cos`` (period 12).

    The input frame is never modified: all work happens on a copy, so the
    caller's ``df`` does not gain a temporary ``date`` column or the feature
    columns as a side effect.

    Args:
        df: DataFrame whose index holds the timestamps.

    Returns:
        A new DataFrame with the original columns followed by the six
        feature columns. A column named ``date`` is not part of the result
        (the name was historically used for a temporary column that was
        dropped before returning).
    """
    timestamps = pd.to_datetime(df.index, format='%Y-%m-%d %H:%M:%S')

    # Work on a copy so the caller's frame is left untouched.
    out = df.copy().drop(columns='date', errors='ignore')

    # (column prefix, integer time component, length of the cycle)
    # NOTE(review): the day-of-month cycle uses 30, so day 31 maps to the same
    # angle as day 1 - kept unchanged to preserve the existing features.
    components = (
        ('hour', timestamps.hour, 24),
        ('day', timestamps.day, 30),
        ('month', timestamps.month, 12),
    )
    for prefix, component, period in components:
        # Vectorised over the whole index instead of a per-row ``apply``.
        angle = np.asarray(component) * (2. * np.pi / period)
        out[f'{prefix}_sin'] = np.sin(angle)
        out[f'{prefix}_cos'] = np.cos(angle)

    return out

In [None]:
# Replace the raw frame with the version that carries the cyclical columns.
cl_df = add_cyclical_features(df=cl_df)

## Train Loop

In [None]:
def train_loop(env, episodes: int, agent, logger):
    """Run ``episodes`` training episodes of ``agent`` on ``env``.

    Every episode starts with ``env.reset()`` and then repeats, until the
    environment reports ``done``: pick an action with ``agent.act``, apply it
    with ``env.step``, store the transition with ``agent.cache``, call
    ``agent.learn`` and hand reward, loss and q-value to ``logger.log_step``.

    When an episode ends, the last ``info`` returned by the environment is
    printed, ``logger.log_episode()`` is called, and ``logger.record`` receives
    the episode index, ``agent.exploration_rate``, ``agent.curr_step`` and
    ``env.get_account_value()``.

    Args:
        env: environment exposing ``reset``, ``step`` (returning a
            ``(next_state, reward, done, info)`` tuple) and
            ``get_account_value``.
        episodes: number of episodes to run.
        agent: object exposing ``act``, ``cache``, ``learn`` (returning a
            ``(q, loss)`` pair), ``exploration_rate`` and ``curr_step``.
        logger: object exposing ``log_step``, ``log_episode`` and ``record``.
    """
    for episode_idx in range(episodes):
        observation = env.reset()
        finished = False

        while not finished:
            chosen_action = agent.act(observation)
            upcoming, step_reward, finished, step_info = env.step(chosen_action)

            # Store the transition first, then let the agent learn.
            agent.cache(observation, upcoming, chosen_action, step_reward, finished)
            q_value, step_loss = agent.learn()
            logger.log_step(step_reward, step_loss, q_value)

            observation = upcoming

        # The loop only exits after a step that reported ``done``.
        print('info: ', step_info)

        logger.log_episode()

        final_account_value = env.get_account_value()
        logger.record(
            episode=episode_idx,
            epsilon=agent.exploration_rate,
            step=agent.curr_step,
            account_value=final_account_value,
        )

## Training

Only 80% of the total data is used during the training phase; the remaining 20% is held out for testing. This lets us check that the model has not simply memorized the dataset (overfitting) and that it generalizes well to unseen market situations.

The DQN model is a recurrent neural network with 3 layers and a hidden size of 64. The network receives as input the hourly data of the previous week.

In [None]:
# Share of the rows, counted from the start of the frame, used for training.
training_portion = 0.8
train_cl_df = cl_df.iloc[:int(np.floor(training_portion * len(cl_df)))]

episodes = 400
# NOTE(review): 115 hourly rows per observation window; the notebook text
# describes the input as the previous week of hourly data - confirm.
window_size = 115

# The first ``window_size`` rows are skipped by ``frame_bound``.
env = FuturesEnv(
    df=train_cl_df,
    window_size=window_size,
    frame_bound=(window_size, train_cl_df.shape[0]),
)

# The agent is sized from the environment's observation and action spaces and
# is given the run's output directory.
agent = DDQNTradingAgent(env.observation_space.shape[1], env.action_space.n, save_dir)
logger = MetricLogger(save_dir)

In [None]:
# Run the training episodes with the objects built in the previous cell.
train_loop(env=env, episodes=episodes, agent=agent, logger=logger)

## Testing the model

Now that the model has been trained, let's test its performance on the testing data (out-of-sample data).

In [None]:
def model_testing(env, agent):
    """Run ``agent`` on ``env`` for one episode and print a trace.

    Actions are requested with ``agent.act(state, eval_mode=True)``. After
    every step that does not end the episode, one line is printed with the
    step counter, reward, ``env.get_account_value()``, the action and
    ``info["position"]``. The step that ends the episode prints the final
    ``info`` instead.

    Args:
        env: environment exposing ``reset``, ``step`` (returning a
            ``(next_state, reward, done, info)`` tuple) and
            ``get_account_value``.
        agent: object whose ``act`` accepts an ``eval_mode`` keyword.
    """
    observation = env.reset()
    step_idx = 0
    finished = False

    while not finished:
        chosen_action = agent.act(observation, eval_mode=True)
        observation, step_reward, finished, step_info = env.step(chosen_action)

        if not finished:
            print(f'step: {step_idx}, reward: {step_reward}, account_value: {env.get_account_value()}, action: {chosen_action}, position: {step_info["position"]}')
            step_idx += 1

    # Only reached after the step that reported ``done``.
    print('info: ', step_info)

In [None]:
# Rows after the training split point form the test set.
test_cl_df = cl_df.iloc[int(np.floor(training_portion * len(cl_df))):]

# New environment over the test rows; this rebinds ``env``.
env = FuturesEnv(
    df=test_cl_df,
    window_size=window_size,
    frame_bound=(window_size, test_cl_df.shape[0]),
)

In [None]:
# Run the agent on the environment built from the test rows.
model_testing(env=env, agent=agent)

In [None]:
# Clear the current matplotlib axes before drawing.
plt.cla()
# Let the environment draw its own rendering of the run
# (presumably the test episode executed above - confirm against FuturesEnv).
env.render_all()
# Title the plot; as the last expression of the cell its return value is
# what the notebook displays.
plt.title('Testing')

> comment on the model performance