In [1]:
import random
from collections import Counter

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import RobustScaler

from Direction_DQN.model import train_model, evaluate_model, Agent
from Direction_DQN.utils import get_data, show_train_result, show_test_result

In [2]:
# Not referenced by any other cell visible in this notebook; kept unchanged.
PARAMS = None

# Settings handed to the agent through its `hyperparams` argument.
# How each value is consumed is defined inside Direction_DQN.model.
hyperparams = dict(
    # Q-learning
    gamma=0.99,            # discount factor
    # Exploration schedule
    epsilon=1.0,           # initial exploration rate
    epsilon_min=0.01,      # minimum exploration rate
    epsilon_decay=0.95,    # decay rate for the exploration probability
    # Optimisation
    learning_rate=0.001,   # learning rate
    batch_size=256,        # size of a minibatch
    # Prioritized experience replay
    alpha=0.6,             # alpha for prioritized experience replay
    beta_start=0.4,        # initial value of beta
    beta_decay=0.98,       # decay rate for beta (how it is applied is up to the agent -- TODO confirm)
    beta_max=1,            # maximum value of beta
)


In [3]:
# Location of the BTC CSV, relative to the notebook's working directory.
BTC_CSV_PATH = 'Direction_DQN/data/btc.csv'

# NOTE(review): in the recorded run this call emits a pandas SettingWithCopyWarning
# raised from library code (`inner[self.returns < 0] = ...`); that has to be fixed
# inside the Direction_DQN package, not in this notebook.
btc = get_data(BTC_CSV_PATH)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inner[self.returns < 0] = stddev(self.returns, 20)




In [4]:
# Columns excluded from the model inputs; every other column of `btc` is a feature.
NON_FEATURE_COLS = [
    'Start', 'End',
    'Open', 'High', 'Low', 'Close',
    'Volume', 'Market Cap',
    'Average', 'VWAP',
    'Returns', 'Log Returns',
]

feature_cols = btc.drop(columns=NON_FEATURE_COLS).columns

# Name of the target column. Looking it up on the frame (instead of hard-coding
# the string) raises a KeyError right here if the column is missing.
price_col = btc['Log Returns'].name

In [5]:
# Rows whose 'Start' is before the cutoff are used for training, the rest for testing.
# The cutoff is compared as a string -- assumes 'Start' holds ISO-formatted dates
# or datetimes; TODO confirm against get_data.
SPLIT_DATE = '2024-01-01'

start_col = btc['Start']
train_data = btc[start_col < SPLIT_DATE]
test_data = btc[start_col >= SPLIT_DATE]

In [6]:
# Feature scaler shared by both splits: it is fitted on the training features
# only and then applied unchanged to the test features.
rs = RobustScaler()

In [7]:
def _features_and_target(frame):
    """Return (feature matrix, target as a single-column 2-D array) for one split."""
    features = frame[feature_cols].to_numpy()
    target = frame[price_col].to_numpy().reshape(-1, 1)
    return features, target


train_X_raw, train_y = _features_and_target(train_data)
test_X_raw, test_y = _features_and_target(test_data)

# Fit the scaler on the training split only, then reuse the fitted statistics on
# the test split. The fit must happen before the transform of the test data.
train_X = rs.fit_transform(train_X_raw)
test_X = rs.transform(test_X_raw)

In [8]:
# Sanity check: (rows, feature columns) of the scaled training matrix.
train_X.shape

(4916, 83)

In [9]:
# Number of training episodes; train_model is called once per episode.
episodes = 100

# Seed the RNGs available from this notebook so that re-running the cell is repeatable.
# NOTE(review): assumes the agent draws its randomness from `random`, NumPy and/or
# torch -- confirm inside Direction_DQN.model.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Pick the best available accelerator instead of hard-coding 'mps', which only
# exists on Apple-silicon builds of PyTorch and makes the cell fail elsewhere.
# NOTE(review): assumes Agent accepts any torch device string -- confirm.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

agent = Agent(
    state_size=len(feature_cols),   # one input per feature column
    action_size=2,
    strategy='double-dqn',
    hyperparams=hyperparams,
    device=device,
    memory_size=1000,
    pretrained=False,
    model_name='directional',
)

for episode in range(1, episodes + 1):
    ep, total_eps, reward, average_loss = train_model(
        agent,
        episode,
        train_X,
        train_y,
        ep_count=episodes,
        batch_size=hyperparams['batch_size'],
    )

    # Show training results
    show_train_result(ep, total_eps, reward, average_loss)

Episode 1/100 - Train Position: 0.4277                                            Train Loss: 0.1480
Episode 2/100 - Train Position: 10.4063                                            Train Loss: 0.1570
Episode 3/100 - Train Position: 13.8781                                            Train Loss: 0.0415
Episode 4/100 - Train Position: 12.4649                                            Train Loss: 0.0253
Episode 5/100 - Train Position: 3.4443                                            Train Loss: 0.0266
Episode 6/100 - Train Position: 9.3360                                            Train Loss: 6.8193
Episode 7/100 - Train Position: -9.4136                                            Train Loss: 36.2020
Episode 8/100 - Train Position: 10.6516                                            Train Loss: 0.0087
Episode 9/100 - Train Position: -0.8150                                            Train Loss: 0.0142
Episode 10/100 - Train Position: 4.7501                                            T

In [13]:
# Evaluate once on the held-out data. The original one-iteration loop
# (`range(1, 2)`) is replaced by a direct call with the same episode number.
episode = 1
reward, history = evaluate_model(agent, episode, test_X, test_y)

# Show test results: total reward plus how often each action was predicted.
# NOTE(review): the recorded run shows a single action (0) for all 365 test
# steps -- check whether the learned policy has collapsed to one action.
show_test_result(reward, Counter(step['pred'] for step in history))

Test Rewards: -0.7526
Action Counts: Counter({0: 365})


In [11]:
# Index of the evaluation step with the lowest reward (first one on ties).
step_rewards = np.array([step['reward'] for step in history])
worst_drop = step_rewards.argmin()

In [12]:
# Display the history record of the lowest-reward evaluation step.
# NOTE(review): in the recorded output 'pred' equals 'true' yet the reward is
# negative -- confirm how evaluate_model defines 'true' and 'reward'.
history[worst_drop]

{'true': 0, 'pred': 0, 'reward': -0.1167454794049263}