In [61]:
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
import pandas as pd
import numpy as np

TRAIN_START_DATE = '2010-01-01'
TRAIN_END_DATE = '2021-10-01'
TEST_START_DATE = '2021-10-01'
TEST_END_DATE = '2023-03-01'

df = YahooDownloader(start_date=TRAIN_START_DATE,
                     end_date=TEST_END_DATE,
                     ticker_list=DOW_30_TICKER).fetch_data()

# INDICATORS = ['rsi_14', 'macd', 'boll', 'atr_14', 'volume', 'adx_14', 'kdjk']
INDICATORS = [
    "rsi_14",
    "macd",
    "boll",
    "atr_14",
    "volume",
    "adx",
    "kdjk",
]
fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=INDICATORS,
                     use_turbulence=True,
                     use_vix=True,
                     user_defined_feature=False)

processed = fe.preprocess_data(df)
processed = processed.copy()
processed = processed.fillna(0)
processed = processed.replace(np.inf, 0)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Shape of DataFrame:  (97013, 8)


[*********************100%%**********************]  1 of 1 completed

Successfully added technical indicators
Shape of DataFrame:  (3310, 8)





Successfully added vix
Successfully added turbulence index


In [62]:
processed.set_index('date', inplace=True)
df=processed[processed['tic'] == 'AAPL']
df.drop(columns=['tic'], inplace=True)
df.head()

Unnamed: 0_level_0,open,high,low,close,volume_x,day,rsi_14,macd,boll,atr_14,volume_y,adx,kdjk,vix,turbulence
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-01-04,7.6225,7.660714,7.585,6.470741,493729600,0,100.0,0.0,6.470741,1.189973,493729600,100.0,-457.22218,20.040001,0.0
2010-01-05,7.664286,7.699643,7.616071,6.481928,601904800,1,100.0,0.000251,6.476335,1.210159,601904800,100.0,-625.541087,19.35,0.0
2010-01-06,7.656429,7.686786,7.526786,6.378824,552160000,2,9.153375,-0.002877,6.443831,1.208259,552160000,66.378205,-638.397311,19.16,0.0
2010-01-07,7.5625,7.571429,7.466071,6.367032,477131200,3,8.232265,-0.004656,6.424631,1.203901,477131200,64.460544,-582.443355,19.059999,0.0
2010-01-08,7.510714,7.571429,7.466429,6.409363,447610800,4,33.934413,-0.003839,6.421578,1.204015,447610800,63.553542,-539.099649,18.129999,0.0


In [63]:
df.columns

Index(['open', 'high', 'low', 'close', 'volume_x', 'day', 'rsi_14', 'macd',
       'boll', 'atr_14', 'volume_y', 'adx', 'kdjk', 'vix', 'turbulence'],
      dtype='object')

In [64]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Activation
from collections import deque
import numpy as np
import random
import gymnasium as gym
import gym
from gym import spaces

class CustomStockEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, df, window_size=10, transaction_fee=0.01):
        super(CustomStockEnv, self).__init__()
        self.df = df
        self.window_size = window_size
        self.transaction_fee = transaction_fee
        self.shape = (window_size, len(df.columns))
        self.action_space = spaces.Discrete(3)  # Actions: 0 = sell, 1 = hold, 2 = buy
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
        self.current_step = 0
        self.last_price = 0
        self.last_action = 1  # start with 'hold'

    def step(self, action):
        self.current_step += 1
        done = self.current_step + self.window_size >= len(self.df)
        current_price = self.df.loc[self.df.index[self.current_step], 'close']
        next_state = self.df.iloc[self.current_step:self.current_step + self.window_size].values

        # Calculate reward
        volatility = self.df.loc[self.df.index[max(0, self.current_step - 10):self.current_step], 'close'].std()
        reward = self.improved_reward(current_price, self.last_price, action, self.last_action, self.transaction_fee, volatility)

        # Update last price and action for next step
        self.last_price = current_price
        self.last_action = action

        return next_state, reward, done, {}

    def reset(self):
        self.current_step = 0
        self.last_price = self.df.loc[self.df.index[0], 'close']
        self.last_action = 1  # reset to 'hold'
        return self.df.iloc[self.current_step:self.current_step + self.window_size].values

    def improved_reward(self, current_close, previous_close, current_action, previous_action, transaction_fee, volatility):
        relative_price_change = (current_close - previous_close) / previous_close if previous_close != 0 else 0
        log_return = np.log(relative_price_change + 1)

        # Risk-adjusted return (simplified Sharpe ratio)
        risk_adjusted_return = log_return / volatility if volatility != 0 else log_return

        # Transaction fee adjustment
        tf_adjustment = (1 - transaction_fee) if current_action in [0, 2] else 1

        # Reward calculation
        if current_action in [0, 2]:  # sell or buy
            reward = risk_adjusted_return * tf_adjustment
        elif current_action == 1 and previous_action == 2:  # hold after buy
            reward = log_return
        elif current_action == 1 and previous_action == 0:  # hold after sell
            reward = -log_return
        else:
            reward = 0  # No action taken

        return reward

    def render(self, mode='human'):
        pass

    def close(self):
        pass



class DQNAgent:
    def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.99,
                 epsilon=0.1, epsilon_decay=0.995, epsilon_min=0.01,
                 buffer_size=10000, batch_size=32):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.gamma = gamma  # discount rate
        self.epsilon = epsilon  # exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        # First fully connected layer
        model.add(Dense(256, input_dim=(self.state_size)))
        model.add(BatchNormalization())
        model.add(Activation('relu'))

        # Second fully connected layer
        model.add(Dense(128))
        model.add(BatchNormalization())
        model.add(Activation('relu'))

        # Output layer with softmax activation
        model.add(Dense(self.action_size, activation='softmax'))

        model.compile(loss='mean_squared_error',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def improved_reward(current_close, previous_close, current_action, previous_action, transaction_fee, volatility):
        relative_price_change = (current_close - previous_close) / previous_close
        log_return = np.log(relative_price_change + 1)
        
        # Risk-adjusted return (simplified Sharpe ratio)
        risk_adjusted_return = log_return / volatility if volatility != 0 else log_return
        
        # Transaction fee adjustment
        tf_adjustment = (1 - transaction_fee) if current_action in ['buy', 'sell'] else 1
        
        # Reward calculation
        if current_action == 'buy' or current_action == 'sell':
            reward = risk_adjusted_return * tf_adjustment
        elif current_action == 'nothing' and previous_action == 'buy':
            reward = log_return  # Reward for holding after a buy
        elif current_action == 'nothing' and previous_action == 'sell':
            reward = -log_return  # Cost for holding when should have sold
        else:
            reward = 0  # No action taken
        
        return reward

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def choose_action(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.randint(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self):
        if len(self.memory) < self.batch_size:
            return

        minibatch = random.sample(self.memory, self.batch_size)
        states = []
        next_states = []
        targets_f = []

        for state, action, reward, next_state, done in minibatch:
            states.append(np.reshape(state, (self.state_size,)))  # Append reshaped state to list
            if not done:
                next_states.append(np.reshape(next_state, (self.state_size,)))  # Append reshaped next_state to list

        # Convert lists to NumPy arrays for prediction
        states_np = np.array(states)
        next_states_np = np.array(next_states)

        # Predict all next Q-values in one network pass if there are next states to predict
        if len(next_states) > 0:
            q_next = self.model.predict(next_states_np)
            q_next_max = np.amax(q_next, axis=1)

        # Calculate targets for each experience in the minibatch
        index = 0
        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            if done:
                target = reward
            else:
                target = reward + self.gamma * q_next_max[index]
                index += 1
            # Update the target for the action taken
            target_f = self.model.predict(np.array([np.reshape(state, (self.state_size,))]))
            target_f[0][action] = target
            targets_f.append(target_f[0])

        # Perform a single batch update to the model
        self.model.train_on_batch(states_np, np.array(targets_f))

        # Epsilon decay
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)



env= CustomStockEnv(df=df, window_size=20)
# Create the env and init the agent
state_size = env.window_size * len(df.columns)  # This accounts for all features over the window
action_size = 3 # buy, sell, hold
agent = DQNAgent(state_size=state_size, action_size=action_size)

episodes = 1000  # Define the number of episodes for training
for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    total_reward = 0

    while True:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        # Store transition in replay buffer
        agent.remember(state, action, reward, next_state, done)

        state = next_state
        total_reward += reward

        if done:
            print(f"Episode: {e + 1}/{episodes}, Score: {total_reward}, Epsilon: {agent.epsilon:.2f}")
            break

        # Train the agent with the experience of the episode
        agent.replay()

    # Optionally save the model
#      if (e + 1) % 50 == 0:
#        agent.save(f'cartpole_model_{e+1}.h5')



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21

KeyboardInterrupt: 

In [None]:
df.set_index('date', inplace=True)


In [None]:
processed