In [2]:
### **1. Environment Setup**

# Import necessary libraries
import os
import talib
import pandas as pd
import numpy as np
from gymnasium import Env
from gymnasium.spaces import Discrete, Box, Space
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import csv

In [3]:
# Constants
DATA_FILE = 'ethusd_hourly_klines.csv'
MODEL_PATH = 'ppo_trading_agent.zip'
TRADE_LOG_FILE = 'trade_logs.csv'
TENSORBOARD_LOG_DIR = './tensorboard_logs'

In [4]:


# Apply feature engineering
# data = add_features(data)
data = pd.read_csv(DATA_FILE)
# Validate feature engineering
print("Data After Feature Engineering:")
print(data.head())
print("Feature Summary:")
print(data.describe())

# Split into train and eval datasets
split_index = int(len(data) * 0.6)
train_data = data.iloc[:split_index]
eval_data = data.iloc[split_index:]

# Validate split
print(f"Training Data Shape: {train_data.shape}, Evaluation Data Shape: {eval_data.shape}")

# Features and target columns
features = ['RSI', 'MACD', 'MACD_signal', 'ADX', 'ATR', 
            'rolling_mean', 'rolling_std', 'close_lag1', 'close_lag2', 
            'RSI_MACD', 'Bollinger_width']

# Standardize selected features
scaler = StandardScaler()
scaled_features = [feature for feature in features if feature != 'RSI']

train_data.loc[:, scaled_features] = scaler.fit_transform(train_data[scaled_features])
eval_data.loc[:, scaled_features] = scaler.transform(eval_data[scaled_features])

# Validate scaling
print("Training Data After Scaling:")
print(train_data[scaled_features].head())
print("Evaluation Data After Scaling:")
print(eval_data[scaled_features].head())

Data After Feature Engineering:
             timestamp         open         high          low        close  \
0  2011-02-20 07:00:00  1998.188697  2000.628878  1996.080408  1996.957397   
1  2011-04-14 05:00:00  2004.707732  2012.918771  2001.045870  2002.934554   
2  2012-01-15 18:00:00  2031.258871  2034.890974  2023.948789  2029.250030   
3  2013-09-17 07:00:00  2027.712261  2026.221531  2024.229547  2025.955918   
4  2016-04-03 07:00:00  2012.432518  2020.176673  2006.577967  2010.687263   

        volume        RSI      MACD  MACD_signal  Bollinger_upper  ...  \
0  5426.994023  43.647033  0.270205     0.266111      2031.952680  ...   
1  4122.127479  76.318985 -4.871150    -4.530413      2038.776413  ...   
2  8299.960540  43.864101  0.094041     0.093102      2031.601097  ...   
3  2260.198308  14.809031  1.362103     0.995704      2036.394248  ...   
4  6077.519512  64.272326  0.240628     0.232053      2062.883475  ...   

         ATR  rolling_mean  rolling_std   close_lag1  

In [5]:
class SignalTradingEnvironment(Env):
    def __init__(self, data, features, action_conditions):
        super(SignalTradingEnvironment, self).__init__()
        self.data = data
        self.features = features
        self.action_conditions = action_conditions  # Functions for conditions
        self.current_step = 0
        
        # Define action space (0: Hold, 1: Buy, 2: Sell)
        self.action_space = Discrete(3)
        
        # Define observation space (signals)
        self.observation_space = Box(
            low=-np.inf, high=np.inf, shape=(len(features),), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        return self.data[self.features].iloc[self.current_step].values

    def step(self, action):
        state = self.data[self.features].iloc[self.current_step].values
        reward = self.calculate_reward(action)
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        next_state = self.data[self.features].iloc[self.current_step].values if not done else None
        return next_state, reward, done, {}

    def calculate_reward(self, action):
        for correct_action, condition in self.action_conditions.items():
            if condition(self.data.iloc[self.current_step]):
                return 1 if action == correct_action else -1
        return -0.1  # Small penalty for taking no useful action

    def render(self):
        print(f"Step: {self.current_step}, State: {self.data[self.features].iloc[self.current_step].values}")


In [6]:
def buy_condition(row):
    return row['RSI'] < 30 and row['MACD'] > row['MACD_signal']

def sell_condition(row):
    return row['RSI'] > 70 and row['MACD'] < row['MACD_signal']

def hold_condition(row):
    return not buy_condition(row) and not sell_condition(row)

action_conditions = {
    1: buy_condition,  # 1: Buy
    2: sell_condition, # 2: Sell
    0: hold_condition  # 0: Hold
}


In [8]:
from keras import Sequential
from keras.src.layers import Dense
from keras.src.optimizers import Adam
from collections import deque
import random

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(24, activation='relu', input_dim=self.state_size),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [10]:
env = SignalTradingEnvironment(data2, features=['RSI', 'MACD', 'MACD_signal'], action_conditions=action_conditions)
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)

import logging
import numpy as np

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Training loop with live logging
episodes = 100
batch_size = 32

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    total_reward = 0
    logging.info(f"Starting Episode {e+1}/{episodes}")
    
    for time in range(len(data2) - 1):
        # Agent decides an action
        action = agent.act(state)
        
        # Execute action in the environment
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size]) if next_state is not None else None
        
        # Save experience in memory
        agent.remember(state, action, reward, next_state, done)
        
        # Update state and accumulate reward
        state = next_state
        total_reward += reward
        
        # Log step details
        logging.info(f"Step: {time + 1}, Action: {action}, Reward: {reward}, Total Reward: {total_reward}")
        
        if done:
            logging.info(f"Episode {e+1}/{episodes} Completed - Total Reward: {total_reward}")
            break
        
        # Train the agent if enough experience is collected
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

    logging.info(f"Finished Episode {e+1}/{episodes} with Total Reward: {total_reward}")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-12-20 00:36:26,581 - Starting Episode 1/100
2024-12-20 00:36:26,609 - Step: 1, Action: 1, Reward: -1, Total Reward: -1
2024-12-20 00:36:26,631 - Step: 2, Action: 2, Reward: 1, Total Reward: 0
2024-12-20 00:36:26,654 - Step: 3, Action: 2, Reward: -1, Total Reward: -1
2024-12-20 00:36:26,679 - Step: 4, Action: 2, Reward: -1, Total Reward: -2
2024-12-20 00:36:26,705 - Step: 5, Action: 1, Reward: -1, Total Reward: -3
2024-12-20 00:36:26,730 - Step: 6, Action: 0, Reward: -1, Total Reward: -4
2024-12-20 00:36:26,754 - Step: 7, Action: 0, Reward: 1, Total Reward: -3
2024-12-20 00:36:26,778 - Step: 8, Action: 1, Reward: 1, Total Reward: -2
2024-12-20 00:36:26,803 - Step: 9, Action: 2, Reward: -1, Total Reward: -3
2024-12-20 00:36:26,826 - Step: 10, Action: 1, Reward: 1, Total Reward: -2
2024-12-20 00:36:26,850 - Step: 11, Action: 0, Reward: -1, Total Reward: -3
2024-12-20 00:36:26,873 - Step: 12, Action: 0, Reward: -1

: 