In [4]:
from collections import deque
import numpy as np
import random
import tensorflow as tf

In [37]:
# DQN Agent Class
class DQN_Agent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(24, input_dim=self.state_size, activation='relu'),
            tf.keras.layers.Dense(24, activation='relu'),
            tf.keras.layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    
    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return 0  # Return 0 when not enough samples to replay

        minibatch = random.sample(self.memory, batch_size)
        total_loss = 0

        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])

            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target

            # Train the model and calculate loss
            history = self.model.fit(state, target_f, epochs=1, verbose=0)
            total_loss += history.history['loss'][0]

        return total_loss / batch_size


# Training function for a single timeframe
def train_dqn_for_timeframe(data, state_size, action_size, model_name, epochs=10, batch_size=32):
    if len(data) <= state_size:
        print(f"Not enough data to train for {model_name}. Skipping...")
        return

    agent = DQN_Agent(state_size, action_size)

    for epoch in range(epochs):
        print(f"Training {model_name}: Epoch {epoch + 1}/{epochs}")
        total_reward = 0
        total_loss = 0
        steps = 0

        for i in range(len(data) - state_size - 1):
            state = data.iloc[i:i + state_size].values.reshape(1, -1)
            next_state = data.iloc[i + 1:i + state_size + 1].values.reshape(1, -1)
            action = agent.act(state)

            # Example reward: difference in close prices
            reward = next_state[0, -1] - state[0, -1]

            done = i == len(data) - state_size - 2
            agent.remember(state, action, reward, next_state, done)

            # Train the model
            loss = agent.replay(batch_size)
            print(i,loss)
            total_reward += reward
            total_loss += loss
            steps += 1

        # Print average reward and loss for the epoch
        avg_reward = total_reward / steps if steps > 0 else 0
        avg_loss = total_loss / steps if steps > 0 else 0
        print(f"Epoch {epoch + 1}/{epochs} - Avg Reward: {avg_reward:.4f}, Avg Loss: {avg_loss:.4f}")

    # Save the trained model
    model_path = f"{model_name}.h5"
    agent.model.save(model_path)
    print(f"Model {model_name} trained and saved as {model_path}")




In [38]:
# Fetch data for different timeframes
from utils import fetch_historical_data

df_1m = fetch_historical_data('BTCUSDT', '1m')
df_1h = fetch_historical_data('BTCUSDT', '1h')
df_4h = fetch_historical_data('BTCUSDT', '4h')
df_1d = fetch_historical_data('BTCUSDT', '1d')

In [39]:
df_1m

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-11 02:58:00,97186.85,97220.08,97160.61,97197.90,20.80138
2024-12-11 02:59:00,97197.90,97218.71,97187.82,97207.29,12.39133
2024-12-11 03:00:00,97207.29,97297.85,97186.39,97297.85,16.95063
2024-12-11 03:01:00,97297.85,97330.60,97197.06,97200.51,40.13409
2024-12-11 03:02:00,97200.50,97411.73,97200.50,97411.72,28.02955
...,...,...,...,...,...
2024-12-11 19:33:00,100845.83,100846.03,100766.24,100779.90,16.95940
2024-12-11 19:34:00,100779.91,100810.76,100779.90,100810.75,27.14930
2024-12-11 19:35:00,100810.76,100837.95,100776.44,100801.34,15.07928
2024-12-11 19:36:00,100801.34,100815.44,100761.00,100777.23,28.41349


In [40]:
df_1h

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-10-31 04:00:00,72353.75,72380.91,72224.38,72273.99,532.75988
2024-10-31 05:00:00,72273.99,72384.17,72207.85,72282.00,464.23150
2024-10-31 06:00:00,72282.01,72320.64,72152.00,72212.12,669.03855
2024-10-31 07:00:00,72212.13,72478.00,72150.00,72417.19,634.52539
2024-10-31 08:00:00,72417.19,72452.00,72149.79,72276.05,672.13547
...,...,...,...,...,...
2024-12-11 15:00:00,99515.99,100800.00,99414.72,100361.64,5764.60580
2024-12-11 16:00:00,100361.64,101090.90,100295.61,100575.99,2950.52479
2024-12-11 17:00:00,100576.00,100982.07,99660.15,99781.90,2298.27737
2024-12-11 18:00:00,99781.89,100786.30,99608.23,100388.71,2207.59419


In [41]:
df_4h

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-06-28 04:00:00,61828.17,61834.41,61236.00,61350.01,2679.62058
2024-06-28 08:00:00,61350.01,61792.34,61212.11,61690.00,3397.52649
2024-06-28 12:00:00,61690.00,61875.00,60751.00,61062.01,7645.83227
2024-06-28 16:00:00,61062.00,61119.58,60079.41,60123.73,5131.47495
2024-06-28 20:00:00,60123.73,60450.00,60063.00,60427.84,2016.39413
...,...,...,...,...,...
2024-12-11 00:00:00,96593.00,97625.76,95658.24,97398.43,6101.81371
2024-12-11 04:00:00,97398.42,97790.36,97181.70,97334.60,2882.63101
2024-12-11 08:00:00,97334.59,98472.57,97316.78,98292.61,3915.05820
2024-12-11 12:00:00,98292.61,100800.00,98072.33,100361.64,10646.28169


In [42]:
df_1d

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-03-18,40917.89,42325.02,40135.04,41757.51,45408.00969
2022-03-19,41757.51,42400.00,41499.29,42201.13,29067.18108
2022-03-20,42201.13,42296.26,40911.00,41262.11,30653.33468
2022-03-21,41262.11,41544.22,40467.94,41002.25,39426.24877
2022-03-22,41002.26,43361.00,40875.51,42364.13,59454.94294
...,...,...,...,...,...
2024-12-07,99740.84,100439.18,98844.00,99831.99,14931.94590
2024-12-08,99831.99,101351.00,98657.70,101109.59,14612.99688
2024-12-09,101109.60,101215.93,94150.05,97276.47,53949.11595
2024-12-10,97276.48,98270.00,94256.54,96593.00,51708.68933


In [None]:


# Training models for each timeframe
train_dqn_for_timeframe(df_1m['close'], state_size=10, action_size=3, model_name="BTCUSDT_1m", epochs=10)
train_dqn_for_timeframe(df_1h['close'], state_size=10, action_size=3, model_name="BTCUSDT_1h", epochs=10)
train_dqn_for_timeframe(df_4h['close'], state_size=10, action_size=3, model_name="BTCUSDT_4h", epochs=10)
train_dqn_for_timeframe(df_1d['close'], state_size=10, action_size=3, model_name="BTCUSDT_1d", epochs=10)

Training BTCUSDT_1m: Epoch 1/10
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
21 0
22 0
23 0
24 0
25 0
26 0
27 0
28 0
29 0
30 0
31 4074230.9745788574
32 141120.296333313
33 63688.41912174225
34 40769.33128374815
35 22464.909202575684
36 20572.408814430237
37 16832.30374622345
38 62043.021540641785
39 46621.97052169847
40 43604.15052866936
41 23703.525081276894
42 39015.40900552273
43 62826.91944074631
44 517932.56731796265
45 411489.0266723633
46 144828.76662544906
47 132723.69809150696
48 143358.28324890137
49 242531.4014840126
50 49402.65629148483
51 35367.230174064636
52 67832.37964439392
53 76791.81343650818
54 101856.53360965848
55 97597.67701339722
56 108317.07531356812
57 87371.34119033813
58 266354.8063812256
59 973999.8842474669
60 16908831.708984375
61 5225241.536865234
62 1284576.9261131287
63 392721.4580478668
64 63754.811915397644
65 17211.09861010313
66 3237.3390942937694
67 3970.1199242807925
68 2795.039864346385
69 5705.1