Step 1: Installing Necessary Libraries

In [8]:
# %pip install ccxt pandas numpy plotly stable-baselines3 gym

Step 2: Data Collection with CCXT

In [9]:
import ccxt
import pandas as pd

# Initialize MEXC exchange
# NOTE(review): 'enableRateLimit' is expected to switch on ccxt's client-side
# request throttling — confirm against the ccxt manual for the installed version.
exchange = ccxt.mexc({
    'enableRateLimit': True,
})

def fetch_candle_data(symbol, timeframe='1m', limit=100):
    """
    Fetch OHLCV candles for ``symbol`` from the module-level ``exchange``.

    Args:
        symbol: Market symbol passed through to ``exchange.fetch_ohlcv``.
        timeframe: Candle interval passed through to ``exchange.fetch_ohlcv``.
        limit: Number of candles requested.

    Returns:
        A DataFrame with the columns ``timestamp``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``; ``timestamp`` is converted to
        datetimes.
    """
    ohlcv_rows = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

    frame = pd.DataFrame(
        ohlcv_rows,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
    )
    # The raw timestamp values are interpreted as milliseconds (unit='ms').
    return frame.assign(timestamp=pd.to_datetime(frame['timestamp'], unit='ms'))

# Example: Fetch last 100 minutes of BTC/USDT
# (relies on the function defaults timeframe='1m' and limit=100)
df = fetch_candle_data('BTC/USDT')
print(df)

             timestamp      open      high       low     close     volume
0  2023-11-25 12:58:00  37721.71  37721.71  37721.70  37721.71   0.946657
1  2023-11-25 12:59:00  37721.71  37721.71  37721.70  37721.71   4.276544
2  2023-11-25 13:00:00  37721.71  37721.71  37714.69  37714.99   7.124302
3  2023-11-25 13:01:00  37714.99  37715.00  37713.03  37713.03   2.961123
4  2023-11-25 13:02:00  37713.03  37713.03  37705.05  37705.05   3.882039
..                 ...       ...       ...       ...       ...        ...
95 2023-11-25 14:33:00  37709.34  37718.33  37709.34  37718.30   2.627977
96 2023-11-25 14:34:00  37718.30  37718.38  37718.30  37718.37   4.753171
97 2023-11-25 14:35:00  37718.37  37718.45  37717.62  37717.63  13.224757
98 2023-11-25 14:36:00  37717.63  37717.63  37709.34  37709.35   3.324909
99 2023-11-25 14:37:00  37709.35  37709.35  37709.34  37709.35   1.749011

[100 rows x 6 columns]


Step 3: Defining the Reinforcement Learning Environment

In [10]:
def discretize(price, data, num_buckets=20):
    """
    Discretize the closing price into one of the defined buckets.

    The buckets are ``num_buckets`` equal-width intervals spanning the range
    between the smallest and largest value of ``data['close']``.

    Args:
        price: The closing price to place into a bucket.
        data: Table with a ``'close'`` column that defines the price range.
        num_buckets: Number of equal-width buckets covering that range.

    Returns:
        int: Bucket index, always within ``[0, num_buckets - 1]``.
    """
    lowest_close = data['close'].min()
    price_range = data['close'].max() - lowest_close

    # A zero-width (all closes equal) or NaN range leaves nothing to bucket;
    # dividing by it would produce NaN/inf and make int() raise.
    if not price_range > 0:
        return 0

    bucket_size = price_range / num_buckets
    index = int((price - lowest_close) / bucket_size)

    # Clamp both ends: the maximum close lands exactly on num_buckets, and a
    # price below the observed minimum would otherwise give a negative index
    # that silently wraps around when used to index an array.
    return max(0, min(index, num_buckets - 1))

In [11]:
import numpy as np
import random

class TradingEnv:
    """
    Step-by-step environment over a table of candles.

    The observation is the bucket index of the current closing price (as a
    one-element array). An action of 0 is rewarded when the close falls from
    one step to the next, an action of 1 when it rises; anything else,
    including an unchanged close, is penalized.
    """

    def __init__(self, data):
        """Store the candle table and position the environment at its start."""
        self.data = data
        self.reset()

    def reset(self):
        """Return to the first row, clear the done flag, and return the observation."""
        self.done = False
        self.current_step = 0
        return self._next_observation()

    def _close_at(self, step):
        """Return the closing price stored in row ``step``."""
        return self.data.iloc[step]['close']

    def _next_observation(self):
        """Return the discretized close of the current row as a one-element array."""
        bucket = discretize(self._close_at(self.current_step), self.data)
        return np.array([bucket])

    def step(self, action):
        """
        Advance one row and score ``action`` against the price move.

        Returns:
            A tuple ``(observation, reward, done)`` where ``reward`` is 1 for
            a correct prediction and -1 otherwise.
        """
        self.current_step += 1
        # The episode ends once the last row has been reached.
        if self.current_step >= len(self.data) - 1:
            self.done = True

        current_close = self._close_at(self.current_step)
        prior_close = self._close_at(self.current_step - 1)

        called_drop = action == 0 and current_close < prior_close
        called_rise = action == 1 and current_close > prior_close
        reward = 1 if (called_drop or called_rise) else -1

        return self._next_observation(), reward, self.done

# Wrap the candles fetched in Step 2 in the environment used for training.
env = TradingEnv(df)

Step 4: Implementing the Q-learning Algorithm & Model Training

In [12]:
def train_q_learning(env, episodes=1000, learning_rate=0.01, discount_factor=0.9,
                     num_buckets=20, epsilon=0.1, verbose=True):
    """
    Train a tabular Q-learning agent with an epsilon-greedy policy.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` returning
            ``(next_state, reward, done)``, and a ``done`` attribute. States
            are indexable, with the bucket index at position 0.
        episodes: Number of passes over the environment.
        learning_rate: Weight given to the new estimate in each Q update.
        discount_factor: Discount applied to the best next-state value.
        num_buckets: Number of rows in the Q-table; it must cover every state
            index the environment emits, otherwise indexing the table fails.
        epsilon: Probability of choosing a random action instead of the
            greedy one.
        verbose: When true, print the total reward after every episode.

    Returns:
        A tuple ``(q_table, episode_actions, episode_rewards)``. The two lists
        describe the final episode only; both are empty when ``episodes`` is 0.
    """
    action_size = 2  # Red or Green
    q_table = np.zeros((num_buckets, action_size))

    # Bound before the loop so the return statement is valid for episodes == 0.
    episode_actions = []
    episode_rewards = []

    for e in range(episodes):
        state = env.reset()
        total_reward = 0
        episode_actions = []
        episode_rewards = []

        while not env.done:
            # Exploration vs Exploitation
            if random.uniform(0, 1) < epsilon:
                action = random.choice([0, 1])
            else:
                # Cast so recorded actions are plain ints, like the random ones.
                action = int(np.argmax(q_table[state[0]]))

            # The returned done flag is not needed: the loop reads env.done.
            next_state, reward, _ = env.step(action)
            total_reward += reward
            episode_actions.append(action)
            episode_rewards.append(reward)

            # Q-Table update
            old_value = q_table[state[0], action]
            next_max = np.max(q_table[next_state[0]])
            new_value = (1 - learning_rate) * old_value + learning_rate * (reward + discount_factor * next_max)
            q_table[state[0], action] = new_value

            state = next_state

        if verbose:
            print(f"Episode: {e+1}, Total Reward: {total_reward}")

    return q_table, episode_actions, episode_rewards

# Train with the default hyperparameters. `actions` and `rewards` hold only the
# final episode's history, because the function starts fresh lists every episode.
q_table, actions, rewards = train_q_learning(env)

Episode: 1, Total Reward: -15
Episode: 2, Total Reward: -13
Episode: 3, Total Reward: -17
Episode: 4, Total Reward: -13
Episode: 5, Total Reward: -15
Episode: 6, Total Reward: 1
Episode: 7, Total Reward: -5
Episode: 8, Total Reward: 7
Episode: 9, Total Reward: -3
Episode: 10, Total Reward: 5
Episode: 11, Total Reward: 3
Episode: 12, Total Reward: 5
Episode: 13, Total Reward: 3
Episode: 14, Total Reward: 9
Episode: 15, Total Reward: 11
Episode: 16, Total Reward: 9
Episode: 17, Total Reward: 3
Episode: 18, Total Reward: -1
Episode: 19, Total Reward: 7
Episode: 20, Total Reward: -1
Episode: 21, Total Reward: -1
Episode: 22, Total Reward: 3
Episode: 23, Total Reward: 9
Episode: 24, Total Reward: 1
Episode: 25, Total Reward: 3
Episode: 26, Total Reward: 5
Episode: 27, Total Reward: 5
Episode: 28, Total Reward: 7
Episode: 29, Total Reward: 5
Episode: 30, Total Reward: 7
Episode: 31, Total Reward: 11
Episode: 32, Total Reward: 5
Episode: 33, Total Reward: 3
Episode: 34, Total Reward: 7
Episod

Step 5: Visualizing the Results

In [15]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_results(data, actions, rewards):
    """
    Plot the closing price above the agent's actions and rewards.

    Args:
        data: Table with a ``'close'`` column; its index is used as the x axis.
        actions: Sequence of actions, one per environment step.
        rewards: Sequence of rewards, one per environment step.

    The per-step sequences can be shorter than ``data`` (an episode takes one
    step fewer than there are rows), so entry ``i`` of each sequence is
    plotted at ``data.index[i]`` and the x values are sliced to match.
    """
    # Create a subplot
    fig = make_subplots(rows=2, cols=1)

    # Plotting price data
    fig.add_trace(
        go.Scatter(x=data.index, y=data['close'], mode='lines', name='Close Price'),
        row=1, col=1
    )

    # Plotting actions, then rewards, on the shared lower panel
    for trace_name, series in (('Actions', actions), ('Rewards', rewards)):
        fig.add_trace(
            go.Scatter(
                # Explicitly align x with the series length instead of passing
                # arrays of different sizes.
                x=data.index[:len(series)],
                y=series,
                mode='lines',
                name=trace_name,
                line=dict(width=2, dash='dot'),
            ),
            row=2, col=1
        )

    # Update layout
    fig.update_layout(
        height=800, width=None, title_text="Price Data, Actions, and Rewards",
        template="plotly_dark",
        autosize=True
    )
    fig.update_yaxes(title_text="Close Price", row=1, col=1)
    fig.update_yaxes(title_text="Actions / Rewards", row=2, col=1)

    fig.show()

# Example usage
# `actions` and `rewards` are the values returned by the training call in Step 4.
plot_results(df, actions, rewards)
