# RL-Enhanced Market Making for Crypto Assets

This notebook demonstrates how to enhance the Avellaneda-Stoikov market making model with Reinforcement Learning.

In [None]:
# Import necessary libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import gym

from datetime import datetime, timedelta

import sys
import os
import importlib.util



# Configure plotting
# 'ggplot' is applied globally, so every figure created after this cell uses it.

plt.style.use('ggplot')

# IPython magic (not plain Python): render matplotlib figures inline in the notebook.
%matplotlib inline



# Add project root to path
# '..' is a relative entry, so it is resolved against the kernel's current
# working directory. The `src.*` imports in the next cell therefore only work
# when the kernel was started from the notebook's own directory.
# NOTE(review): assumes the notebook lives one level below the project root - confirm.

sys.path.append('..')

In [None]:
# Import project modules
from src.models.avellaneda_stoikov import AvellanedaStoikovModel
from src.models.rl_enhanced_model import RLEnhancedModel, MarketMakingEnv
from src.data.data_processor import DataProcessor

## 1. Set Up RL Environment for Market Making

In [None]:
# Generate synthetic market data
# DataProcessor is project code; in this cell it is only used for
# add_technical_features() further down.
data_processor = DataProcessor()

# Generate date range
# One timestamp per minute over the 30 days ending "now". The index is anchored
# to the wall clock, so it differs between runs even though the price path
# itself is seeded below.
start_date = datetime.now() - timedelta(days=30)
end_date = datetime.now()
timestamps = pd.date_range(start=start_date, end=end_date, freq='1min')

# Generate price data
# Seed the global NumPy RNG. Every random draw in the rest of this cell comes
# from that global stream, so reordering the statements below changes the data.
np.random.seed(42)
price = 2000.0  # Starting ETH price
prices = [price]

# Random walk price process
# Multiplicative walk: each bar scales the previous price by (1 + r) with
# r ~ Normal(mean=0, std=0.001). len(timestamps) - 1 steps plus the starting
# price give exactly one price per timestamp.
for _ in range(len(timestamps) - 1):
    returns = np.random.normal(0, 0.001)  # Small random returns
    price *= (1 + returns)
    prices.append(price)

# Create DataFrame
# 'open' and 'close' are both the simulated price. 'high' / 'low' scale it up /
# down by an independent uniform draw from [0, 0.002), so low <= close <= high
# always holds. 'volume' is uniform on [10, 100) with no link to price moves.
market_data = pd.DataFrame({
    'open': prices,
    'high': [p * (1 + np.random.uniform(0, 0.002)) for p in prices],
    'low': [p * (1 - np.random.uniform(0, 0.002)) for p in prices],
    'close': prices,
    'volume': np.random.uniform(10, 100, size=len(timestamps))
}, index=timestamps)

# Add technical features
# Rebinds market_data to whatever add_technical_features returns.
# NOTE(review): the comparison section of this notebook reads 'mid_price' and
# 'volatility' columns; presumably they are added here - confirm in DataProcessor.
market_data = data_processor.add_technical_features(market_data)

# Create market making environment
# NOTE(review): parameter semantics (units of max_inventory, whether
# transaction_fee is a fraction, how trading_horizon bounds an episode) are
# defined by MarketMakingEnv and are not visible here - confirm there.
env = MarketMakingEnv(
    market_data=market_data,
    initial_capital=10000.0,
    max_inventory=20,
    transaction_fee=0.001,
    reward_scaling=1.0,
    trading_horizon=1000
)

# Reset and get initial observation
# NOTE(review): treats the return value of reset() as the observation itself
# (classic gym API) rather than an (obs, info) tuple - confirm.
initial_obs = env.reset()
print(f"Initial observation: {initial_obs}")
print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

## 2. Implement Q-Learning for Market Making

In [None]:
# Simple Q-learning implementation (simplified for demonstration)
import numpy as np
from collections import defaultdict

class QLearningAgent:
    """Tabular Q-learning agent over a discretized continuous action space.

    Each action dimension is split into ``n_bins`` evenly spaced levels between
    ``action_space.low[i]`` and ``action_space.high[i]`` (both endpoints
    included). A joint action is identified by a single integer index whose
    base-``n_bins`` digits select the level of each dimension, so the Q-table
    holds ``n_bins ** n_dims`` values per state.

    Parameters
    ----------
    action_space : object
        Must expose ``shape`` (1-D), ``low`` and ``high`` (one bound per
        dimension), e.g. a gym ``Box``.
    alpha : float
        Learning rate of the Q-value update.
    gamma : float
        Discount factor applied to the bootstrapped next-state value.
    epsilon : float
        Probability of picking a uniformly random action instead of the
        greedy one.
    n_bins : int
        Number of discrete levels per action dimension (default 10).

    Raises
    ------
    ValueError
        If ``n_bins`` is smaller than 1.

    Notes
    -----
    The table grows exponentially with the number of action dimensions; this
    is only practical for low-dimensional action spaces.
    """

    def __init__(self, action_space, alpha=0.1, gamma=0.99, epsilon=0.1, n_bins=10):
        if n_bins < 1:
            raise ValueError("n_bins must be at least 1")

        self.action_space = action_space
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.n_bins = int(n_bins)
        self.n_dims = int(action_space.shape[0])

        # One Q-value per *joint* action. discretize_action() decodes the index
        # as n_dims base-n_bins digits, so the table needs n_bins ** n_dims
        # entries; a table of n_dims * n_bins would leave most of the grid
        # unreachable.
        self.n_actions = self.n_bins ** self.n_dims
        n_actions = self.n_actions
        self.q_table = defaultdict(lambda: np.zeros(n_actions))

    def discretize_state(self, state):
        """Map a continuous observation to a hashable Q-table key.

        Every component is rounded to one decimal place. This is a deliberately
        crude discretization kept for demonstration purposes.
        """
        flat_state = np.asarray(state, dtype=float).ravel()
        return tuple(np.round(flat_state, 1))

    def discretize_action(self, action_idx):
        """Convert a joint action index into a continuous action vector.

        The index is read as base-``n_bins`` digits, least significant digit
        first: digit ``i`` selects the level of action dimension ``i``. Level
        0 maps to ``low[i]`` and level ``n_bins - 1`` maps to ``high[i]``.

        Returns
        -------
        numpy.ndarray
            Float array of length ``n_dims``.
        """
        low = np.asarray(self.action_space.low, dtype=float)
        high = np.asarray(self.action_space.high, dtype=float)

        # With a single bin there is no range to span; stay at the lower bound.
        last_level = max(self.n_bins - 1, 1)

        action = np.zeros(self.n_dims)
        remaining = int(action_idx)
        for dim in range(self.n_dims):
            remaining, level = divmod(remaining, self.n_bins)
            fraction = level / last_level
            action[dim] = low[dim] + fraction * (high[dim] - low[dim])

        return action

    def select_action(self, state):
        """Pick an action with an epsilon-greedy policy.

        Returns
        -------
        tuple
            ``(action, action_idx)``: the continuous action to send to the
            environment and the discrete index to pass back to ``update``.
        """
        state_key = self.discretize_state(state)

        if np.random.random() < self.epsilon:
            # Explore: uniform over the whole joint action grid
            action_idx = int(np.random.randint(0, self.n_actions))
        else:
            # Exploit: best known action (ties resolve to the lowest index)
            action_idx = int(np.argmax(self.q_table[state_key]))

        return self.discretize_action(action_idx), action_idx

    def update(self, state, action_idx, reward, next_state, done):
        """Apply one Q-learning update for the transition just observed.

        The target is ``reward`` for terminal transitions and
        ``reward + gamma * max_a Q(next_state, a)`` otherwise; the stored value
        moves towards the target by a fraction ``alpha``.
        """
        state_key = self.discretize_state(state)
        next_state_key = self.discretize_state(next_state)

        # Calculate TD target
        if done:
            target = reward
        else:
            target = reward + self.gamma * np.max(self.q_table[next_state_key])

        # Update Q-value
        td_error = target - self.q_table[state_key][action_idx]
        self.q_table[state_key][action_idx] += self.alpha * td_error

# Note: This is a simplified implementation for demonstration purposes.
# In practice, one would use a more sophisticated RL algorithm such as DQN or PPO.
# The state and action discretization is likewise simplified.

# Train the agent
# The agent is created once, outside the episode loop, so its Q-table keeps
# accumulating across all episodes. epsilon=0.3 overrides the class default of 0.1.
agent = QLearningAgent(env.action_space, alpha=0.1, gamma=0.99, epsilon=0.3)
n_episodes = 10
max_steps = 100  # Upper bound on steps per episode; the env may end it earlier via `done`

for episode in range(n_episodes):
    # NOTE(review): treats reset() as returning the observation only and step()
    # as returning a 4-tuple (classic gym API) - confirm against MarketMakingEnv.
    state = env.reset()
    total_reward = 0
    
    for step in range(max_steps):
        # Select action
        # `action` is the continuous vector sent to the env; `action_idx` is the
        # discrete index the agent needs to update the matching Q-table entry.
        action, action_idx = agent.select_action(state)
        
        # Take action
        next_state, reward, done, info = env.step(action)
        
        # Update agent
        agent.update(state, action_idx, reward, next_state, done)
        
        # Update state and reward
        state = next_state
        total_reward += reward
        
        if done:
            break
            
    # `info` is the dict returned by the last env.step() of this episode. It would
    # be undefined on the first episode if max_steps were 0.
    # NOTE(review): relies on the env putting an 'inventory' key in `info` - confirm.
    print(f"Episode {episode+1}: Total Reward = {total_reward:.2f}, Final Inventory = {info['inventory']}")

## 3. Compare Base Model vs RL-Enhanced Model

In [None]:
# Build the baseline Avellaneda-Stoikov model and wrap it in the RL-enhanced model
base_model = AvellanedaStoikovModel(risk_aversion=1.0, time_horizon=1.0, volatility=0.01)
rl_model = RLEnhancedModel(base_model=base_model)

# Both models are evaluated on the same leading slice of the synthetic series
test_points = 100
head = market_data.iloc[:test_points]
mid_prices = head['mid_price'].values
volatility = head['volatility'].values

# One (bid, ask) pair per time step for each model
base_quotes = []
rl_quotes = []

for step_idx in range(test_points):
    current_mid = mid_prices[step_idx]
    current_vol = volatility[step_idx]

    # Feed the current volatility estimate into the baseline model
    base_model.set_parameters(volatility=current_vol)

    # Quote with each model at the same mid price
    base_quotes.append(tuple(base_model.calculate_optimal_quotes(current_mid)))
    rl_quotes.append(tuple(rl_model.calculate_optimal_quotes(current_mid, {'volatility': current_vol})))

# Stack into (test_points, 2) arrays: column 0 = bid, column 1 = ask
base_quotes = np.array(base_quotes)
rl_quotes = np.array(rl_quotes)

# Spread = ask - bid, computed once and reused for the plot and the summary
base_spreads = base_quotes[:, 1] - base_quotes[:, 0]
rl_spreads = rl_quotes[:, 1] - rl_quotes[:, 0]

# Plot both spread series on a shared axis
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(base_spreads, label='Base Model Spread')
ax.plot(rl_spreads, label='RL-Enhanced Spread')
ax.set_title('Spread Comparison: Base vs RL-Enhanced Model')
ax.set_xlabel('Time Step')
ax.set_ylabel('Spread')
ax.legend()
ax.grid(True)
plt.show()

# Summary statistics
base_avg_spread = np.mean(base_spreads)
rl_avg_spread = np.mean(rl_spreads)

print(f"Base Model - Average Spread: {base_avg_spread:.6f}")
print(f"RL-Enhanced Model - Average Spread: {rl_avg_spread:.6f}")
print(f"Difference: {(rl_avg_spread - base_avg_spread) / base_avg_spread * 100:.2f}%")