# Market Making Parameter Optimization

Bayesian optimization using Optuna to find optimal market-making parameters.

**Objective**: Maximize the post-fee Sharpe ratio of the backtest, with a penalty applied when max drawdown exceeds 15%.

**Parameters to optimize**:
- `min_spread`: Minimum spread to quote
- `trade_size`: Base order size
- `max_size`: Maximum position per market
- `stop_loss_pct`: Stop-loss threshold
- `take_profit_pct`: Take-profit target
- `volatility_threshold`: Max volatility to trade
- `inventory_skew_factor`: Quote skew for inventory management
- `sleep_period_hours`: Sleep period, in hours (also sampled by the objective)

In [None]:
# Environment setup: detect Google Colab, locate the data directory, make the
# project importable from the working directory, and configure logging/plots.
import os
# Treat the session as Colab when the COLAB_GPU environment variable is set or
# the IPython shell's string representation mentions google.colab.
IN_COLAB = 'COLAB_GPU' in os.environ or 'google.colab' in str(get_ipython())

if IN_COLAB:
    # Data lives on the mounted Google Drive when running in Colab.
    from google.colab import drive
    drive.mount('/content/drive')
    DATA_DIR = '/content/drive/MyDrive/polymarket-data'
    # Clone the project; if the clone command fails (its stderr is discarded),
    # fall back to `git pull` inside the existing checkout.
    !git clone https://github.com/jsseoi/polymarket-trading-bot.git /content/polymarket-trading-bot 2>/dev/null || \
        (cd /content/polymarket-trading-bot && git pull)
    os.chdir('/content/polymarket-trading-bot')
    !pip install -q pandas numpy matplotlib pyarrow optuna requests pyyaml structlog ratelimit
else:
    # Otherwise use the relative `data` directory.
    DATA_DIR = 'data'

import sys
# Put the current working directory first on the import path so that the
# `src.*` imports below resolve against it.
sys.path.insert(0, '.')

import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

from src.strategies.market_making import (
    MarketMakingStrategy, MarketMakingParams, FEE_POLITICAL, FEE_CRYPTO
)
from src.backtesting.mm_engine import MarketMakingEngine, MMBacktestConfig

# Only show Optuna messages at WARNING level and above.
optuna.logging.set_verbosity(optuna.logging.WARNING)
plt.style.use('seaborn-v0_8-darkgrid')
print('Setup complete')

In [None]:
# Prepare the backtest engine: use the historical snapshot file when it
# exists, otherwise fall back to seeded synthetic data.
engine = MarketMakingEngine()
HIST_FILE = f'{DATA_DIR}/mm_historical.json'

if not os.path.exists(HIST_FILE):
    print('No historical data found, using synthetic data')
    count = engine.generate_mm_synthetic_data(num_markets=30, days=90, seed=42)
    print(f'Generated {count} synthetic snapshots')
    # Backtest window: the 90 days leading up to now.
    START = datetime.now() - timedelta(days=90)
    END = datetime.now()
else:
    count = engine.load_data(HIST_FILE)
    print(f'Loaded {count} historical snapshots')
    # Backtest window: earliest to latest snapshot timestamp.
    dates = [s.timestamp for s in engine.all_snapshots]
    START, END = min(dates), max(dates)

print(f'Date range: {START} to {END}')

In [None]:
# Optimization objective
def objective(trial):
    """Score one Optuna trial with a market-making backtest.

    Samples a parameter set from ``trial``, runs ``engine.run_mm`` over
    ``START``..``END`` with random fills disabled, and returns the Sharpe
    ratio, reduced by a linear penalty when max drawdown exceeds 0.15.

    Args:
        trial: Optuna trial used to sample the search space.

    Returns:
        The (possibly penalized) Sharpe ratio, or ``-inf`` when the sampled
        sizes violate the ``max_size >= 2 * trade_size`` constraint, the
        backtest raises, fewer than 10 trades were made, or the Sharpe ratio
        is NaN.
    """
    import math

    rejected = float('-inf')

    # Sample parameters
    params = MarketMakingParams(
        min_spread=trial.suggest_float('min_spread', 0.005, 0.05),
        trade_size=trial.suggest_float('trade_size', 20.0, 150.0),
        max_size=trial.suggest_float('max_size', 100.0, 500.0),
        stop_loss_pct=trial.suggest_float('stop_loss_pct', -15.0, -2.0),
        take_profit_pct=trial.suggest_float('take_profit_pct', 0.5, 10.0),
        volatility_threshold=trial.suggest_float('volatility_threshold', 0.03, 0.20),
        inventory_skew_factor=trial.suggest_float('inventory_skew_factor', 0.0, 0.8),
        sleep_period_hours=trial.suggest_float('sleep_period_hours', 0.5, 4.0),
        min_liquidity=5000.0,
        min_volume_24h=10000.0,
        fee_config=FEE_POLITICAL,
    )

    # Constraint: max_size must be >= 2x trade_size
    if params.max_size < params.trade_size * 2:
        return rejected

    strategy = MarketMakingStrategy(params)

    config = MMBacktestConfig(
        start_date=START,
        end_date=END,
        initial_capital=1000.0,
        fill_aggression=0.5,
        use_random_fills=False,  # Deterministic for optimization
    )

    try:
        result = engine.run_mm(strategy, config)
    except Exception as exc:
        # Best effort: one failing backtest must not abort the study, but the
        # reason is kept on the trial so systematic failures stay visible.
        trial.set_user_attr('error', repr(exc))
        return rejected

    # Reject runs with too few trades BEFORE scoring, so that a thin,
    # high-drawdown run cannot slip through with a finite score.
    if result.total_trades < 10:
        return rejected

    sharpe = result.sharpe_ratio
    # A NaN score carries no ranking information; treat it as rejected.
    if math.isnan(sharpe):
        return rejected

    # Penalize high drawdown: subtract 10x the excess over 0.15.
    if result.max_drawdown_pct > 0.15:
        return sharpe - (result.max_drawdown_pct - 0.15) * 10

    return sharpe

print('Objective function defined')

In [None]:
# Create a seeded TPE study and search the parameter space.
tpe_sampler = optuna.samplers.TPESampler(seed=42)
median_pruner = optuna.pruners.MedianPruner()
study = optuna.create_study(
    direction='maximize',
    sampler=tpe_sampler,
    pruner=median_pruner,
)

N_TRIALS = 200  # Increase for better results (takes longer)
print(f'Running {N_TRIALS} optimization trials...')
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

# Report the winning trial.
print('\nBest trial:')
print(f'  Sharpe Ratio: {study.best_value:.3f}')
print('  Parameters:')
for param_name, param_value in study.best_params.items():
    print(f'    {param_name}: {param_value:.4f}')

In [None]:
# Validate best parameters with Monte Carlo
best = study.best_params
# Rebuild the parameter set exactly as the objective built it. The fixed
# market filters (min_liquidity, min_volume_24h) are not part of the search
# space, so they must be repeated here; otherwise validation would run with
# whatever defaults MarketMakingParams has instead of the optimized setup.
best_params = MarketMakingParams(
    min_spread=best['min_spread'],
    trade_size=best['trade_size'],
    max_size=best['max_size'],
    stop_loss_pct=best['stop_loss_pct'],
    take_profit_pct=best['take_profit_pct'],
    volatility_threshold=best['volatility_threshold'],
    inventory_skew_factor=best['inventory_skew_factor'],
    sleep_period_hours=best['sleep_period_hours'],
    min_liquidity=5000.0,
    min_volume_24h=10000.0,
    fee_config=FEE_POLITICAL,
)

# Monte Carlo validation with random fills: repeat the backtest n_runs times
# and collect the headline metrics of each run.
n_runs = 100
mc_results = []
for _ in range(n_runs):
    # Fresh strategy and config for every run.
    s = MarketMakingStrategy(best_params)
    c = MMBacktestConfig(
        start_date=START, end_date=END,
        initial_capital=1000.0,
        fill_aggression=0.5,
        use_random_fills=True,
    )
    r = engine.run_mm(s, c)
    mc_results.append({
        'return_pct': r.total_return_pct,
        'sharpe': r.sharpe_ratio,
        'max_dd': r.max_drawdown_pct,
        'win_rate': r.win_rate,
        'trades': r.total_trades,
    })

# One row per run, one column per metric.
mc_df = pd.DataFrame(mc_results)

print(f'Optimized Parameters Monte Carlo ({n_runs} runs):')
print(f'  Return: {mc_df["return_pct"].mean():.1%} +/- {mc_df["return_pct"].std():.1%}')
print(f'  Sharpe: {mc_df["sharpe"].mean():.2f} +/- {mc_df["sharpe"].std():.2f}')
print(f'  Max DD: {mc_df["max_dd"].mean():.1%} +/- {mc_df["max_dd"].std():.1%}')
print(f'  Win Rate: {mc_df["win_rate"].mean():.1%}')
print(f'  P(positive): {(mc_df["return_pct"] > 0).mean():.1%}')
print(f'  5th percentile return: {mc_df["return_pct"].quantile(0.05):.1%}')
print(f'  95th percentile return: {mc_df["return_pct"].quantile(0.95):.1%}')

In [None]:
# Visualization: a 2x2 dashboard of the search and the Monte Carlo validation
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
ax_history, ax_returns = axes[0, 0], axes[0, 1]
ax_importance, ax_risk = axes[1, 0], axes[1, 1]

# Top-left: objective value per trial plus the running best
trials_df = study.trials_dataframe()
objective_values = trials_df['value']
ax_history.scatter(trials_df.index, objective_values, alpha=0.3, s=10, color='#2196F3')
ax_history.plot(objective_values.cummax(), color='red', linewidth=2, label='Best so far')
ax_history.set_xlabel('Trial')
ax_history.set_ylabel('Sharpe Ratio')
ax_history.set_title('Optimization History')
ax_history.legend()

# Top-right: distribution of Monte Carlo returns
mc_returns = mc_df['return_pct']
median_return = mc_returns.median()
ax_returns.hist(mc_returns, bins=25, color='#4CAF50', alpha=0.7, edgecolor='white')
ax_returns.axvline(0, color='red', linestyle='--', label='Break-even')
ax_returns.axvline(median_return, color='blue', linestyle='--',
                   label=f'Median: {median_return:.1%}')
ax_returns.set_xlabel('Return %')
ax_returns.set_title('Optimized Return Distribution')
ax_returns.legend()

# Bottom-left: parameter importance, approximated by the absolute
# correlation between each sampled parameter and the objective value.
param_names = list(best)
valid = trials_df[trials_df['value'] > float('-inf')]


def _abs_corr_with_objective(param):
    """Return |corr(param, objective)| over valid trials, or 0 if unavailable."""
    column = f'params_{param}'
    if column not in valid.columns:
        return 0
    strength = abs(valid[column].corr(valid['value']))
    return 0 if np.isnan(strength) else strength


importances = [_abs_corr_with_objective(name) for name in param_names]

sorted_idx = np.argsort(importances)[::-1]
ranked_names = [param_names[i] for i in sorted_idx]
ranked_scores = [importances[i] for i in sorted_idx]
ax_importance.barh(ranked_names, ranked_scores, color='#9C27B0', alpha=0.7)
ax_importance.set_xlabel('Correlation with Sharpe')
ax_importance.set_title('Parameter Importance')

# Bottom-right: return against max drawdown for each Monte Carlo run
ax_risk.scatter(mc_df['max_dd'], mc_returns, alpha=0.5, color='#FF5722')
ax_risk.set_xlabel('Max Drawdown %')
ax_risk.set_ylabel('Return %')
ax_risk.set_title('Return vs Risk')

plt.tight_layout()
plt.show()

In [None]:
# Persist the best parameters together with a Monte Carlo summary
import json

mc_return_series = mc_df['return_pct']
monte_carlo_summary = {
    'n_runs': n_runs,
    'mean_return': mc_return_series.mean(),
    'std_return': mc_return_series.std(),
    'mean_sharpe': mc_df['sharpe'].mean(),
    'mean_max_dd': mc_df['max_dd'].mean(),
    'prob_positive': float((mc_return_series > 0).mean()),
}

output = {
    'optimized_params': study.best_params,
    'best_sharpe': study.best_value,
    'n_trials': N_TRIALS,
    'monte_carlo': monte_carlo_summary,
    'timestamp': datetime.now().isoformat(),
}

# Write next to the input data, creating the directory when it is missing.
output_path = f'{DATA_DIR}/optimized_params.json'
target_dir = os.path.dirname(output_path)
os.makedirs(target_dir, exist_ok=True)
with open(output_path, 'w') as f:
    json.dump(output, f, indent=2)

print(f'Saved optimized parameters to {output_path}')
print('\nOptimal parameters for production:')
print(json.dumps(study.best_params, indent=2))

---

## Summary

After optimization:
1. Copy `optimized_params.json` to your deployment config
2. Run paper trading with optimized params for 1-2 weeks
3. If paper trading confirms backtest results, proceed to Phase 2 (live trading)