# Genetic Algorithm Optimization of Credit Spreads
This notebook searches for the best parameter set for `synthetic_market.run_simulation()` using a genetic algorithm.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from deap import base, creator, tools
import synthetic_market as sm

The genetic algorithm explores a bounded range for each strategy parameter (the bounds are listed in `SEARCH_BOUNDS` below). Fitness is defined as total return minus maximum drawdown, and the algorithm maximizes it.

In [None]:
# (low, high) bounds for every gene of an individual.
# The key order mirrors the positional gene layout built by gen_ind().
SEARCH_BOUNDS = {
    'ema_period': (20, 100),            # int
    'risk_fraction': (0.01, 0.06),      # float
    'short_leg_low': (5, 20),           # float, minimum short-leg distance
    'short_leg_high': (10, 30),         # float, maximum distance (must be >= low)
    'spread_w_low': (1, 5),             # int
    'spread_w_high': (4, 10),           # int (must be >= low)
    'trade_credit_ratio': (0.2, 0.5),   # float
}

In [None]:
# Seed both generators: gen_ind() draws from numpy's global RNG, while
# mutate() and the GA loop draw from the stdlib `random` module.
random.seed(0)
np.random.seed(0)

# DEAP's creator registers classes on the `creator` module itself and warns
# (then overwrites) when a name is created twice. Guard the calls so that
# re-running this notebook cell is harmless.
if not hasattr(creator, 'FitnessMax'):
    # Single objective; the positive weight means the fitness is maximized.
    creator.create('FitnessMax', base.Fitness, weights=(1.0,))
if not hasattr(creator, 'Individual'):
    # An individual is a plain list of genes carrying a FitnessMax attribute.
    creator.create('Individual', list, fitness=creator.FitnessMax)

def gen_ind():
    """Sample one random individual inside ``SEARCH_BOUNDS``.

    Gene layout (positional):
        0 ema_period (int), 1 risk_fraction (float),
        2 short_leg_low (float), 3 short_leg_high (float),
        4 spread_w_low (int), 5 spread_w_high (int),
        6 trade_credit_ratio (float).

    The low/high sampling ranges overlap, so two independent draws can give
    ``high < low``. Such a pair is repaired by raising the high gene to the
    low one (the same rule ``mutate`` applies), so that no slot of the initial
    population is spent on an individual that can only receive the penalty
    fitness.

    Returns:
        creator.Individual: list of the seven genes described above.
    """
    b = SEARCH_BOUNDS

    # Draw in gene order so the random stream is consumed exactly as before.
    ema_period = np.random.randint(b['ema_period'][0], b['ema_period'][1] + 1)
    risk_fraction = np.random.uniform(*b['risk_fraction'])
    short_leg_low = np.random.uniform(*b['short_leg_low'])
    short_leg_high = np.random.uniform(*b['short_leg_high'])
    spread_w_low = np.random.randint(b['spread_w_low'][0], b['spread_w_low'][1] + 1)
    spread_w_high = np.random.randint(b['spread_w_high'][0], b['spread_w_high'][1] + 1)
    trade_credit_ratio = np.random.uniform(*b['trade_credit_ratio'])

    # Enforce high >= low for both (low, high) gene pairs.
    short_leg_high = max(short_leg_high, short_leg_low)
    spread_w_high = max(spread_w_high, spread_w_low)

    return creator.Individual([
        ema_period,
        risk_fraction,
        short_leg_low,
        short_leg_high,
        spread_w_low,
        spread_w_high,
        trade_credit_ratio,
    ])

def mate(i1, i2):
    """Cross two individuals in place with one-point crossover.

    After the crossover, each child's (low, high) gene pairs are repaired so
    that ``high >= low``. A cut that falls between a low gene and its high
    partner (between indices 2 and 3, or 4 and 5) combines values from
    different parents and can otherwise produce an inverted range, which the
    evaluation would only penalize.

    Args:
        i1: first individual (gene list), modified in place.
        i2: second individual (gene list), modified in place.

    Returns:
        tuple: ``(i1, i2)`` after crossover and repair.
    """
    tools.cxOnePoint(i1, i2)
    for child in (i1, i2):
        # Same repair rule as mutate(): lift the high gene up to the low one.
        if child[3] < child[2]:
            child[3] = child[2]
        if child[5] < child[4]:
            child[5] = child[4]
    return i1, i2

def mutate(ind):
    """Perturb every gene of ``ind`` in place with Gaussian noise.

    Each gene gets a zero-mean Gaussian step, is clipped to its
    ``SEARCH_BOUNDS`` range and cast back to its type. For integer genes the
    step is truncated with ``int()`` before it is added. Afterwards the two
    (low, high) pairs are repaired so that the high gene is never below the
    low gene.

    Args:
        ind: individual (gene list), modified in place.

    Returns:
        tuple: one-element tuple ``(ind,)``, as DEAP mutation operators return.
    """
    # (gene index, bounds key, step sigma, integer gene?) in gene order, so the
    # random draws happen in the same sequence as the genes are laid out.
    gene_specs = (
        (0, 'ema_period', 5, True),
        (1, 'risk_fraction', 0.005, False),
        (2, 'short_leg_low', 1, False),
        (3, 'short_leg_high', 1, False),
        (4, 'spread_w_low', 1, True),
        (5, 'spread_w_high', 1, True),
        (6, 'trade_credit_ratio', 0.02, False),
    )
    for idx, key, sigma, is_int in gene_specs:
        lower, upper = SEARCH_BOUNDS[key]
        step = random.gauss(0, sigma)
        if is_int:
            ind[idx] = int(np.clip(ind[idx] + int(step), lower, upper))
        else:
            ind[idx] = float(np.clip(ind[idx] + step, lower, upper))

    # Keep each high gene at or above its low partner.
    ind[3] = max(ind[3], ind[2])
    ind[5] = max(ind[5], ind[4])
    return (ind,)

In [None]:
def eval_ind(ind):
    """Score one individual by running the market simulation.

    Args:
        ind: sequence of seven genes laid out as
            ``[ema_period, risk_fraction, short_leg_low, short_leg_high,
            spread_w_low, spread_w_high, trade_credit_ratio]``.

    Returns:
        tuple: one-element fitness tuple ``(score,)`` with
        ``score = total_return - max_drawdown``. An individual whose high
        bound lies below its low bound, or whose simulation raises
        ``ValueError``, receives the penalty fitness ``(-1e9,)`` instead.
    """
    # Fitness handed to any individual that cannot be simulated.
    penalty = (-1e9,)

    # Check for invalid short_leg_distance before passing to simulation
    short_leg_low = ind[2]
    short_leg_high = ind[3]
    if short_leg_high < short_leg_low:
        print(f'Warning: Invalid short_leg_distance in eval_ind: ({short_leg_low}, {short_leg_high}). Returning low fitness.')
        return penalty

    # Check for invalid spread_width_range as well, following the same logic
    spread_w_low = int(ind[4])
    spread_w_high = int(ind[5])
    if spread_w_high < spread_w_low:
        print(f'Warning: Invalid spread_width_range in eval_ind: ({spread_w_low}, {spread_w_high}). Returning low fitness.')
        return penalty

    cfg = {
        'ema_period': int(ind[0]),
        'risk_fraction': ind[1],
        'short_leg_distance': (short_leg_low, short_leg_high),
        'spread_width_range': (spread_w_low, spread_w_high),
        'trade_credit_ratio': ind[6],
        # Fixed seed: every individual is scored with the same seed value.
        'seed': 123,
    }
    try:
        res = sm.run_simulation(cfg)
        score = res.total_return - res.max_drawdown
        return (score,)
    except ValueError as e:
        print(f'Error during simulation for individual {ind}: {e}')
        return penalty

In [None]:
# Wire the operators defined above into a DEAP toolbox.
toolbox = base.Toolbox()
toolbox.register('individual', gen_ind)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
toolbox.register('mate', mate)
toolbox.register('mutate', mutate)
toolbox.register('select', tools.selTournament, tournsize=3)
toolbox.register('evaluate', eval_ind)

POP = 40    # individuals per generation
GENS = 25   # number of generations
CX = 0.5    # probability that a pair of offspring is crossed over
MUT = 0.3   # probability that an offspring is mutated

pop = toolbox.population(n=POP)
best_scores = []

# Tournament selection has no elitism, so the best individual found so far can
# disappear from the population. The hall of fame keeps a copy of it.
hall_of_fame = tools.HallOfFame(1)

# Score the initial population once. Inside the loop only individuals whose
# fitness was invalidated by crossover or mutation are evaluated; unchanged
# clones keep their score instead of being simulated a second time.
# NOTE(review): this relies on eval_ind being deterministic for a given
# individual (it passes a fixed 'seed' to the simulation) - confirm.
fits = map(toolbox.evaluate, pop)
for ind, fit in zip(pop, fits):
    ind.fitness.values = fit
hall_of_fame.update(pop)

for g in range(GENS):
    # Record the best fitness of the population entering generation g.
    best = tools.selBest(pop, 1)[0]
    best_scores.append(best.fitness.values[0])

    # Select parents and clone them so the operators below never touch `pop`.
    offspring = toolbox.select(pop, len(pop))
    offspring = list(map(toolbox.clone, offspring))

    # Crossover on consecutive pairs; changed children lose their fitness.
    for c1, c2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < CX:
            toolbox.mate(c1, c2)
            del c1.fitness.values, c2.fitness.values

    # Mutation; a mutated individual loses its fitness as well.
    for ind in offspring:
        if random.random() < MUT:
            toolbox.mutate(ind)
            del ind.fitness.values

    # Evaluate only the individuals that were actually modified.
    invalid = [ind for ind in offspring if not ind.fitness.valid]
    fits = map(toolbox.evaluate, invalid)
    for ind, fit in zip(invalid, fits):
        ind.fitness.values = fit

    pop[:] = offspring
    hall_of_fame.update(pop)

# Best individual seen at any point of the run, not just in the last population.
champ = hall_of_fame[0]

In [None]:
# Convergence plot: one point per generation, taken from best_scores as filled
# in by the GA loop.
plt.figure(figsize=(6,4))
plt.plot(best_scores, marker='o')
plt.xlabel('Generation')
plt.ylabel('Best Fitness')
plt.title('Best Fitness by Generation')
plt.show()

# Rebuild the simulation config from the champion's genes, using the same keys
# that eval_ind passes to sm.run_simulation.
champ_cfg = {
    'ema_period': int(champ[0]),
    'risk_fraction': champ[1],
    'short_leg_distance': (champ[2], champ[3]),
    'spread_width_range': (int(champ[4]), int(champ[5])),
    'trade_credit_ratio': champ[6],
    'seed': 123,
}
# Re-run the champion for seeds 0..24 to see how its score varies by seed.
# NOTE(review): champ_cfg still carries 'seed': 123 next to the explicit
# `seeds` argument; how run_simulation_multi reconciles the two is not visible
# here - confirm.
# NOTE(review): presumably `results` is a pandas DataFrame with one row per
# seed and 'total_return' / 'max_drawdown' columns - confirm.
results = sm.run_simulation_multi(champ_cfg, seeds=list(range(25)))
# Same fitness definition as the GA objective in eval_ind.
results['score'] = results['total_return'] - results['max_drawdown']
results['score'].hist(figsize=(6,4))
plt.title('Distribution of Fitness over Seeds')
plt.xlabel('Fitness')
plt.ylabel('Frequency')
plt.show()

# Summary: chosen parameters plus mean and spread of the score across seeds.
print('Champion params:', champ_cfg)
print('Score mean:', round(results['score'].mean(), 4))
print('Score std:', round(results['score'].std(), 4))

## Champion Parameters
- `ema_period`: ...
- `risk_fraction`: ...
- `short_leg_distance`: ...
- `spread_width_range`: ...
- `trade_credit_ratio`: ...