In [97]:
import gymnasium as gym
import numpy as np
env = gym.make('Blackjack-v1', natural=False, sab=False)

In [98]:
def create_blackjack_matrices():
    actions = [1, 0] # Hit, Stand
    
    # Hard totals (player total, dealer upcard: 2-10, A)
    hard_totals = np.array([[np.random.choice(actions) for _ in range(10)] for _ in range(17)])
    
    # Soft totals (player total with an Ace, dealer upcard: 2-10, A)
    soft_totals = np.array([[np.random.choice(actions) for _ in range(10)] for _ in range(8)])
    
    return (hard_totals, soft_totals)

In [99]:
def get_decision(strategy, player_sum, dealer_upcard, is_soft):
    hard_totals, soft_totals = strategy
    dealer_idx = dealer_upcard - 2  # Adjust dealer card index to match array indexing
    
    if is_soft == 1:
        row_index = max(0, min(20 - player_sum, soft_totals.shape[0] - 1))
        return soft_totals[row_index, dealer_idx]
    else:
        row_index = max(0, min(20 - player_sum, hard_totals.shape[0] - 1))
        return hard_totals[row_index, dealer_idx]  # Adjusted index for hard_totals


In [100]:
# Evaluate a strategy by playing multiple games
def evaluate_strategy(strategy, episodes=50000):
    total_reward = 0
    for _ in range(episodes):
        obs, _ = env.reset()
        done = False
        while not done:
            player_sum, dealer_card, usable_ace = obs
            if dealer_card == 1:
                dealer_card = 11
            action = get_decision(strategy, player_sum, dealer_card, usable_ace)
            obs, reward, done, _, _ = env.step(action)
        total_reward += reward
    return total_reward / episodes

In [101]:
# Selection: Select the top-performing strategies
def select_population(population, fitness, num_selected):
    selected_indices = np.argsort(fitness)[-num_selected:]
    selected_population = [population[i] for i in selected_indices]
    return np.array(selected_population, dtype=object)

In [102]:
# Crossover: Combine two parent strategies to create a child strategy
def crossover(parent1, parent2):
    rng = np.random.default_rng(420)
    mask = rng.integers(0, 2, parent1.shape, dtype=bool)
    child = np.where(mask, parent1, parent2)
    return child

In [103]:
# Mutation: Randomly modify parts of a strategy
def mutate(strategy, mutation_rate):
    rng = np.random.default_rng(420)
    mutation_mask = rng.random(strategy.shape) < mutation_rate
    strategy[mutation_mask] = 1 - strategy[mutation_mask]
    return strategy

In [None]:
population_size = 50
generations = 1
mutation_rate = 0.01

# Initialize the population
population = [create_blackjack_matrices() for _ in range(population_size)]

# Run the genetic algorithm
for generation in range(generations):
    # Evaluate the fitness of each strategy
    fitness = [evaluate_strategy(strategy) for strategy in population]
    
    # Print the best fitness in the current generation
    print(f"Generation {generation + 1}: Best Fitness = {max(fitness)}")
    
    # Select the top-performing strategies
    num_selected = len(population) // 2
    selected_population = select_population(population, fitness, num_selected)
    # Create the next generation
    next_generation = []
    for _ in range(population_size):
        parent1_idx, parent2_idx = rng.integers(0, selected_population.shape[0], size=2)
        parent1, parent2 = selected_population[parent1_idx], selected_population[parent2_idx]
        child = crossover(parent1, parent2)
        child = mutate(child, mutation_rate)
        next_generation.append(child)
    
    population = next_generation

# Evaluate the best strategy
best_strategy = population[np.argmax(fitness)]
final_fitness = evaluate_strategy(best_strategy, episodes=50000)
print("Final Best Strategy:")
print(best_strategy)
print(f"Final Best Strategy Fitness: {final_fitness}")

Generation 1: Best Fitness = -0.25718
Final Best Strategy:
[array([[0, 0, 1, 1, 0, 0, 1, 1, 0, 0],
        [1, 1, 1, 0, 0, 0, 0, 1, 0, 0],
        [0, 1, 1, 1, 0, 1, 0, 0, 1, 1],
        [0, 0, 1, 1, 1, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 1, 0, 1, 0, 1, 0, 1, 1, 0],
        [1, 1, 1, 0, 1, 1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
        [1, 1, 1, 1, 0, 1, 0, 1, 1, 1],
        [1, 1, 1, 0, 0, 1, 0, 1, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
        [0, 0, 1, 1, 1, 0, 1, 1, 0, 0],
        [0, 1, 0, 0, 1, 0, 1, 0, 0, 0],
        [1, 1, 0, 0, 1, 0, 1, 0, 1, 0],
        [1, 0, 1, 1, 0, 1, 1, 1, 0, 0],
        [0, 1, 0, 1, 0, 0, 0, 0, 1, 1]])
 array([[0, 1, 0, 1, 1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 1, 1, 0, 1, 0, 0],
        [1, 0, 0, 0, 0, 1, 0, 1, 0, 1],
        [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
        [1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
        [1, 1, 0, 0, 1, 1, 0, 1, 1, 1],
        [1, 0, 1, 1,