In [4]:
import gymnasium as gym
import numpy as np
# Shared Blackjack environment; evaluate_strategy() resets and steps this
# single instance for every episode it plays.
# NOTE(review): `natural` and `sab` select optional rule variants -- confirm
# their exact meaning against the Gymnasium Blackjack-v1 documentation.
env = gym.make('Blackjack-v1', natural=False, sab=False)

In [None]:
def create_blackjack_matrices():
    """Build a random hit/stand strategy.

    Returns:
        tuple: ``(hard_totals, soft_totals)`` where ``hard_totals`` is a
        16x10 array and ``soft_totals`` is an 8x10 array. Every cell is
        drawn independently from ``{1, 0}`` (1 = Hit, 0 = Stand) using
        NumPy's global random state. Columns correspond to the dealer
        upcard (2-10, A).
    """
    choices = [1, 0]  # Hit, Stand
    n_dealer_cards = 10  # dealer upcard: 2-10, A

    def random_table(n_rows):
        # Draw cell by cell, row-major, one np.random.choice call per cell.
        rows = []
        for _ in range(n_rows):
            rows.append([np.random.choice(choices) for _ in range(n_dealer_cards)])
        return np.array(rows)

    # Hard totals first, then soft totals (player total that includes an Ace).
    hard_totals = random_table(16)
    soft_totals = random_table(8)

    return (hard_totals, soft_totals)


def get_decision(strategy, player_sum, dealer_upcard, is_soft):
    """Look up the action a strategy prescribes for the current hand.

    Args:
        strategy: ``(hard_totals, soft_totals)`` pair of 2-D action tables.
            Row ``r`` holds the actions for player total ``20 - r``; column
            ``c`` holds the actions for dealer upcard ``c + 2`` (the last
            column is the dealer ace).
        player_sum: Current player total.
        dealer_upcard: Dealer's visible card, 2-10, with the ace given as
            either 1 or 11.
        is_soft: 1/True when the hand holds a usable ace, otherwise 0/False.

    Returns:
        1 to hit or 0 to stand.
    """
    hard_totals, soft_totals = strategy
    table = soft_totals if is_soft == 1 else hard_totals

    row = 20 - player_sum
    if row < 0:
        # Total of 21 or more: stand. A negative row would otherwise wrap
        # around and silently reuse the table's last row.
        return 0
    if row >= table.shape[0]:
        # Total below the lowest tabulated row (e.g. hard 4 or soft 12):
        # hit, instead of raising IndexError.
        return 1

    # The dealer ace always maps to the last column, whether it arrives as
    # 1 or as 11.
    if dealer_upcard in (1, 11):
        dealer_idx = table.shape[1] - 1
    else:
        dealer_idx = dealer_upcard - 2  # Adjust dealer card index to match array indexing

    return table[row, dealer_idx]
    


(13, 10, 0)


In [None]:
# Genetic-algorithm hyperparameters used by the evolution loop below.
population_size = 50  # number of strategies in every generation
generations = 100  # number of evaluate/select/breed rounds
mutation_rate = 0.1  # per-cell flip probability handed to mutate()

# Evaluate a strategy by playing multiple games
def evaluate_strategy(strategy, episodes=1000):
    """Estimate a strategy's fitness as its mean final reward per episode.

    Plays ``episodes`` complete games on the module-level ``env``, choosing
    every action with ``get_decision``.

    Args:
        strategy: ``(hard_totals, soft_totals)`` pair of action tables.
        episodes: Number of games to play; must be non-zero because the
            result is divided by it.

    Returns:
        Sum of the last reward of each episode divided by ``episodes``.
    """
    total_reward = 0
    for _ in range(episodes):
        obs, _ = env.reset()
        done = False
        while not done:
            player_sum, dealer_card, usable_ace = obs
            # The observation reports the dealer ace as 1; the strategy
            # tables expect it as 11 (last column).
            if dealer_card == 1:
                dealer_card = 11
            action = get_decision(strategy, player_sum, dealer_card, usable_ace)
            obs, reward, terminated, truncated, _ = env.step(action)
            # An episode is over when it terminates OR is truncated;
            # ignoring truncation could keep stepping a finished episode.
            done = terminated or truncated
        # Only the reward of the final step is accumulated.
        total_reward += reward
    return total_reward / episodes

# Selection: Select the top-performing strategies
def select_population(population, fitness, num_selected):
    """Return the ``num_selected`` fittest members of ``population``.

    Args:
        population: Sequence of strategies.
        fitness: Fitness scores aligned index-by-index with ``population``.
        num_selected: How many top scorers to keep.

    Returns:
        list: The selected strategies ordered from lowest to highest
        fitness. Note that ``num_selected == 0`` slices as ``[-0:]`` and
        therefore returns the whole population.
    """
    ranking = np.argsort(fitness)  # indices sorted by ascending fitness
    survivors = []
    for idx in ranking[-num_selected:]:
        survivors.append(population[idx])
    return survivors
# Crossover: Combine two parent strategies to create a child strategy
def crossover(parent1, parent2):
    """Create a child by uniform crossover of two parent strategies.

    Every cell of the child is copied from ``parent1`` or ``parent2`` with
    equal probability, using NumPy's global random state (the same source
    the rest of the notebook draws from).

    Args:
        parent1: Either a single action table (ndarray) or a tuple/list of
            tables such as ``(hard_totals, soft_totals)``.
        parent2: Strategy with the same structure and shapes as ``parent1``.

    Returns:
        A new ndarray when given ndarrays, or a tuple of new ndarrays when
        given a tuple/list of tables. The parents are not modified.
    """
    def _mix(table1, table2):
        # Boolean mask: True takes the cell from table1, False from table2.
        table1 = np.asarray(table1)
        table2 = np.asarray(table2)
        mask = np.random.randint(0, 2, size=table1.shape).astype(bool)
        return np.where(mask, table1, table2)

    # Strategies in this notebook are (hard, soft) tuples, which have no
    # .shape of their own -- cross each table over separately.
    if isinstance(parent1, (tuple, list)):
        return tuple(_mix(t1, t2) for t1, t2 in zip(parent1, parent2))
    return _mix(parent1, parent2)
# Mutation: Randomly modify parts of a strategy
def mutate(strategy, mutation_rate):
    """Flip each action of a strategy with probability ``mutation_rate``.

    Cells hold 0 or 1, so a flip is ``1 - value``. The mutation is applied
    in place and the same object is returned. Randomness comes from NumPy's
    global random state (the same source the rest of the notebook uses).

    Args:
        strategy: Either a single action table (ndarray) or a tuple/list of
            tables such as ``(hard_totals, soft_totals)``.
        mutation_rate: Per-cell flip probability in ``[0, 1]``.

    Returns:
        The ``strategy`` object that was passed in, after mutation.
    """
    def _flip_in_place(table):
        # Draws lie in [0, 1), so rate 0 flips nothing and rate 1 flips all.
        mutation_mask = np.random.random(table.shape) < mutation_rate
        table[mutation_mask] = 1 - table[mutation_mask]

    # Strategies in this notebook are (hard, soft) tuples, which have no
    # .shape of their own -- mutate each table separately.
    if isinstance(strategy, (tuple, list)):
        for table in strategy:
            _flip_in_place(table)
    else:
        _flip_in_place(strategy)
    return strategy

# Initialize the population
population = [create_blackjack_matrices() for _ in range(population_size)]

# Best strategy that has actually been evaluated, tracked across generations.
# (After the loop `population` holds unevaluated children, so indexing it with
# the previous generation's fitness scores would pick an arbitrary child.)
best_strategy = population[0]
best_fitness = -np.inf

# Run the genetic algorithm
for generation in range(generations):
    # Evaluate the fitness of each strategy
    fitness = [evaluate_strategy(strategy) for strategy in population]

    # Remember the best evaluated strategy seen so far
    generation_best = int(np.argmax(fitness))
    if fitness[generation_best] > best_fitness:
        best_fitness = fitness[generation_best]
        best_strategy = population[generation_best]

    # Print the best fitness in the current generation
    print(f"Generation {generation + 1}: Best Fitness = {max(fitness)}")

    # Select the top-performing strategies
    num_selected = population_size // 2
    selected_population = select_population(population, fitness, num_selected)

    # Create the next generation
    next_generation = []
    for _ in range(population_size):
        # Sample parent *indices*: np.random.choice cannot draw from a list
        # of (hard, soft) tuples whose arrays have different shapes.
        idx1, idx2 = np.random.choice(len(selected_population), 2, replace=False)
        parent1 = selected_population[idx1]
        parent2 = selected_population[idx2]
        child = crossover(parent1, parent2)
        child = mutate(child, mutation_rate)
        next_generation.append(child)

    population = next_generation

# Evaluate the best strategy
final_fitness = evaluate_strategy(best_strategy, episodes=10000)
print(f"Final Best Strategy Fitness: {final_fitness}")