# Blackjack AI Analysis

Watch the AI analyze hands and see the results of its decisions.

In [1]:
import time
from blackjack_ai.environment import BlackjackEnv
from blackjack_ai.agents import QLearningAgent

def display_hand_and_decision(dealer_card, player_hand, action, show_full_dealer=False, dealer_hand=None, pause_seconds=0):
    """Print one decision point: the dealer's card(s), the AI's hand and its action.

    Args:
        dealer_card: The dealer card to show when the full dealer hand is not
            being printed.
        player_hand: Iterable of the AI's cards; each one is rendered with ``str``.
        action: Index into ``['Stand', 'Hit', 'Double Down']``.
        show_full_dealer: When true and ``dealer_hand`` is non-empty, print every
            dealer card instead of only ``dealer_card``.
        dealer_hand: Iterable of the dealer's cards; only used together with
            ``show_full_dealer``.
        pause_seconds: Seconds to sleep after printing so a viewer can follow
            along. Defaults to 0, which means no pause.

    Raises:
        IndexError: If ``action`` is an integer outside the index range of the
            action-name list.
    """
    divider = "=" * 50

    print("\n" + divider)
    if show_full_dealer and dealer_hand:
        print(f"Dealer's full hand: {', '.join(str(card) for card in dealer_hand)}")
    else:
        print(f"Dealer shows: {dealer_card}")
    print(f"AI's hand: {', '.join(str(card) for card in player_hand)}")

    action_names = ['Stand', 'Hit', 'Double Down']
    print(f"\nAI's decision: {action_names[action]}")
    print(divider)

    # The pause used to be a hard-coded sleep(0), i.e. no pause at all; it is
    # now opt-in so callers that want a readable pace can ask for one.
    if pause_seconds > 0:
        time.sleep(pause_seconds)

def train_agent(training_episodes=10000):
    """Train a new Q-learning agent on blackjack and save its policy.

    A new ``BlackjackEnv`` and ``QLearningAgent`` are created. The agent plays
    ``training_episodes`` episodes with ``is_training=True`` and is updated
    after every step. Progress is printed after every 1000th episode, and
    ``save_policy("trained_policy.npy")`` is called once training ends.

    Args:
        training_episodes: Number of episodes to play.

    Returns:
        The agent that was trained.
    """
    print("Training AI on", training_episodes, "episodes...")
    env = BlackjackEnv()
    agent = QLearningAgent()

    for episodes_done in range(1, training_episodes + 1):
        state = env.reset()

        # Step through the episode, learning from every transition, and stop
        # right after the transition that ends it.
        while True:
            action = agent.get_action(state, is_training=True)
            next_state, reward, done, _ = env.step(action)
            agent.update(state, action, reward, next_state, done)
            if done:
                break
            state = next_state

        # Periodic progress report
        if episodes_done % 1000 == 0:
            print(f"Training episode {episodes_done}/{training_episodes}, Exploration rate: {agent.exploration_rate:.4f}")

    print("Training complete!")
    agent.save_policy("trained_policy.npy")
    return agent

def analyze_hands(num_hands=100, agent=None):
    """Play ``num_hands`` hands with the AI, narrating every decision and result.

    For each hand the current record is printed, then every decision point is
    shown via ``display_hand_and_decision`` until the environment reports the
    hand is done, followed by the final hands and the outcome. After all hands
    the overall statistics are printed.

    Args:
        num_hands: Number of hands to play.
        agent: Agent used to pick actions (queried with ``is_training=False``).
            When ``None``, a new ``QLearningAgent`` is created and
            ``trained_policy.npy`` is loaded into it; if ``load_policy``
            returns a falsy value, a new agent is trained with ``train_agent()``.

    Returns:
        None. All results are printed.
    """
    env = BlackjackEnv()
    if agent is None:
        agent = QLearningAgent()
        # Try to load pre-trained policy
        if not agent.load_policy("trained_policy.npy"):
            print("No trained policy found. Training new agent...")
            agent = train_agent()

    wins = losses = pushes = 0
    print("\nStarting hand analysis...\n")

    for hand in range(num_hands):
        state = env.reset()
        done = False

        print(f"\nHand {hand + 1}/{num_hands}")
        print(f"Current record - Wins: {wins} | Losses: {losses} | Pushes: {pushes}")

        # Keep asking the agent for decisions until the environment says the
        # hand is over (the same loop shape train_agent uses). Stopping on
        # anything else could score a hand on a reward that is not the final one.
        while not done:
            action = agent.get_action(state, is_training=False)  # Disable training mode
            display_hand_and_decision(env.dealer_hand[0], env.player_hand, action)
            state, reward, done, _ = env.step(action)

        # Show final result with dealer's complete hand
        print("\nFinal Hands:")
        display_hand_and_decision(env.dealer_hand[0], env.player_hand, action, True, env.dealer_hand)

        # The sign of the last reward decides the outcome
        print("\nResult:", end=" ")
        if reward > 0:
            print("WIN! 🎉")
            wins += 1
        elif reward < 0:
            print("LOSS 😢")
            losses += 1
        else:
            print("PUSH (Tie) 🤝")
            pushes += 1

        print(f"Exploration Rate (epsilon): {agent.exploration_rate:.4f}")

    # Summary is printed once, after the loop; the win rate is guarded so a
    # run with no hands reports 0% instead of dividing by zero.
    win_rate = wins / num_hands if num_hands > 0 else 0.0
    print("\nFinal Statistics:")
    print(f"Total Wins: {wins}")
    print(f"Total Losses: {losses}")
    print(f"Total Pushes: {pushes}")
    print(f"Win Rate: {win_rate:.2%}")
    print(f"Final Exploration Rate: {agent.exploration_rate:.4f}")

# Train an agent with the default number of episodes, then have that same
# agent play and narrate 100 hands.
trained_agent = train_agent()
analyze_hands(num_hands=100, agent=trained_agent)

Training AI on 10000 episodes...
Training episode 1000/10000, Exploration rate: 0.0100
Training episode 2000/10000, Exploration rate: 0.0100
Training episode 3000/10000, Exploration rate: 0.0100
Training episode 4000/10000, Exploration rate: 0.0100
Training episode 5000/10000, Exploration rate: 0.0100
Training episode 6000/10000, Exploration rate: 0.0100
Training episode 7000/10000, Exploration rate: 0.0100
Training episode 8000/10000, Exploration rate: 0.0100
Training episode 9000/10000, Exploration rate: 0.0100
Training episode 10000/10000, Exploration rate: 0.0100
Training complete!

Starting hand analysis...


Hand 1/100
Current record - Wins: 0 | Losses: 0 | Pushes: 0

Dealer shows: 10 of Diamonds
AI's hand: A of Spades, 4 of Clubs

AI's decision: Hit

Dealer shows: 10 of Diamonds
AI's hand: A of Spades, 4 of Clubs, K of Spades

AI's decision: Hit

Final Hands:

Dealer's full hand: 10 of Diamonds, 9 of Hearts
AI's hand: A of Spades, 4 of Clubs, K of Spades, J of Spades

AI's decis

In [8]:
from blackjack_ai.agents import QLearningAgent, BasicStrategyAgent, RandomAgent
from blackjack_ai.environment import BlackjackEnv
from scipy.stats import binomtest

# Number of evaluation games played per agent
N_GAMES = 1000

def run_agent(agent, n_games=N_GAMES):
    """Play ``n_games`` games with ``agent`` in a new ``BlackjackEnv`` and tally outcomes.

    The agent is queried with ``is_training=False``. Each game is classified by
    the reward returned with its final step: a positive reward is a win, a
    negative reward is a loss, and anything else is a push.

    Args:
        agent: Object exposing ``get_action(state, is_training=...)``.
        n_games: Number of games to play.

    Returns:
        Tuple ``(wins, losses, pushes)``.
    """
    env = BlackjackEnv()
    tally = {"win": 0, "loss": 0, "push": 0}

    for _game in range(n_games):
        state = env.reset()

        # Step until the environment reports the game is finished; `reward`
        # then holds the reward of the last step.
        while True:
            chosen = agent.get_action(state, is_training=False)
            state, reward, finished, _info = env.step(chosen)
            if finished:
                break

        if reward > 0:
            outcome = "win"
        elif reward < 0:
            outcome = "loss"
        else:
            outcome = "push"
        tally[outcome] += 1

    return tally["win"], tally["loss"], tally["push"]

# Reference win rate that every agent's result is tested against
BASELINE_WIN_RATE = 0.42

# Load the saved Q-learning policy if there is one. When loading fails the
# comparison still runs with a new, untrained agent, but a warning is printed
# so its numbers are not mistaken for those of a trained policy.
q_agent = QLearningAgent()
try:
    # analyze_hands treats load_policy's return value as a success flag;
    # the same convention is assumed here.
    policy_loaded = q_agent.load_policy("trained_policy.npy")
except Exception as exc:
    print(f"Warning: could not load trained_policy.npy ({exc!r}); evaluating an untrained Q-Learning agent.")
    q_agent = QLearningAgent()  # fallback if no policy
else:
    if not policy_loaded:
        print("Warning: no policy was loaded from trained_policy.npy; evaluating an untrained Q-Learning agent.")

agents = [
    ("Q-Learning", q_agent),
    ("Basic Strategy", BasicStrategyAgent()),
    ("Random", RandomAgent())
]

# Play N_GAMES games per agent and run a two-sided binomial test of its win
# count against BASELINE_WIN_RATE.
results = []
for name, agent in agents:
    wins, losses, pushes = run_agent(agent, N_GAMES)
    win_rate = wins / N_GAMES
    pval = binomtest(wins, N_GAMES, BASELINE_WIN_RATE, alternative='two-sided').pvalue
    results.append((name, wins, losses, pushes, win_rate, pval))

# The header and the p-value label are built from the constants so the report
# stays accurate when N_GAMES or BASELINE_WIN_RATE is changed.
print(f"\n=== Baseline Comparison ({N_GAMES} games each) ===")
for name, wins, losses, pushes, win_rate, pval in results:
    print(f"\n{name} Agent:")
    print(f"  Wins:   {wins} ({win_rate:.2%})")
    print(f"  Losses: {losses}")
    print(f"  Pushes: {pushes}")
    print(f"  p-value vs {BASELINE_WIN_RATE:.0%} win rate: {pval:.4f}")


=== Baseline Comparison (1000 games each) ===

Q-Learning Agent:
  Wins:   377 (37.70%)
  Losses: 497
  Pushes: 126
  p-value vs 42% win rate: 0.0059

Basic Strategy Agent:
  Wins:   390 (39.00%)
  Losses: 482
  Pushes: 128
  p-value vs 42% win rate: 0.0546

Random Agent:
  Wins:   239 (23.90%)
  Losses: 684
  Pushes: 77
  p-value vs 42% win rate: 0.0000
