In [115]:
import rlcard
from rlcard import models
from rlcard.agents import DQNAgent
from rlcard.utils import set_seed
import torch
import numpy as np


In [116]:
set_seed(24)

# Load environment
env = rlcard.make('blackjack')


In [117]:
agent = DQNAgent(
    num_actions=env.num_actions,
    state_shape=env.state_shape[0],
    mlp_layers=[64,128, 64],
)
agent.q_estimator.qnet.load_state_dict(torch.load('./final_model.pth'))  
env.set_agents([agent])  

  agent.q_estimator.qnet.load_state_dict(torch.load('./final_model.pth'))


In [118]:
def print_state(state):
    """Print the current game state in a clean format"""
    print(f"Your Hand Total: {state['obs'][0]}")
    print(f"Dealer Shows: {state['obs'][1]}")
    
    # Show actual cards from raw_obs for better context
    if 'raw_obs' in state and state['raw_obs']:
        player_cards = state['raw_obs'].get('player0 hand', [])
        dealer_cards = state['raw_obs'].get('dealer hand', [])
        if player_cards:
            print(f"Your Cards: {' '.join(player_cards)}")
        if dealer_cards:
            print(f"Dealer Visible Cards: {' '.join(dealer_cards)}")
    
    action_map = {0: "Hit", 1: "Stand"}
    legal_actions = [action_map[i] for i in state['legal_actions']]
    print(f"Available Actions: {', '.join(legal_actions)}")

def get_action_name(action):
    """Convert action number to action name"""
    action_map = {0: "Hit", 1: "Stand"}
    return action_map.get(action, f"Unknown({action})")

In [119]:
def play_single_game():
    """Play a single game using the CORRECT method"""
    print("\n" + "=" * 20)
    print("NEW BLACKJACK GAME")
    print("=" * 20)
    
    # Run the complete game - this handles reset internally
    trajectories, payoffs = env.run(is_training=False)
    
    # Get the ACTUAL initial state from the trajectory
    initial_state = None
    if trajectories and len(trajectories) > 0 and len(trajectories[0]) > 0:
        # First step in trajectory contains the initial deal
        initial_state = trajectories[0][0]
    
    if initial_state and isinstance(initial_state, dict) and 'obs' in initial_state:
        print("\nINITIAL DEAL:")
        # print(f"Your Hand Total: {initial_state['obs'][0]}")
        # print(f"Dealer Shows: {initial_state['obs'][1]}")
        
        if 'raw_obs' in initial_state and initial_state['raw_obs']:
            player_cards = initial_state['raw_obs'].get('player0 hand', [])
            dealer_cards = initial_state['raw_obs'].get('dealer hand', [])
            if player_cards:
                print(f"Your Cards: {' '.join(player_cards)}")
            if dealer_cards:
                print(f"Dealer Visible Cards: {' '.join(dealer_cards)}")
        
        action_map = {0: "Hit", 1: "Stand"}
        if 'legal_actions' in initial_state:
            legal_actions = [action_map[i] for i in initial_state['legal_actions']]
            print(f"Available Actions: {', '.join(legal_actions)}")
    
    # Store initial cards for consistency check from the ACTUAL game
    initial_player_cards = []
    if initial_state and 'raw_obs' in initial_state and initial_state['raw_obs']:
        initial_player_cards = initial_state['raw_obs'].get('player0 hand', []).copy()
    
    # Extract player trajectory
    player_trajectory = trajectories[0]
    
    print(f"\nGAME ACTIONS:")
    
    action_map = {0: "Hit", 1: "Stand"}
    action_count = 0
    
    # Look for explicit action fields in trajectory
    for i, step in enumerate(player_trajectory):
        if isinstance(step, dict) and 'action' in step:
            action = step['action']
            action_name = action_map.get(action, f"Action{action}")
            action_count += 1
            
            print(f"Decision {action_count}: Agent chose to {action_name}")
            
            if 'obs' in step:
                hand_total = step['obs'][0]
                print(f"  → Hand total after action: {hand_total}")
                
                if 'raw_obs' in step and step['raw_obs']:
                    player_cards = step['raw_obs'].get('player0 hand', [])
                    if player_cards:
                        print(f"  → Cards after action: {' '.join(player_cards)}")
    
    # If no explicit actions found, infer from state changes
    if action_count == 0:
        previous_state = None
        decision_num = 0
        
        for i, step in enumerate(player_trajectory):
            if isinstance(step, dict) and 'obs' in step:
                current_total = step['obs'][0]
                current_cards = []
                
                if 'raw_obs' in step and step['raw_obs']:
                    current_cards = step['raw_obs'].get('player0 hand', [])
                
                # Compare with previous state to detect changes
                if previous_state is not None:
                    prev_total = previous_state['obs'][0]
                    prev_cards = []
                    if 'raw_obs' in previous_state and previous_state['raw_obs']:
                        prev_cards = previous_state['raw_obs'].get('player0 hand', [])
                    
                    # Check if cards changed (indicating a Hit)
                    if len(current_cards) > len(prev_cards):
                        decision_num += 1
                        new_card = [card for card in current_cards if card not in prev_cards]
                        
                        print(f"Decision {decision_num}: Agent chose to Hit")
                        print(f" -> Hand before: {prev_total} with cards {' '.join(prev_cards)}")
                        print(f" -> Drew card: {' '.join(new_card)}")
                        print(f" -> Hand after: {current_total} with cards {' '.join(current_cards)}")
                        
                        if current_total > 21:
                            print(f" -> BUST! (Over 21)")
                            break
                    
                    # Check if totals are same but different step (indicating Stand)
                    elif len(current_cards) == len(prev_cards) and current_total == prev_total and i > 1:
                        decision_num += 1
                        print(f"Decision {decision_num}: Agent chose to Stand")
                        print(f" -> Player stands with {current_total}")
                        break
                
                previous_state = step
    
    print(f"\nFINAL RESULT:")
    
    # Get final state from the last meaningful step
    final_player_cards = []
    final_dealer_cards = []
    final_player_total = 0
    final_dealer_total = 0
    
    # Look for final state in trajectory (work backwards from end)
    for step in reversed(player_trajectory):
        if isinstance(step, dict) and 'obs' in step:
            final_player_total = step['obs'][0]
            final_dealer_total = step['obs'][1]
            
            if 'raw_obs' in step and step['raw_obs']:
                final_player_cards = step['raw_obs'].get('player0 hand', [])
                final_dealer_cards = step['raw_obs'].get('dealer hand', [])
                if final_player_cards and final_dealer_cards:
                    break
    
    # Display final hands
    if final_player_cards:
        print(f"Your Final Hand: {' '.join(final_player_cards)} (Total: {final_player_total})")
    else:
        print(f"Your Final Total: {final_player_total}")
        
    if final_dealer_cards:
        print(f"Dealer Final Hand: {' '.join(final_dealer_cards)} (Total: {final_dealer_total})")
    else:
        print(f"Dealer Final Total: {final_dealer_total}")
    
    # Get payoff and determine outcome
    payoff = payoffs[0] if payoffs else 0
    
    if payoff > 0:
        outcome_text = "YOU WIN!"
    elif payoff < 0:
        outcome_text = "You Lost"
    else:
        outcome_text = "It's a Tie"
    
    print(f"\n{outcome_text}")
    print(f"Payoff: {payoff}")
    print("\n" + "-" * 50)
    
    return payoff

In [120]:
def play_batch_games(num_games=10):
    """Play multiple games and show statistics"""
    print(f"\nPlaying {num_games} games...")
    
    results = []
    wins = 0
    losses = 0
    ties = 0
    
    for game_num in range(num_games):
        print(f"\n--- Game {game_num + 1} ---")
        try:
            payoff = play_single_game()
            results.append(payoff)
            
            if payoff > 0:
                wins += 1
            elif payoff < 0:
                losses += 1
            else:
                ties += 1
                
        except Exception as e:
            print(f"Error in game {game_num + 1}: {e}")
            losses += 1
            results.append(-1)
    
    # Show statistics
    print(f"\nGAME STATISTICS:")
    print(f"Total Games: {num_games}")
    print(f"Wins: {wins} ({wins/num_games*100:.1f}%)")
    print(f"Losses: {losses} ({losses/num_games*100:.1f}%)")
    print(f"Ties: {ties} ({ties/num_games*100:.1f}%)")
    if results:
        print(f"Average Payoff: {sum(results)/len(results):.3f}")
    
    return results

In [121]:
# Main execution
print("BLACKJACK AI GAME")
print("=" * 30)

while True:
    print("\nOptions:")
    print("1. Play single game")
    print("2. Play batch games")
    print("3. Exit")
    
    choice = input("\nEnter your choice (1-3): ").strip()
    
    if choice == '1':
        play_single_game()
    elif choice == '2':
        try:
            num_games = int(input("How many games? (default 10): ") or "10")
            play_batch_games(num_games)
        except ValueError:
            print("Invalid number, using default of 10")
            play_batch_games(10)
    elif choice == '3':
        print("\nThanks for playing!")
        break
    else:
        print("Invalid choice, please try again.")


BLACKJACK AI GAME

Options:
1. Play single game
2. Play batch games
3. Exit

Playing 100 games...

--- Game 1 ---

NEW BLACKJACK GAME

INITIAL DEAL:
Your Cards: D9 ST
Dealer Visible Cards: D8
Available Actions: Hit, Stand

GAME ACTIONS:
Decision 1: Agent chose to Stand
 -> Player stands with 19

FINAL RESULT:
Your Final Hand: D9 ST (Total: 19)
Dealer Final Hand: H8 D8 S3 (Total: 19)

It's a Tie
Payoff: 0

--------------------------------------------------

--- Game 2 ---

NEW BLACKJACK GAME

INITIAL DEAL:
Your Cards: C9 ST
Dealer Visible Cards: H9
Available Actions: Hit, Stand

GAME ACTIONS:
Decision 1: Agent chose to Stand
 -> Player stands with 19

FINAL RESULT:
Your Final Hand: C9 ST (Total: 19)
Dealer Final Hand: SA H9 (Total: 20)

You Lost
Payoff: -1

--------------------------------------------------

--- Game 3 ---

NEW BLACKJACK GAME

INITIAL DEAL:
Your Cards: C9 C6
Dealer Visible Cards: H4
Available Actions: Hit, Stand

GAME ACTIONS:
Decision 1: Agent chose to Stand
 -> Player