In [27]:
# Imports
import random
import numpy as np
import tensorflow as tf

2024-09-20 23:12:48.515087: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [28]:
# Function to make a (shuffled) deck of cards
def create_a_deck():
    cards = [2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11]
    deck_of_cards = cards * 4
    random.shuffle(deck_of_cards)
    return deck_of_cards

# Function to draw a card (pop a card off the top of a shuffled deck)
def draw_card(deck):
    return deck.pop()

# Function to make a Blackjack hand (house only has one card for now)
def blackjack_hand(deck):
    player_cards = [draw_card(deck), draw_card(deck)]
    house_card = draw_card(deck)
    return player_cards, house_card

# Function to reset the deck
def reset_deck():
    return create_a_deck()

In [24]:
deck = create_a_deck()
player, house = blackjack_hand(deck)
print(player, house[-1])

[3, 10] 3


In [None]:
# Function to play a game (aces are treated as just 11s)
def play_a_game():
    deck = create_a_deck()
    player_cards, house_cards = blackjack_hand(deck)
    

In [47]:
# Function to create a model with two inputs: player cards and visible house card
# 2 ouputs: 1 if player wins, -1 if player loses
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(2,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Output either 0 (stay) or 1 (hit)
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model

# Function to decide the next move
def rnn_decide_action(model, player_total, house_card):
    state = np.array([[player_total, house_card]])
    action_probs = model.predict(state, verbose=0)
    action = np.argmax(action_probs)
    return action

# Function to simulate the house's actions
def house_play(deck, house_cards):
    while sum(house_cards) < 17:
        house_cards.append(draw_card(deck))
    return house_cards

# Function to play a game and return the result (1 = player win, 0 = house win, -1 = bust)
def play_game(model, deck):
    player_cards, house_card = blackjack_hand(deck)
    house_cards = [house_card]
    
    # Player turn
    player_total = sum(player_cards)
    while player_total < 21:
        action = rnn_decide_action(model, player_total, house_card)
        if action == 1:  # Hit
            player_cards.append(draw_card(deck))
            player_total = sum(player_cards)
            if player_total > 21:  # Bust
                return -1  # Player bust
            elif player_total == 21:
                return 1
        else:
            break

    # House turn if player didn't bust
    if player_total <= 21:
        house_cards = house_play(deck, house_cards)
        house_total = sum(house_cards)
        if house_total > 21 or player_total > house_total:
            return 1  # Player win
        else:
            return -1

# Main loop to train the model
def train_model():
    model = create_model()
    num_games = 1000
    X = []  # Features (player_total, house_card)
    y = []  # Labels (0 = stay, 1 = hit)
    
    for _ in range(num_games):
        deck = reset_deck()
        player_cards, house_card = blackjack_hand(deck)  # Initial hand
        player_total = sum(player_cards)
        
        while player_total < 21:  # Play until player busts or wins
            action = rnn_decide_action(model, player_total, house_card)
            X.append([player_total, house_card])  # Record the state (features)
            
            if action == 1:  # Hit
                player_cards.append(draw_card(deck))
                player_total = sum(player_cards)
                if player_total > 21:
                    y.append(-1)  # Bust, so this was a bad decision
                    break
                elif player_total == 21:
                    y.append(1)  # Reached 21, this was a good decision
                    break
            else:  # Stay
                house_cards = house_play(deck, [house_card])
                house_total = sum(house_cards)
                if house_total > 21 or player_total > house_total:
                    y.append(1)  # Player wins
                else:
                    y.append(-1)  # House wins
                break

    X = np.array(X)
    y = np.array(y)
    model.fit(X, y, epochs=50)
    return model

trained_model = train_model()
deck = create_a_deck()
result = play_game(trained_model, deck)
print(f"Game result: {'Player wins' if result == 1 else 'House wins' if result == 0 else 'Player busts'}")

Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.4905   
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: -1.0745 
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: -2.0269 
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: -2.7174 
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 960us/step - loss: -4.2719
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: -4.5258 
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 918us/step - loss: -7.3215
Epoch 8/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 993us/step - loss: -8.7317
Epoch 9/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: -10.1973 
Epoch 10/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step -

In [54]:
def test_model(model, num_games=1000):
    results = {
        "wins": 0,
        "losses": 0,
        "busts": 0
    }
    
    for _ in range(num_games):
        deck = create_a_deck()
        result = play_game(model, deck)
        
        if result == 1:
            results["wins"] += 1
        elif result == -1:
            results["losses"] += 1
        else:
            results["busts"] += 1
    
    # Print test results
    total_games = results["wins"] + results["losses"] + results["busts"]
    print(f"Total games: {total_games}")
    print(f"Wins: {results['wins']}")
    print(f"Losses: {results['losses']}")
    print(f"Busts: {results['busts']}")
    print(f"Win Rate: {results['wins'] / total_games * 100:.2f}%")
    
    return results

test_results = test_model(trained_model, num_games=1000)

Total games: 1000
Wins: 395
Losses: 601
Busts: 4
Win Rate: 39.50%
