<a href="https://colab.research.google.com/github/sushant21csu463/RLLAB/blob/main/BlackJack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# Problem dimensions and Monte Carlo settings for the Blackjack experiment
NUM_STATES = 32  # Length of the value table V
NUM_ACTIONS = 2  # Action codes: 0 = stick, 1 = hit
NUM_EPISODES = 500_000  # Number of episodes simulated by run_monte_carlo()
GAMMA = 1.0  # Discount factor applied when accumulating the return

# Value estimates start at zero; visit counts are filled in lazily
V = np.zeros(NUM_STATES, dtype=float)
N = dict()  # Visit counts that set the step size of each running-average update

def _draw_card():
    """Draw one card value from an infinite deck.

    The thirteen ranks are equally likely. Ten, jack, queen and king all
    count as 10, so the value 10 is four times as likely as any other value.
    An ace is returned as 1.
    """
    return min(np.random.randint(1, 14), 10)


def generate_episode():
    """Play one player hand under a fixed "stick on 20 or 21" policy.

    The hand starts with a sum of 0 and no usable ace; the dealer's showing
    card is drawn once and stays the same for the whole episode. The player
    keeps hitting while the sum is below 20.

    Returns:
        A list of (state, action) pairs in the order they occurred, where
        state is (usable_ace, player_sum, dealer_card) and action is
        0 (stick) or 1 (hit). If the player busts, the episode ends with
        the hit that caused it; the bust sum itself is not recorded.
    """
    episode = []
    usable_ace = False
    player_sum = 0

    # Initial state: (usable_ace, player_sum, dealer_card)
    state = (usable_ace, player_sum, _draw_card())

    while True:
        action = 1 if player_sum < 20 else 0  # Stick on 20 or 21, hit otherwise
        episode.append((state, action))

        if action == 0:  # Stick ends the player's turn
            break

        # Hit: draw a new card (aces are first counted as 1)
        card = _draw_card()
        player_sum += card

        # Count the new ace as 11 when that does not bust the hand
        if card == 1 and player_sum + 10 <= 21:
            usable_ace = True
            player_sum += 10

        if player_sum > 21:
            if not usable_ace:
                break  # Bust with no ace to fall back on
            # Demote the usable ace from 11 back to 1 to avoid the bust
            usable_ace = False
            player_sum -= 10

        state = (usable_ace, player_sum, state[2])  # Dealer card is unchanged

    return episode

def run_monte_carlo():
    """Estimate per-sum state values with every-visit Monte Carlo.

    Simulates NUM_EPISODES episodes with generate_episode() and updates the
    module-level table V in place. V is indexed by the player's sum
    (state[1]); N stores, under that same index, how many returns have been
    averaged into the corresponding entry of V.

    NOTE(review): every step is credited a reward of +1, so the return G
    counts the decisions remaining in the episode (discounted by GAMMA)
    rather than the win/lose outcome of the hand. The episode carries no
    game result, so this is left as is -- confirm the intended reward.

    Returns:
        None. Results are left in the module-level V and N.
    """
    for _ in range(NUM_EPISODES):
        episode = generate_episode()
        G = 0  # Return accumulated backwards from the end of the episode

        # Walk the episode backwards so G can be built incrementally
        for state, _action in reversed(episode):
            G = GAMMA * G + 1  # Discounted return with a +1 reward per step
            index = state[1]  # V is indexed by the player's sum only
            # Count visits under the same index that V uses, so that the
            # update below is a true running average of the observed returns.
            N[index] = N.get(index, 0) + 1
            V[index] += (G - V[index]) / N[index]

if __name__ == "__main__":
    # Fill the module-level value table V
    run_monte_carlo()

    # Report one line per table entry, rounded to four decimal places
    print("State Values:")
    for index, value in zip(range(NUM_STATES), V):
        print(f"State {index}: {value:.4f}")


State Values:
State 0: 4.3631
State 1: 0.0000
State 2: 4.0416
State 3: 3.8734
State 4: 3.7019
State 5: 3.5353
State 6: 3.3554
State 7: 3.1749
State 8: 3.0213
State 9: 2.7338
State 10: 2.5621
State 11: 3.1238
State 12: 2.9685
State 13: 2.7336
State 14: 2.6175
State 15: 2.4261
State 16: 2.2935
State 17: 2.1421
State 18: 2.0386
State 19: 1.8871
State 20: 1.0000
State 21: 1.0000
State 22: 0.0000
State 23: 0.0000
State 24: 0.0000
State 25: 0.0000
State 26: 0.0000
State 27: 0.0000
State 28: 0.0000
State 29: 0.0000
State 30: 0.0000
State 31: 0.0000
