# In [None]:
# Niveen Abdul-Mohsen (bvn9ad)
# Reinforcement Learning (CS 4771) - Figure 5.1
# Monte Carlo, on-policy
# I used NumPy for numerical operations and Matplotlib for plotting

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from collections import defaultdict

class BlackjackEnvironment:
    """
    Blackjack Environment Implementation

    From Sutton & Barto Section 5.1, Example 5.1:
    - The object of blackjack is to obtain cards whose sum is as great as
      possible without exceeding 21
    - All face cards count as 10, and an ace can count either as 1 or as 11
    - Each player competes independently against the dealer
    - The game begins with two cards dealt to both dealer and player
    - One of the dealer's cards is face up (showing card) and one is face down

    State Space (200 states total):
    - Player's current sum (12-21): 10 possible values
    - Dealer's showing card (Ace-10): 10 possible values
    - Whether player holds a usable ace (True/False): 2 possible values
    - Total: 10 × 10 × 2 = 200 states

    Note: Player sums below 12 are not included because the player should
    always hit with sum < 12 (no decision to be made). ``reset`` therefore
    keeps drawing for the player until the sum reaches at least 12.

    Reward Structure:
    - +1 for winning
    - -1 for losing
    - 0 for draw
    - All intermediate rewards are 0
    - Discount factor γ = 1 (no discounting) as stated in textbook

    Environment Assumptions (from textbook):
    - Cards are dealt from an infinite deck (with replacement)
    - No advantage to keeping track of cards already dealt

    Attributes:
        rng: NumPy random generator used for every card draw
        player_hand: List of card values currently held by the player
        dealer_hand: List of card values currently held by the dealer;
            index 0 is the face-up (showing) card
    """

    # One entry per rank of a suit (A, 2-9, 10, J, Q, K). Drawing uniformly
    # from this tuple makes a 10-valued card four times as likely as any
    # other value, which is what an infinite deck of real cards gives.
    DECK = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10)

    # Action encoding used by step()
    STICK = 0
    HIT = 1

    def __init__(self, seed=None):
        """
        Initialize the blackjack environment

        Args:
            seed: Optional seed for the random generator. Pass an int to get
                reproducible episodes; None (default) seeds from the OS.
        """
        self.rng = np.random.default_rng(seed)
        # Hands stay empty until reset() deals the first episode.
        self.player_hand = []
        self.dealer_hand = []

    def draw_card(self) -> int:
        """
        Draw a card from infinite deck

        Returns:
            int: Card value (1-10, where 1 is Ace)

        Note from textbook:
        - Cards are dealt with replacement (infinite deck assumption)
        - This simplifies the problem by making draws independent
        """
        # Pick a rank uniformly; J, Q, K are already folded into value 10.
        rank_index = self.rng.integers(len(self.DECK))
        return int(self.DECK[rank_index])

    def usable_ace(self, hand) -> bool:
        """
        Determine if the hand has a usable ace

        From textbook Section 5.1:
        "If the player holds an ace that he could count as 11 without going bust,
        then the ace is said to be usable. In this case it is always counted as 11
        because counting it as 1 would make the sum 11 or less, in which case there
        is no decision to be made because, obviously, the player should always hit."

        Args:
            hand: List of card values

        Returns:
            bool: True if hand has usable ace, False otherwise
        """
        # Aces are stored as 1, so promoting one ace to 11 adds exactly 10.
        return 1 in hand and sum(hand) + 10 <= 21

    def sum_hand(self, hand) -> int:
        """
        Calculate the sum of a hand, accounting for usable ace

        Args:
            hand: List of card values

        Returns:
            int: Sum of hand (with ace counted optimally)
        """
        total = sum(hand)
        if self.usable_ace(hand):
            total += 10
        return total

    def is_bust(self, hand) -> bool:
        """
        Check if hand is bust (sum > 21)

        Args:
            hand: List of card values

        Returns:
            bool: True if bust, False otherwise
        """
        return self.sum_hand(hand) > 21

    def _state(self):
        """Build the (player_sum, dealer_showing, usable_ace) observation."""
        return (
            self.sum_hand(self.player_hand),
            self.dealer_hand[0],
            self.usable_ace(self.player_hand),
        )

    def reset(self):
        """
        Start a new episode

        From textbook:
        "The game begins with two cards dealt to both dealer and player.
        One of the dealer's cards is face up and the other is face down."

        A natural (two-card 21) is not resolved here; it is returned as an
        ordinary state with sum 21 and settled when the player sticks.

        Returns:
            state: Tuple of (player_sum, dealer_showing, usable_ace)
        """
        self.player_hand = [self.draw_card(), self.draw_card()]
        # dealer_hand[0] is the face-up card; dealer_hand[1] is hidden.
        self.dealer_hand = [self.draw_card(), self.draw_card()]

        # Below 12 a hit can never bust, so there is no decision to make:
        # draw automatically until the player is inside the 12-21 state space.
        while self.sum_hand(self.player_hand) < 12:
            self.player_hand.append(self.draw_card())

        return self._state()

    def step(self, action):
        """
        Execute one step in the environment

        Args:
            action: 0 = stick, 1 = hit

        Returns:
            next_state: Next state after action. After a bust the player_sum
                component exceeds 21 (terminal, outside the 200-state space).
            reward: Reward received
            done: Whether episode is complete

        Raises:
            RuntimeError: If called before reset() has dealt the hands
            ValueError: If action is neither 0 nor 1

        From textbook Section 5.1:
        "The player's actions are to hit or to stick."
        """
        if not self.player_hand or not self.dealer_hand:
            raise RuntimeError("call reset() before step()")

        if action == self.HIT:
            self.player_hand.append(self.draw_card())
            if self.is_bust(self.player_hand):
                return self._state(), -1, True
            # Intermediate rewards are always 0.
            return self._state(), 0, False

        if action == self.STICK:
            # A player natural wins outright unless the dealer also holds a
            # natural; the dealer does not get to draw to 21 against it.
            if self.natural(self.player_hand):
                reward = 0 if self.natural(self.dealer_hand) else 1
                return self._state(), reward, True

            player_sum = self.sum_hand(self.player_hand)
            dealer_sum, dealer_bust = self.dealer_policy()
            if dealer_bust or player_sum > dealer_sum:
                reward = 1
            elif player_sum < dealer_sum:
                reward = -1
            else:
                reward = 0
            return self._state(), reward, True

        raise ValueError(f"invalid action {action!r}; expected 0 (stick) or 1 (hit)")

    def dealer_policy(self):
        """
        Dealer's fixed strategy

        From textbook Section 5.1:
        "The dealer hits or sticks according to a fixed strategy without choice:
        he sticks on any sum of 17 or greater, and hits otherwise."

        Returns:
            final_sum: Dealer's final hand sum
            is_bust: Whether dealer went bust
        """
        while self.sum_hand(self.dealer_hand) < 17:
            self.dealer_hand.append(self.draw_card())
        return self.sum_hand(self.dealer_hand), self.is_bust(self.dealer_hand)

    def natural(self, hand) -> bool:
        """
        Check if hand is a natural (Ace + 10-card = 21 with 2 cards)

        From textbook:
        "If the player has 21 immediately (an ace and a 10-card), it is called
        a natural. He then wins unless the dealer also has a natural, in which
        case the game is a draw."

        Args:
            hand: List of card values

        Returns:
            bool: True if natural, False otherwise
        """
        # Exactly two cards, one ace and one 10-valued card, in either order.
        return len(hand) == 2 and sorted(hand) == [1, 10]



def simple_policy(state):
    """
    The policy to evaluate (from Figure 5.1 caption)

    From textbook Section 5.1:
    "Consider the policy that sticks if the player's sum is 20 or 21,
    and otherwise hits."

    This is a fixed, deterministic policy:
    - Stick (action 0) if player sum is 20 or 21
    - Hit (action 1) otherwise

    Args:
        state: Tuple of (player_sum, dealer_showing, usable_ace)

    Returns:
        action: 0 = stick, 1 = hit
    """
    # Only the player's sum matters; the dealer's card and the usable-ace
    # flag are ignored by this policy.
    player_sum = state[0]
    return 0 if player_sum >= 20 else 1
