# Reinforcement Learning

The game is played with an infinite deck of cards.
Each draw from the deck yields a value between 1 and 10 (uniformly distributed) and a color:
red (probability 1/3) or black (probability 2/3).
At the start of the game, both the player and the dealer draw one black card.

Each turn the player may either stick or hit
If player hits, then she draws another card from the deck
If player sticks, she receives no further cards
The values of the player's cards are added (black cards) or subtracted (red cards)
If player’s sum exceeds 21, or becomes less than 1, then she “goes bust” and loses the game
(reward -1)

If the player sticks, then the dealer starts taking turns. The dealer always sticks on any sum of
17 or greater, and hits otherwise. If the dealer goes bust, then the player wins; otherwise the
outcome - win (reward +1), lose (reward -1), or draw (reward 0) - is decided by comparing sums:
the side with the larger sum wins, and equal sums are a draw.
1. Build a simulator that simulates the drawing of cards and the play of the game between the
player and the dealer.
2. Then implement a Q-learning approach to maximize the expected reward in this game.

# Part 1

In [None]:
import random

In [None]:
class Game:
    """Interactive simulator of one player-versus-dealer card game.

    Rules implemented:

    * Cards come from an infinite deck: value uniform in 1..10, black with
      probability 2/3 (value is added) or red with probability 1/3 (value is
      subtracted).
    * Player and dealer each start with one black card.
    * The player hits or sticks; a sum below 1 or above 21 is a bust.
    * After the player sticks, the dealer hits until reaching 17 or more,
      or going bust.
    * Reward is +1 for a player win, -1 for a loss, 0 for a draw.
    """

    # The only actions the player may choose.
    ACTIONS = ("Hit", "Stick")

    def __init__(self):
        self.start = False  # kept for compatibility; not read by this class
        self.player_action = None  # last action chosen by the player
        self.dealer_action = None  # last action taken by the dealer
        self.player_val = 0  # running sum of the player's cards
        self.dealer_val = 0  # running sum of the dealer's cards
        self.reward = None  # outcome of the game, None until it is decided

    @staticmethod
    def _draw_card():
        """Return a signed card value: positive for black, negative for red."""
        color = random.choices(["Black", "Red"], weights=[2 / 3, 1 / 3])[0]
        number = random.randint(1, 10)
        return number if color == "Black" else -number

    @staticmethod
    def _is_bust(total):
        """Return True when *total* lies outside the legal range 1..21."""
        return total < 1 or total > 21

    def _game_over(self):
        """Return True once no further card can be drawn by either side."""
        if self._is_bust(self.player_val):
            return True
        if self.player_action != "Stick":
            return False
        return self._is_bust(self.dealer_val) or self.dealer_action == "Stick"

    def _prompt_action(self):
        """Ask on stdin until the user enters a valid action; return it."""
        while True:
            choice = input("Choose your action: ").strip().capitalize()
            if choice in self.ACTIONS:
                return choice
            print("Please enter Hit or Stick.")

    def start_game(self):
        """Reset the table and deal one black card (1..10) to each side.

        State is assigned rather than accumulated so the same instance can
        play several games in a row.
        """
        self.player_action = None
        self.dealer_action = None
        self.reward = None
        self.player_val = random.randint(1, 10)
        self.dealer_val = random.randint(1, 10)

    def player_decision(self):
        """Apply ``self.player_action``: draw a card on "Hit", else nothing."""
        if self.player_action == "Hit":
            self.player_val += self._draw_card()

    def dealer_decision(self):
        """Take one dealer step; only acts once the player has stuck.

        The dealer sticks on 17 or more and otherwise draws one card.
        """
        if self.player_action != "Stick":
            return
        if self.dealer_val >= 17:
            self.dealer_action = "Stick"
        else:
            self.dealer_action = "Hit"
            self.dealer_val += self._draw_card()

    def reward_calc(self):
        """Store the outcome in ``self.reward``.

        A bust decides the game immediately. Sums are compared only after
        both sides have stuck; while the game is still running the reward is
        left unchanged.
        """
        if self._is_bust(self.player_val):
            self.reward = -1
        elif self._is_bust(self.dealer_val):
            self.reward = +1
        elif self.player_action == "Stick" and self.dealer_action == "Stick":
            if self.player_val > self.dealer_val:
                self.reward = +1
            elif self.player_val < self.dealer_val:
                self.reward = -1
            else:
                self.reward = 0

    def game_processing(self):
        """Play one game interactively and return its reward (-1, 0 or +1).

        Both sums are printed after the deal and after every player action
        (player first, then dealer).
        """
        self.start_game()
        # The opening cards are random, so show them before the first prompt.
        print(self.player_val)
        print(self.dealer_val)

        while not self._game_over():
            self.player_action = self._prompt_action()
            self.player_decision()
            if self.player_action == "Stick":
                # The player's turn is over: let the dealer play out the hand.
                while not self._game_over():
                    self.dealer_decision()
            print(self.player_val)
            print(self.dealer_val)

        self.reward_calc()
        return self.reward
   

In [None]:
# Play one game on a fresh Game instance; actions are read from stdin and
# game_processing() returns the resulting reward.
Game().game_processing()

Choose your action: Hit
4
1
Choose your action: Hit
9
1
Choose your action: Stick
9
-8


1

# Part 2

In [None]:
class QGame:
    """Card-game simulator whose player learns with tabular Q-learning.

    Game rules: cards are worth 1..10, black (probability 2/3) adds and red
    (probability 1/3) subtracts; both sides start with one black card; a sum
    below 1 or above 21 is a bust; after the player sticks the dealer hits
    until reaching 17 or more.

    Learning: a state is ``(player_val, dealer_val)`` as seen on the player's
    turn, and the actions are "Hit" and "Stick". ``self.Q`` is a list of
    ``(state, action, value)`` triples holding at most one entry per
    state-action pair; pairs that were never visited count as 0.0. Learned
    values persist across calls to ``game_processing`` on the same instance,
    so repeated calls train the agent.
    """

    # The only actions the player may choose.
    ACTIONS = ("Hit", "Stick")

    def __init__(self):
        self.start = False  # kept for compatibility; not read by this class
        self.player_action = None  # last action chosen by the player
        self.dealer_action = None  # last action taken by the dealer
        self.player_val = 0  # running sum of the player's cards
        self.dealer_val = 0  # running sum of the dealer's cards
        self.reward = None  # reward of the most recent transition
        self.Qvalue = 0.0  # value written by the most recent update
        self.Q = []  # learned table: (state, action, value) triples
        self.Q_last = None  # (state, action, value) taken before the transition
        # Equal weights until epsilon_greedy() has looked at a state; all-zero
        # weights would make random.choices raise.
        self.weight_hit = 0.5
        self.weight_stick = 0.5
        self.epsilon = 0.2  # probability of taking the non-greedy action
        self.alpha = 0.3  # learning rate
        self.gamma = 0.9  # discount factor

    @staticmethod
    def _draw_card():
        """Return a signed card value: positive for black, negative for red."""
        color = random.choices(["Black", "Red"], weights=[2 / 3, 1 / 3])[0]
        number = random.randint(1, 10)
        return number if color == "Black" else -number

    @staticmethod
    def _is_bust(total):
        """Return True when *total* lies outside the legal range 1..21."""
        return total < 1 or total > 21

    def _game_over(self):
        """Return True once no further card can be drawn by either side."""
        if self._is_bust(self.player_val):
            return True
        if self.player_action != "Stick":
            return False
        return self._is_bust(self.dealer_val) or self.dealer_action == "Stick"

    def _prompt_action(self):
        """Ask on stdin until the user enters a valid action; return it."""
        while True:
            choice = input("Choose your action: ").strip().capitalize()
            if choice in self.ACTIONS:
                return choice
            print("Please enter Hit or Stick.")

    def _q_lookup(self, state, action):
        """Return the stored value of (state, action), or 0.0 if unseen."""
        for known_state, known_action, value in self.Q:
            if known_state == state and known_action == action:
                return value
        return 0.0

    def _q_store(self, state, action, value):
        """Insert or overwrite the table entry for (state, action)."""
        for index, (known_state, known_action, _) in enumerate(self.Q):
            if known_state == state and known_action == action:
                self.Q[index] = (state, action, value)
                return
        self.Q.append((state, action, value))

    def start_game(self):
        """Reset the table and deal one black card (1..10) to each side.

        Only the hand is reset; the learned Q table is kept so the same
        instance can be trained over many games.
        """
        self.player_action = None
        self.dealer_action = None
        self.reward = None
        self.player_val = random.randint(1, 10)
        self.dealer_val = random.randint(1, 10)

    def player_decision(self):
        """Apply ``self.player_action``: draw a card on "Hit", else nothing."""
        if self.player_action == "Hit":
            self.player_val += self._draw_card()

    def dealer_decision(self):
        """Take one dealer step; only acts once the player has stuck.

        The dealer sticks on 17 or more and otherwise draws one card.
        """
        if self.player_action != "Stick":
            return
        if self.dealer_val >= 17:
            self.dealer_action = "Stick"
        else:
            self.dealer_action = "Hit"
            self.dealer_val += self._draw_card()

    def reward_calc(self):
        """Store the reward of the latest transition in ``self.reward``.

        A bust decides the game immediately. Sums are compared only after
        both sides have stuck. While the game is still running the
        intermediate reward is 0.
        """
        if self._is_bust(self.player_val):
            self.reward = -1
        elif self._is_bust(self.dealer_val):
            self.reward = +1
        elif self.player_action == "Stick" and self.dealer_action == "Stick":
            if self.player_val > self.dealer_val:
                self.reward = +1
            elif self.player_val < self.dealer_val:
                self.reward = -1
            else:
                self.reward = 0
        else:
            self.reward = 0

    def update_Qtable(self):
        """Apply one Q-learning update for the transition just observed.

        Reads ``self.Q_last`` (the state, action and old value from before
        the transition), ``self.reward`` and the current sums (the successor
        state), then writes

            Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))

        with the bootstrap term set to 0 when the game is over. Does nothing
        if no transition has been recorded yet.
        """
        if not self.Q_last:
            return
        state, action, old_value = self.Q_last
        if self._game_over():
            best_next = 0.0  # terminal states have no future value
        else:
            next_state = (self.player_val, self.dealer_val)
            best_next = max(self._q_lookup(next_state, a) for a in self.ACTIONS)
        target = self.reward + self.gamma * best_next
        self.Qvalue = old_value + self.alpha * (target - old_value)
        self._q_store(state, action, self.Qvalue)

    def epsilon_greedy(self):
        """Set the action weights for the current state.

        The action with the higher learned value gets weight ``1 - epsilon``
        and the other gets ``epsilon``; on a tie (including an unseen state)
        both get 0.5.
        """
        state = (self.player_val, self.dealer_val)
        hit_value = self._q_lookup(state, "Hit")
        stick_value = self._q_lookup(state, "Stick")
        if hit_value > stick_value:
            self.weight_hit = 1 - self.epsilon
            self.weight_stick = self.epsilon
        elif stick_value > hit_value:
            self.weight_stick = 1 - self.epsilon
            self.weight_hit = self.epsilon
        else:
            self.weight_hit = 0.5
            self.weight_stick = 0.5

    def game_processing(self, *, interactive=True, verbose=True):
        """Play one game, learning from every player move.

        Args:
            interactive: when True (default) the first action is read from
                stdin; later actions, and all actions when False, are chosen
                by the epsilon-greedy policy.
            verbose: when True (default) print the sums and each
                policy-chosen action.

        Returns:
            The final reward: +1 win, -1 loss, 0 draw.
        """
        self.start_game()
        if verbose:
            # The opening cards are random, so show them before the first move.
            print(self.player_val)
            print(self.dealer_val)

        first_move = True
        while not self._game_over():
            state = (self.player_val, self.dealer_val)
            if first_move and interactive:
                self.player_action = self._prompt_action()
            else:
                self.epsilon_greedy()
                self.player_action = random.choices(
                    ["Hit", "Stick"],
                    weights=[self.weight_hit, self.weight_stick],
                )[0]
                if verbose:
                    print(self.player_action)
            first_move = False

            self.player_decision()
            if self.player_action == "Stick":
                # The player's turn is over: let the dealer play out the hand.
                while not self._game_over():
                    self.dealer_decision()
            if verbose:
                print(self.player_val)
                print(self.dealer_val)

            self.reward_calc()
            # Record the pre-transition pair so the update targets Q(s, a).
            self.Q_last = (
                state,
                self.player_action,
                self._q_lookup(state, self.player_action),
            )
            self.update_Qtable()
        return self.reward
   

In [None]:
# Play one game on a fresh QGame instance: the first action is read from
# stdin, later actions are sampled by the agent, and the reward is returned.
QGame().game_processing()

Choose your action: Stick
1
-7


1