In [2]:
""" Playgame routine.  Plays one game between StratA and StratB.  Outputs return to A
    Added output of cards and plays to allow ML"""

def playgame(GameDeck, StratA, StratB, verbose = False):

    # Deal 
    cardA = GameDeck.deal()
    cardB = GameDeck.deal()
    if verbose: print("Card A: ", cardA, " Card B: ", cardB)
    playA = ""
    playB = ""
    
    # Player A decides
    playA = StratA.play(cardA,"A")
    if verbose: print("Player A: ", playA)

    # if Player A pass, showdown for $2    
    if playA == "Pass":
        if cardA > cardB:
            payout = 1
        elif cardB > cardA:
            payout = -1
        else:
            payout = 0
    # if Player A raises, player B decides
    else:
        playB = StratB.play(cardB,"B")
        if verbose: print("Player B: ", playB)
        
        #if player B calls, showdown for $4
        if playB == "Call":
            if cardA > cardB:
                payout = 2
            elif cardB > cardA:
                payout = -2
            else:
                assert (cardA == cardB)
                payout = 0
        # if player B folds, A gets the ante
        else:
            payout = 1
    if verbose: 
        print("Payout: ",payout)
        print("")
    return {'winA':payout,'playA':playA,'playB':playB,'cardA':cardA,'cardB':cardB}
        

In [3]:
""" Deck class  Defines the deck.  For now only a discrete set 0 - n-1"""

class Deck:
    def __init__(self, decksize):
        self.decksize = decksize
        self.cards = range(self.decksize)      
        
    def deal(self):
        import random              # is it ok to have this here?
        card_delt = random.randint(0,self.decksize -1 )
        return card_delt
    

In [4]:
"""  Strategy Class.  Sets standards for all strategies 
    Create a subclass for each strategy"""

class Strategy:
    def __init__(self):
        pass
        # self.gamedeck = GameDeck
        # self.decksize = GameDeck.decksize
        
    def play(self,mycard,player):
        """ determine strategy for player, having been dealt card mycard.  
        If player = "A" return either 'Pass' or 'Raise' 
        If player = 'B' return either 'Fold' or 'Call' """ 
        pass
    



In [5]:
""" Vector based strategy.  

    NOT USED IN 3.0

    Paramaterized by a vector giving probability of aggressive (Raise/Call) strategy for each card.
    
    Old strategies
    
    Random - [1/2,1/2,1/2,1/2,1/2,1/2]
    Simple - e.g [0,0,0,1,1,1] 
    bluff - e.g [p,p,p,1,1,1]
    optimal - A [2/3,0,0,0,1,1]  B [0, 1/3,1/3,1,1,1]"""

class vectorstrat(Strategy):
    
    def __init__(self,aggprobs):
        self.aggprobs = aggprobs
        
    def play(self,mycard,player):
        import random
        if random.random() > self.aggprobs[mycard]:
            if player == "A":
                return 'Pass'
            else:
                return 'Fold'
        else:
            if player == "A":
                return "Raise"
            else:
                return 'Call'

        

In [6]:
""" Four Vector based strategy.  

    3.2 added heat
    
    NEW IN 3.1

    Paramaterized by two vectors giving expectation and variability.
    
    Decison also uses a heat parameter.  1 -- play proportionate to logit(weight).  0 - play max.
    
    Old strategies  - not obvious how to convert.
    
    Random - [0,0,0,0,0,0],[1,1,1,1,1,1],[0,0,0,0,0,0],[1,1,1,1,1,1],1
    
    Note B passive should be fixed at [-1,-1,-1,-1,-1,-1],[0,0,0,0,0,0,0]  -- folding will loose $1 with no uncertainty
    """ 
import math
def logit(x):
    p = 1 / ( 1 + math.exp(-x))
    return p

def logodds(p):
    if p == 0: return -10
    elif p == 1: return 10
    else:
        x = math.log(p / (1-p))
        return x


def calc_prob(expect1,var1,expect2,var2,heat):
    
    #calcs prob of choosing first choice, based on expectations, variance, and a heat hyperparameter
    # heat = 1 -- choose based on to dif / s.d.
    # heat = 0 -- always choose highest
    # heat -> infinity -- choose 50 / 50
    diff = expect1 - expect2
    sd = (var1 + var2)**(0.5)
    heat = max(heat,.01)
    scaleddiff = diff / (sd * heat)
    prob = logit(max(min(scaleddiff,10),-10))
    return prob
    
    

class fourvectorstrat(Strategy):
    
    def __init__(self,value_passive,var_passive,value_aggressive,var_aggressive,heat):
        self.value_passive = value_passive
        self.var_passive = var_passive
        self.value_aggressive = value_aggressive
        self.var_aggressive = var_aggressive
        self.heat = heat

        
    def play(self,mycard,player):
        import random
        probpassive = calc_prob(self.value_passive[mycard],self.var_passive[mycard],
                                self.value_aggressive[mycard],self.var_aggressive[mycard],
                                self.heat)
        if random.random() < probpassive:
            if player == "A":
                return 'Pass'
            else:
                return 'Fold'
        else:
            if player == "A":
                return "Raise"
            else:
                return 'Call'



In [7]:
calc_prob(1.5,3.3,-1,0,.5)

0.9400492802707288

In [8]:
d = Deck(6)
strata = ([-2,-2,-2,0,0,0],[1,.5,.1,1,.5,.1],[0,0,0,2,2,2],[1,.5,.1,1,.5,.1],1)
stratb = ([-1,-1,-1,-1,-1,-1],[0,0,0,0,0,0],[0,0,1,1,2,2],[1,.1,1,.1,1,.1],1)

for mycard in range(6) : 
    Aaggressive = calc_prob(strata[0][mycard],strata[1][mycard],strata[2][mycard],strata[3][mycard],1)
    Baggressive = calc_prob(stratb[0][mycard],stratb[1][mycard],stratb[2][mycard],stratb[3][mycard],1)                     
    print("Card: ",mycard," Player A: ",Aaggressive," Player B: ",Baggressive)

Card:  0  Player A:  0.19557031749304313  Player B:  0.2689414213699951
Card:  1  Player A:  0.11920292202211755  Player B:  0.0406102206733749
Card:  2  Player A:  0.011293882208110638  Player B:  0.11920292202211755
Card:  3  Player A:  0.19557031749304313  Player B:  0.0017885581618688018
Card:  4  Player A:  0.11920292202211755  Player B:  0.04742587317756678
Card:  5  Player A:  0.011293882208110638  Player B:  7.583817064491473e-05


In [49]:
""" A/B Learning  both learn together

    Modified for 2vector. 
    
    Will use logistic regression to set each of the vector parameters individually.
    
    Inline training -- play one game, instantly update.
    
   
    
    """
import math
def logit(x):
    p = 1 / ( 1 + math.exp(-x))
    return p

def logodds(p):
    if p == 0: return -10
    elif p == 1: return 10
    else:
        x = math.log(p / (1-p))
        return x


def train_AB_logit4(num_games = 10 ** 6,num_updates = 20 , alpha = 0.005,beta = .01, gamma = .0001, decksize = 6,
                  start_A =[], start_B =[],verbose = False):

    # alpha is a hyperparameter, speed of linear updates of expectations
    # beta is a hyperparameter, speed of EWMA updates of variances
    # gamma is a hyperparameter, speed of heat updates
    
    
    import random
    import copy
    
    # set paramaters and counters

    d = Deck(decksize)
    
    vec = [0 for i in range(decksize)]  # starts random parameter = 0 -> prob 50%
    if start_A == [] :    
        # default for A is 0 expectation 1 variance
        paramvectorA = ([0 for i in range(decksize)],[1 for i in range(decksize)],
                        [0 for i in range(decksize)],[1 for i in range(decksize)])
        heatA = 1
    else:
        paramvectorA = [x for x in start_A][0:3]
        heatA = start_A[4]
        
    if start_B == []:   # default for B has value = -1, var = 0 for passive -- fold always pays -1
        paramvectorB = ([-1 for i in range(decksize)],[0 for i in range(decksize)],
                        [0 for i in range(decksize)],[1 for i in range(decksize)])
        heatB = 1
    else:
        paramvectorB = [x for x in start_B][0:3]
        heatB = start_B[4]
    
    winningsA = 0
    winningsA_temp = 0
    training_updates =[]
    games_update = num_games / num_updates
 
    for i in range(num_games+1):
        
        # set strategy based on current parameters for both A and B
        sa = fourvectorstrat(*paramvectorA,heatA)    
        sb = fourvectorstrat(*paramvectorB,heatB)

        #play a game
        result = playgame(d,sa,sb,verbose = False)
        
        # Update strategies.  
        # Adjustments to expectation are now simple and consistent 
        # TO DO -- change structure of parameters so this can be done with names and not  vector numbers
        # TO DO -- change passing from playgame so that this can be done without repeats and if statements
    

        if result['playA'] == "Pass":   # if A passes
            
            # A learns how passing worked  -- adjust passive params (0 and 1)
            diffa = result['winA'] - paramvectorA[0][result['cardA']]
            paramvectorA[0][result['cardA']] += diffa * alpha
            paramvectorA[1][result['cardA']] = beta * diffa**2 + (1 - beta) * paramvectorA[1][result['cardA']]
            heatA = heatA * (1 - gamma * diffa)
            
            #if A passes, B gets to see showdown, so learns if calling would have worked
            # calling is aggressive  - vectors 2 and 3
            b_win_if_call = (result['cardB'] > result['cardA']) * 2 - 1      # did B win is 1 or 0.  this is 1 or -1
            if result['cardB'] == result['cardA']: b_win_if_call = 0
            diffb =  2 * (b_win_if_call) - paramvectorB[2][result['cardB']]
            paramvectorB[2][result['cardB']] += diffb * alpha
            paramvectorB[3][result['cardB']] = beta * diffb**2 + (1 - beta) * paramvectorB[3][result['cardB']]
            
        else:    # if A raises
            
            # adjust A's aggressive params (2 and 3)    
            diffa = result['winA'] - paramvectorA[2][result['cardA']]
            paramvectorA[2][result['cardA']] += diffa * alpha
            paramvectorA[3][result['cardA']] = beta * diffa**2 + (1 - beta) * paramvectorA[3][result['cardA']]
            heatA = heatA * (1 - gamma * diffa)
            #print("Pass:  CardA: ",result['cardA']," CardB: ",result['cardB'],"delta :",delta)
 
            if result['playB'] == "Fold":
                # if B folds, she learns nothing.  B's passive vectors are constant.  
                diffb =  - result['winA'] - paramvectorB[0][result['cardB']]
                heatB = heatB * (1 - gamma * diffb)
            
            else:
                # if B calls, her aggressive  (2 and 3) is updated 
                diffb =  - result['winA'] - paramvectorB[2][result['cardB']]
                heatB = heatB * (1 - gamma * diffb)
                #if result['cardB'] == 5:
                #    print("Card A:",result['cardA']," Card B: ",result['cardB'],
                #          " result:",-result['winA']," expect: ",round(paramvectorB[2][result['cardB']],4),
                #          " diff: ",round(diffb,4),
                #          " previous var: ",round(paramvectorB[3][result['cardB']],4))
                

                paramvectorB[2][result['cardB']] += diffb * alpha
                paramvectorB[3][result['cardB']] = beta * diffb**2 + (1 - beta) * paramvectorB[3][result['cardB']]
                

                
                # given the showdown, A learns not only the result of raising (0 and 1), but the result if he had passed
                a_win_if_pass = (result['cardA'] > result['cardB']) * 2 - 1
                if result['cardA'] == result['cardB']: a_win_if_pass = 0
                diffa = a_win_if_pass - paramvectorA[0][result['cardA']]
                paramvectorA[0][result['cardA']] += diffa * alpha
                paramvectorA[1][result['cardA']] = beta * diffa**2 + (1 - beta) * paramvectorA[1][result['cardA']]
                
        #if result['cardB'] == 5:
        #    print("Call:  CardA: ",result['cardA']," CardB: ",result['cardB'],"params :",
        #          paramvectorB[0][5]," ",paramvectorB[1][5])
            
        
        # track performance
        winningsA += result['winA']
        winningsA_temp += result['winA']
 
        # periodically update
        if i % games_update == 0 :
            if verbose:
                if i > 0: 
                    tempwinrate = round(winningsA_temp / games_update,4) 
                else: 
                    tempwinrate = "       "
                print(i, tempwinrate)
                print('A:',[(round(x,2),round(z,2)) for x,y,z,w in zip(*paramvectorA)])
                print('A:',[(round(y,2),round(w,2)) for x,y,z,w in zip(*paramvectorA)])
                print("B: ",[(round(x,2),round(z,2)) for x,y,z,w in zip(*paramvectorB)])
                print('B:',[(round(y,2),round(w,2)) for x,y,z,w in zip(*paramvectorB)])
                print('HeatA: ', heatA,"  HeatB:", heatB)
            training_updates += [{'games':i,'tempAwins':winningsA_temp,
                                  'logoddsA':copy.deepcopy(paramvectorA),'logoddsB':copy.deepcopy(paramvectorB),
                                 'heatA':heatA,'heatB':heatB}]
            winningsA_temp = 0
    
    if verbose:
        print("Done")
        winrateA = winningsA / num_games
        print("A's Winrate = ", winrateA)
        print("Final Strategies:")
        print('A:',[round(calc_prob(z,w,x,y,heatA)*100,2) for x,y,z,w in zip(*paramvectorA)]) # show prob aggressive
        print('B:',[round(calc_prob(z,w,x,y,heatB)*100,2) for x,y,z,w in zip(*paramvectorB)])        
        
    return {'num_games':num_games, 'winningsA':winningsA,'final model A':paramvectorA,
            'final model B':paramvectorB,'training_updates':training_updates}
        
    



In [50]:
t = train_AB_logit4(num_games = 10**6,num_updates = 10, alpha = 0.005, beta = 0.01, gamma = 0.0001, decksize = 6,
                  start_A =[], start_B =[],verbose = True)

0        
A: [(0, 0), (0, 0), (0, 0), (0, 0.01), (0, 0), (0, 0)]
A: [(1, 1), (1, 1), (1, 1), (1, 1.0), (1, 1), (1, 1)]
B:  [(-1, 0), (-1, 0), (-1, 0), (-1, 0), (-1, 0), (-1, 0)]
B: [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]
HeatA:  0.9999   HeatB: 1.0
100000 0.0931
A: [(-0.9, -0.72), (-0.61, -0.49), (-0.32, -0.08), (0.07, 0.3), (0.34, 0.69), (0.78, 1.3)]
A: [(0.08, 2.11), (0.48, 2.38), (0.77, 2.41), (0.85, 2.44), (0.7, 1.88), (0.18, 0.52)]
B:  [(-1, -1.72), (-1, -1.05), (-1, -0.13), (-1, 0.45), (-1, 1.03), (-1, 1.69)]
B: [(0, 0.47), (0, 2.33), (0, 3.46), (0, 3.08), (0, 2.17), (0, 0.49)]
HeatA:  0.7113625225875388   HeatB: 1.5266317062949215
200000 0.1439
A: [(-0.85, -0.67), (-0.57, -0.28), (-0.23, 0.01), (0.03, 0.26), (0.46, 0.88), (0.77, 1.25)]
A: [(0.13, 2.13), (0.51, 2.31), (0.76, 2.49), (0.78, 2.44), (0.59, 1.44), (0.17, 0.47)]
B:  [(-1, -1.65), (-1, -1.0), (-1, -0.17), (-1, 0.31), (-1, 1.16), (-1, 1.63)]
B: [(0, 0.58), (0, 2.17), (0, 3.13), (0, 3.14), (0, 1.98), (0, 0.6)]
He

Obviously, this method for training heat is not working.  
Minor issue -- heat not allways updated  -- e.g. for B if A passes.  This causes a drift.  minor b/c I could compute and adjust
More significant -- not clear what the deriviative of wins w.r.t. heat is -- my guess may not be a good approximation.  Also more significant -- Likely has very slow convergence 
Also - no reason why single heat for all cards will work well.

Back to drawing board
