In [63]:
""" Playgame routine.  Plays one game between StratA and StratB.  Outputs return to A
    Added output of cards and plays to allow ML"""

def playgame(GameDeck, StratA, StratB, heat = 1, verbose = False):

    # Deal 
    cardA = GameDeck.deal()
    cardB = GameDeck.deal()
    if verbose: print("Card A: ", cardA, " Card B: ", cardB)
    playA = ""
    playB = ""
    
    # Player A decides
    playA = StratA.play(cardA,"A",heat)
    if verbose: print("Player A: ", playA)

    # if Player A pass, showdown for $2    
    if playA == "Pass":
        if cardA > cardB:
            payout = 1
        elif cardB > cardA:
            payout = -1
        else:
            payout = 0
    # if Player A raises, player B decides
    else:
        playB = StratB.play(cardB,"B",heat)
        if verbose: print("Player B: ", playB)
        
        #if player B calls, showdown for $4
        if playB == "Call":
            if cardA > cardB:
                payout = 2
            elif cardB > cardA:
                payout = -2
            else:
                assert (cardA == cardB)
                payout = 0
        # if player B folds, A gets the ante
        else:
            payout = 1
    if verbose: 
        print("Payout: ",payout)
        print("")
    return {'winA':payout,'playA':playA,'playB':playB,'cardA':cardA,'cardB':cardB}
        

In [57]:
""" Deck class  Defines the deck.  For now only a discrete set 0 - n-1"""

class Deck:
    def __init__(self, decksize):
        self.decksize = decksize
        self.cards = range(self.decksize)      
        
    def deal(self):
        import random              # is it ok to have this here?
        card_delt = random.randint(0,self.decksize -1 )
        return card_delt
    

In [58]:
"""  Strategy Class.  Sets standards for all strategies 
    Create a subclass for each strategy"""

class Strategy:
    def __init__(self):
        pass
        # self.gamedeck = GameDeck
        # self.decksize = GameDeck.decksize
        
    def play(self,mycard,player,heat):
        """ determine strategy for player, having been dealt card mycard.  
        If player = "A" return either 'Pass' or 'Raise' 
        If player = 'B' return either 'Fold' or 'Call' """ 
        pass
    



In [59]:
""" Vector based strategy.  

    NOT USED IN 3.0

    Paramaterized by a vector giving probability of aggressive (Raise/Call) strategy for each card.
    
    Old strategies
    
    Random - [1/2,1/2,1/2,1/2,1/2,1/2]
    Simple - e.g [0,0,0,1,1,1] 
    bluff - e.g [p,p,p,1,1,1]
    optimal - A [2/3,0,0,0,1,1]  B [0, 1/3,1/3,1,1,1]"""

class vectorstrat(Strategy):
    
    def __init__(self,aggprobs):
        self.aggprobs = aggprobs
        
    def play(self,mycard,player):
        import random
        if random.random() > self.aggprobs[mycard]:
            if player == "A":
                return 'Pass'
            else:
                return 'Fold'
        else:
            if player == "A":
                return "Raise"
            else:
                return 'Call'

        

In [72]:
""" TWO Vector based strategy.  

    NEW IN 3.0

    Paramaterized by two vectors giving probability of value of (Raise/Call) strategy for each card.
    Decison also uses a heat parameter.  1 -- play proportionate to logit(weight).  0 - play max.
    
    Old strategies  - not obvious how to convert.
    
    Random - [0,0,0,0,0,0],[0,0,0,0,0,0],1
    """ 
import math
def logit(x):
    p = 1 / ( 1 + math.exp(-x))
    return p

def logodds(p):
    if p == 0: return -10
    elif p == 1: return 10
    else:
        x = math.log(p / (1-p))
        return x


def calc_prob(value1,value2,heat=1):
    
    #calcs prob of choosing first choice, based on two values and a temp
    # temp = 1 -- choose proportionate to logit of the values
    # temp = 0 -- always choose highest
    # temp -> infinity -- choose 50 / 50
    p1 = logit(value1)
    p2 = logit(value2)
    if heat > 0:
        prob = p1**(1/heat) / (p1**(1/heat) + p2**(1/heat))
    else:
        prob = (p1>p2 + 0.0)
    return prob
    
    

class twovectorstrat(Strategy):
    
    def __init__(self,value_aggressive,value_passive):
        self.value_aggressive = value_aggressive
        self.value_passive = value_passive
        
    def play(self,mycard,player,heat=1):
        import random
        probaggressive = calc_prob(self.value_aggressive[mycard],self.value_passive[mycard],heat)
        if random.random() > probaggressive:
            if player == "A":
                return 'Pass'
            else:
                return 'Fold'
        else:
            if player == "A":
                return "Raise"
            else:
                return 'Call'



In [73]:
d = Deck(6)
strata = twovectorstrat([1,1,1,1,1,1],[3,3,3,3,3,3])
stratb = twovectorstrat([0,0,0,0,0,0],[0,0,0,0,0,0])

for i in range(10) : playgame(d,strata,stratb,True,1)

Card A:  0  Card B:  5
Player A:  Raise
Player B:  Call
Payout:  -2

Card A:  0  Card B:  4
Player A:  Pass
Payout:  -1

Card A:  1  Card B:  4
Player A:  Pass
Payout:  -1

Card A:  0  Card B:  5
Player A:  Pass
Payout:  -1

Card A:  4  Card B:  2
Player A:  Raise
Player B:  Call
Payout:  2

Card A:  3  Card B:  5
Player A:  Raise
Player B:  Call
Payout:  -2

Card A:  5  Card B:  4
Player A:  Pass
Payout:  1

Card A:  3  Card B:  0
Player A:  Raise
Player B:  Call
Payout:  2

Card A:  1  Card B:  2
Player A:  Raise
Player B:  Call
Payout:  -2

Card A:  0  Card B:  1
Player A:  Pass
Payout:  -1



In [98]:
""" A/B Learning  both learn together

    Modified for 2vector. 
    
    Will use logistic regression to set each of the vector parameters individually.
    
    Inline training -- play one game, instantly update.
    
   
    
    """
import math
def logit(x):
    p = 1 / ( 1 + math.exp(-x))
    return p

def logodds(p):
    if p == 0: return -10
    elif p == 1: return 10
    else:
        x = math.log(p / (1-p))
        return x


def train_AB_logit2(num_games = 10 ** 6,num_updates = 20 , alpha = 0.005, decksize = 6,
                  start_A =[], start_B =[],heat = 1,verbose = False):

    
    import random
    import copy
    
    # set paramaters and counters

    d = Deck(decksize)
    
    vec = [0 for i in range(decksize)]  # starts random parameter = 0 -> prob 50%
    if start_A == [] :    
        paramvectorA = (copy.deepcopy(vec),copy.deepcopy(vec))
        
    if start_B == []:   # default for B is also 50%
        paramvectorB = (copy.deepcopy(vec),copy.deepcopy(vec))
   
    
    winningsA = 0
    winningsA_temp = 0
    training_updates =[]
    games_update = num_games / num_updates
    
    
    for i in range(num_games+1):
        
        # set strategy based on current parameters for both A and B
        sa = twovectorstrat(*paramvectorA)    
        sb = twovectorstrat(*paramvectorB)

        #play a game
        result = playgame(d,sa,sb,heat,verbose = False)
        
        # Update strategies.  
        # Direction of update now confusing, so let's go for explicitiness over efficiency.  
        # We will update only the vector for the action taken.  
        # Also using poker rules -- if B folds, they don't see each other's card        
        
       
        if result['playA'] == "Pass":
            
            # we will adjust A's passive vector up if we would have lost, down if we would have won, no change for a tie
    
            if result['cardA'] > result['cardB'] :
                delta =  - alpha
            elif result['cardA'] < result['cardB']:
                delta = alpha
            else:
                delta = 0
            paramvectorA[0][result['cardA']] += delta
            #print("Pass:  CardA: ",result['cardA']," CardB: ",result['cardB'],"delta :",delta)
            
            # if A passes, B has no action, so no adjustment
        else:    # if A raises
            # if A raises, his active vector is rewarded by the payout +1 for a fold, +2 for call/win, -2 for a call/loss     
            delta = alpha * result['winA']
            paramvectorA[1][result['cardA']] += delta
            #print("Raise:  CardA: ",result['cardA']," CardB: ",result['cardB'],"delta :",delta)
            # if A raises, then B acts, so we need to adjust B, based on his action
            
            if result['playB'] == "Fold":
                # if B folds, he learns nothing.  Let's keep this vector constant.  For consistency, it should always be zero.
                pass
            else:
                # if B raises, his reward is his payout - the negative of A's
                delta = - alpha * result['winA']
                paramvectorB[1][result['cardB']] += delta
                
        if result['cardB'] == 5:
            print("Call:  CardA: ",result['cardA']," CardB: ",result['cardB'],"params :",
                  paramvectorB[0][5]," ",paramvectorB[1][5])
            
        # avoid over/underflow errors  Likely a better method, but this works within 1%
        for i in (0,1):
            paramvectorA[i][result['cardA']] = max(-10,paramvectorA[i][result['cardA']])
            paramvectorA[i][result['cardA']] = min(10,paramvectorA[i][result['cardA']])
            paramvectorB[i][result['cardB']] = max(-10,paramvectorB[i][result['cardB']])
            paramvectorB[i][result['cardB']] = min(10,paramvectorB[i][result['cardB']])
    
        
        # track performance
        winningsA += result['winA']
        winningsA_temp += result['winA']
 
        # periodically update
        if i % games_update == 0 :
            if verbose:
                if i > 0: 
                    tempwinrate = round(winningsA_temp / games_update,4) 
                else: 
                    tempwinrate = "       "
                print(i, tempwinrate,'A:',[round(calc_prob(x,y,1)*100,2) for x,y in zip(*paramvectorA)],
                      " B: ",[round(calc_prob(x,y,1)*100,2) for x,y in zip(*paramvectorB)])    
            training_updates += [{'games':i,'tempAwins':winningsA_temp,
                                  'logoddsA':copy.deepcopy(paramvectorA),'logoddsB':copy.deepcopy(paramvectorB)}]
            winningsA_temp = 0
    
    if verbose:
        print("Done")
        winrateA = winningsA / num_games
        print("A's Winrate = ", winrateA)
        print("Final Strategies:")
        print('A:',[round(calc_prob(x,y,1)*100,2) for x,y in zip(*paramvectorA)])
        print('B:',[round(calc_prob(z,w,1)*100,2) for z,w in zip(*paramvectorB)])        
        
    return {'num_games':num_games, 'winningsA':winningsA,'final model A':paramvectorA,
            'final model B':paramvectorB,'training_updates':training_updates}
        
    



In [99]:
t = train_AB_logit2(num_games = 100,num_updates = 20 , alpha = 0.005, decksize = 6,
                  start_A =[], start_B =[],verbose = True,heat = 1)

Call:  CardA:  1  CardB:  5 params : 0   0
Call:  CardA:  3  CardB:  5 params : 0   0
Call:  CardA:  4  CardB:  5 params : 0   0.01
Call:  CardA:  2  CardB:  5 params : 0   0.01
Call:  CardA:  2  CardB:  5 params : 0   0.01
Call:  CardA:  1  CardB:  5 params : 0   0.01
Call:  CardA:  5  CardB:  5 params : 0   0.01
Call:  CardA:  4  CardB:  5 params : 0   0.02
Call:  CardA:  4  CardB:  5 params : 0   0.03
Call:  CardA:  2  CardB:  5 params : 0   0.03
Call:  CardA:  5  CardB:  5 params : 0   0.03
Call:  CardA:  5  CardB:  5 params : 0   0.03
Call:  CardA:  5  CardB:  5 params : 0   0.03
Call:  CardA:  0  CardB:  5 params : 0   0.03
Done
A's Winrate =  0.15
Final Strategies:
A: [50.37, 50.18, 50.56, 49.33, 49.5, 49.13]
B: [50.63, 50.13, 49.88, 49.88, 49.38, 49.63]


In [36]:
"""Challenge Routine.
Plays n games between two strategies and returns the net result.
Used for testing
"""

def challenge(num_games,strata,stratb,strataname = "",stratbname = "",verbose = False,heat=1):

    decksize = 6
    d = Deck(decksize)

    a_net_wins = 0

    if verbose: print("Player A: ", strataname,"   Player B: ", stratbname )

    for i in range (num_games):
        a_net_wins += playgame(d,strata,stratb,heat)['winA']
        if verbose and i % 1000 == 0: print(i," games played")

    if verbose: print(strataname, " won $", a_net_wins, "  $", a_net_wins / num_games, " per game.")
    
    return a_net_wins

In [363]:
"""Second compare.  Used to compare outcomes.

   Moving from ad-hoc review of performance to something better.


'num_games':num_games, 'winnings':winnings,'final model':paramvector,'training_updates':training_updates

"""
def compare_outcomes (args):
    
    # args array of - dict.  'name' 'decksize' 'alpha' 'initial' 'opponent'
    
    results =[]
    
    print("Name  WinRate      Probvector")
    
    for arg in args:
            
        results = train_A_logit(num_games = arg['num_games'], num_updates = arg['num_updates'],  
                             alpha = arg['alpha'], decksize = arg['decksize'],
                             start_strategy = arg['initial'], strategyB = arg['opponent'],
                             verbose = arg['verbose'],pass_win_reward = arg['pass win reward'])
        arg['winrate'] = results['winnings'] / results['num_games']
        arg['model_logodds'] = results['final model']
        arg['model_probs'] = [round(logit(x)*100,1) for x in results['final model']]
        
        print(arg['name']," $",round(arg['winrate'],2),"  ",arg['model_probs'])    
 
    
    
    

In [371]:
"""Tournament.  Runs a tournament among several A and B strategies.

    Used some in testing, but not important in v1"""

num_games = 1000000
verbose = True

Astrategies = []
s = vectorstrat([.5,.5,.5,.5,.5,.5])
Astrategies += [{'strat':s,'name':"Random    "}]
s = vectorstrat([100.0, 97.1, 50.0, 0.4, 0.0, 0.0])
Astrategies += [{'strat':s,'name':"Original "}]
s = vectorstrat([99.9, 99.7, 99.9, 99.0, 100.0, 100.0])
Astrategies += [{'strat':s,'name':"P/W Reward -1"}]


Bstrategies = []
s = vectorstrat([.5,.5,.5,.5,.5,.5])
Bstrategies += [{'strat':s,'name':"Random    "}]
s = vectorstrat([0,0,0,1,1,1])
Bstrategies += [{'strat':s,'name':"Def Opponent"}]
s = vectorstrat([0,0,1,1,1,1])
Bstrategies += [{'strat':s,'name':"Simple 2"}]


for bstrat in Bstrategies:
    bstrat['profit'] = 0
    
for astrat in Astrategies:
    astrat['profit'] = 0
    for bstrat in Bstrategies:
        if verbose: print (astrat['name']," playing ",bstrat['name'])
        awins = challenge(num_games,astrat['strat'],bstrat['strat'],verbose = False)
        astrat['profit'] += awins
        bstrat['profit'] -= awins
        astrat[bstrat['name']] = awins
    astrat['average'] = astrat['profit'] / (num_games * len(Bstrategies))
    
for bstrat in Bstrategies:
    bstrat['average'] = bstrat['profit'] / (num_games * len(Astrategies))
    

print()
print ("Tournament Results -- winnings for Strategy A")
print("                        Strategy B")
print ("Strategy A    | ", end = "")
for b in Bstrategies:
    print(b['name'], end =" | ")
print("Total")
print("_______________________________________________")
for a in Astrategies:
    print(a['name'], end = "    |")
    for b in Bstrategies:
        print(a[b['name']], end = "         |")
    print(a['profit'])
print("Total         |", end = "")
for b in Bstrategies:
    print(-b['profit'], end = "         |")
print(-sum([b['profit'] for b in Bstrategies]))
    
        
    
                            


Random      playing  Random    
Random      playing  Def Opponent
Random      playing  Simple 2
Original   playing  Random    
Original   playing  Def Opponent
Original   playing  Simple 2
P/W Reward -1  playing  Random    
P/W Reward -1  playing  Def Opponent
P/W Reward -1  playing  Simple 2

Tournament Results -- winnings for Strategy A
                        Strategy B
Strategy A    | Random     | Def Opponent | Simple 2 | Total
_______________________________________________
Random        |251530         |-436         |-54866         |196228
Original     |280976         |-23670         |-206499         |50807
P/W Reward -1    |498159         |6         |-111341         |386824
Total         |1030665         |-24100         |-372706         |633859


In [299]:
"""Compare learning.  

Not used in 1.1

   Compares two learning set-ups and graphs covergence stats.  
   Use to set hyperparameters (num_games, alpha) and test covergence
   Moving from ad-hoc review of performance to something better.

    TO DO (maybe)
    make graphics

"""
def av_vol(paramseries):
    # computes the moving GARCH vols of a timeseries of vectors, then takes the mean of the vector at each time.
    
    decayfactor = .5
    
    movingvars = [0 for i in paramseries[0]]
    previous = paramseries[0]
    av_vols =[]
    
    for newvalues in paramseries:
        movingvars = [decayfactor * oldmoving + (1 - decayfactor) * (old-new)**2 
                      for oldmoving,old,new in zip(movingvars,previous,newvalues)]
        movingvols = [x**0.5 for x in movingvars]
        av_vols += [(sum(movingvols)/len(movingvols))]
        previous = newvalues
        
    return av_vols
        
    


def compare (args,num_games = 10 ** 6, num_updates = 20):
    
    # args array of - dict.  'name' 'decksize' 'alpha' 'initial' 'opponent'
    
    tempwins = []
    games = []
    logodds = []
    performance = []
    stability = []
    
    for arg in args:
    
        arg['tempwins'] = []
        arg['games']= []
        arg['logodds'] = []
        arg['performance'] = []
        arg['stability'] = []
        
        results = train_A_logit(num_games, num_updates, 
                             alpha = arg['alpha'], decksize = arg['decksize'],
                             start_strategy = arg['initial'], strategyB = arg['opponent'],verbose = False)
  
        # transform results so they can be accessed easily -- Find better way to pass these.
    
        arg['tempwins'] += [[update['tempwins'] for update in results['training_updates']]]
        arg['games'] += [[update['games'] for update in results['training_updates']]]
        arg['logodds'] += [[update['logodds'] for update in results['training_updates']]]

        # compute performance and stability
        arg['performance'] += [ (win / (num_games/num_updates)) for win in arg['tempwins'][-1]]
        arg['stability'] += [av_vol(arg['logodds'][-1])]
 
    
    
    print ("                   ",end ="")
    for arg in args: 
        print(arg['name'],"               ",end = "")
    print("")
    print ("Games      ",end ="")
    for arg in args: 
        print("Performance   Stability   ",end ="")
    print("")
    
    #below is very ugly but cant figure out a better way
    if len(args) == 2:
        for g,p1,s1,p2,s2 in zip(args[0]['games'][0],args[0]['performance'],args[0]['stability'][0],
                                 args[1]['performance'],args[1]['stability'][0]):
            print(g,"     ",round(p1,4),"      ",round(s1,4),"     ",round(p2,4),"      ",round(s2,4))
    elif len(args)== 3:
        for g,p1,s1,p2,s2,p3,s3 in zip(args[0]['games'][0],args[0]['performance'],args[0]['stability'][0],
                                 args[1]['performance'],args[1]['stability'][0],
                                 args[2]['performance'],args[2]['stability'][0]):
            print(g,"     ",round(p1,4),"      ",round(s1,4),"     ",round(p2,4),"      ",round(s2,4),
                 "     ",round(p3,4),"      ",round(s3,4))
    else:
        for g,p1,s1,p2,s2,p3,s3,p4,s4 in zip(args[0]['games'][0],args[0]['performance'],args[0]['stability'][0],
                                 args[1]['performance'],args[1]['stability'][0],
                                 args[2]['performance'],args[2]['stability'][0],
                                 args[3]['performance'],args[3]['stability'][0],):
            print(g,"     ",round(p1,4),"      ",round(s1,4),"     ",round(p2,4),"      ",round(s2,4),
                 "     ",round(p3,4),"      ",round(s3,4), "     ",round(p4,4),"      ",round(s4,4))
