In [1]:
from tictactoe import *

class QPlayer(BasePlayer):
    """Tabular Q-learning player.

    The Q-table has one row per state index (3**9 rows) and one column per
    board cell (9 columns). ``move`` remembers the state it was asked to act
    in and the action it chose; ``update`` then applies the standard
    Q-learning rule to that (state, action) pair.

    Naming note: ``explore`` is the probability of a random move (what the
    literature usually calls epsilon), while ``epsilon`` here is the
    *discount factor* applied to the best value of the next state (usually
    called gamma). The parameter names are kept for backward compatibility.
    """

    EXPLORE = 0.05  # default probability of playing a random move
    ALPHA = 0.10    # default learning rate
    EPSILON = 0.90  # default discount factor (see naming note above)

    def __init__(self, explore=EXPLORE, alpha=ALPHA, epsilon=EPSILON):
        """Create a player with an all-zero Q-table.

        Args:
            explore: probability of ignoring the Q-table and playing a
                random move.
            alpha: learning rate used by ``update``.
            epsilon: discount factor used by ``update``.
        """
        # BasePlayer lives in the tictactoe module (not shown here); chain to
        # it so any state it sets up exists on this instance as well.
        # NOTE(review): assumes BasePlayer.__init__ takes no arguments.
        super().__init__()
        self.q_table = np.zeros([3**9, 9])
        self.explore = explore
        self.a = alpha
        self.e = epsilon
        # Last (state, action) pair chosen by move(); None until the first move
        # so that update() can tell there is nothing to learn from yet.
        self.state = None
        self.action = None

    def move(self, game, state):
        """Choose a cell to play in ``state`` and remember the choice.

        With probability ``explore`` (or if no cell reports as empty) the move
        comes from ``game.sample()``. Otherwise the empty cell with the
        highest Q-value is played, with ties broken uniformly at random.

        Returns:
            The chosen action (cell index).
        """
        self.state = state
        n_actions = self.q_table.shape[1]
        # Only empty cells are candidates. Occupied cells are never played, so
        # their Q-values stay at 0 and would otherwise beat any legal move
        # whose learned value is negative.
        legal = np.array([a for a in range(n_actions) if game.is_empty(a)],
                         dtype=int)
        if legal.size == 0 or np.random.random() < self.explore:
            self.action = game.sample()
        else:
            values = self.q_table[self.state][legal]
            best = legal[values == values.max()]
            # Random tie-breaking keeps an untrained (all-zero) table from
            # always picking the lowest-numbered empty cell.
            self.action = int(np.random.choice(best))
        return self.action

    def update(self, game, state, reward):
        """Apply one Q-learning update for the most recent move.

        Args:
            game: the game being played (unused here).
            state: the state reached after the remembered move.
            reward: the reward observed for the remembered move.
        """
        if self.state is None or self.action is None:
            # No move has been made yet, so there is nothing to credit.
            return
        old_value = self.q_table[self.state][self.action]
        target = reward + self.e * np.amax(self.q_table[state])
        self.q_table[self.state][self.action] = ((1 - self.a) * old_value
                                                  + self.a * target)

    def __str__(self):
        """Return a short description including the sum of all Q-values."""
        return 'QPlayer: ' + str(np.sum(self.q_table))

In [2]:
# Baseline: two random players against each other, once with each player as
# the first argument to Game. Each ordering runs 10 rounds of 1000 games;
# after every round the stats of the player passed first are printed.
r0 = RandomPlayer()
r1 = RandomPlayer()

for reported, opponent in ((r0, r1), (r1, r0)):
    print('==============================================================')
    g = Game(reported, opponent)
    for m in range(10):
        # Metrics are cleared each round so the printed counts cover only
        # the 1000 games just played.
        r0.reset_metrics()
        r1.reset_metrics()
        for n in range(1000):
            g.play()
        games_counted = reported.wins + reported.losses + reported.ties
        print(g.i, reported.wins, reported.losses, reported.ties, games_counted, reported)

1000 612 274 114 1000 I make random moves
2000 575 297 128 1000 I make random moves
3000 602 263 135 1000 I make random moves
4000 593 291 116 1000 I make random moves
5000 593 298 109 1000 I make random moves
6000 580 285 135 1000 I make random moves
7000 581 288 131 1000 I make random moves
8000 592 283 125 1000 I make random moves
9000 602 290 108 1000 I make random moves
10000 589 284 127 1000 I make random moves
1000 568 290 142 1000 I make random moves
2000 574 288 138 1000 I make random moves
3000 594 281 125 1000 I make random moves
4000 581 284 135 1000 I make random moves
5000 591 272 137 1000 I make random moves
6000 579 302 119 1000 I make random moves
7000 596 295 109 1000 I make random moves
8000 604 277 119 1000 I make random moves
9000 581 295 124 1000 I make random moves
10000 604 275 121 1000 I make random moves


In [4]:
# Random player (first Game argument) against the procedural player (second
# argument): 10 rounds of 1000 games, reporting the procedural player's
# per-round win/loss/tie counts.
r0 = RandomPlayer()
p0 = ProceduralPlayer()
g = Game(r0, p0)
for epoch in range(10):
    # Clear both players' counters so each printed row covers one round only.
    for player in (r0, p0):
        player.reset_metrics()
    for _ in range(1000):
        g.play()
    games_counted = p0.wins + p0.losses + p0.ties
    print(g.i, p0.wins, p0.losses, p0.ties, games_counted, p0)

1000 280 580 140 1000 Pretty good procedural player
2000 292 577 131 1000 Pretty good procedural player
3000 285 608 107 1000 Pretty good procedural player
4000 286 574 140 1000 Pretty good procedural player
5000 305 556 139 1000 Pretty good procedural player
6000 280 585 135 1000 Pretty good procedural player
7000 300 577 123 1000 Pretty good procedural player
8000 293 599 108 1000 Pretty good procedural player
9000 290 585 125 1000 Pretty good procedural player
10000 304 571 125 1000 Pretty good procedural player


In [None]:
# Train the Q-learning player against a random opponent and report its
# per-round win/loss/tie counts.
q = QPlayer()
r = RandomPlayer()
# The learner has to be one of the two players handed to Game. The original
# cell passed the same RandomPlayer for both seats, so `q` never played and
# its printed metrics did not reflect any games.
g = Game(q, r)
# NOTE(review): 10000 rounds x 10000 games is 10**8 games in total, far more
# than the 10 x 1000 used by the other cells; confirm this is intended.
for m in range(10000):
    # Clear both players' counters so each printed row covers one round only.
    q.reset_metrics()
    r.reset_metrics()
    for n in range(10000):
        g.play()
    print(g.i,q.wins,q.losses,q.ties,q.wins+q.losses+q.ties,q)
    