In [14]:
from tictactoe import *

class QPlayer(BasePlayer):
    
    EXPLORE = 0.05
    ALPHA = 0.10
    EPSILON = 0.90
    
    def __init__(self, explore=EXPLORE, alpha=ALPHA, epsilon=EPSILON):
        super().__init__(display=False)
        self.q_table = np.zeros([3**9,9])
        self.explore = explore
        self.a = alpha
        self.e = epsilon
        
    def move(self, game, state):
        self.state = state
        self.action = np.argmax(self.q_table[self.state])
        if not game.is_empty(self.action) or np.random.random() < self.explore:
            self.action = game.sample()
        return self.action

    def update(self, game, state, reward):
        self.q_table[self.state][self.action] = ((1 - self.a) * self.q_table[self.state][self.action]
                                                  + self.a * (reward + self.e * np.amax(self.q_table[state])))

In [15]:
r = RandomPlayer()
q = QPlayer()
g = Game(r,q)
for m in range(10):
    r.reset_metrics()
    q.reset_metrics()
    for n in range(1000):
        g.play()
    print(m,r,q)
    

0 RandomPlayer w/l/t=516/382/102 QPlayer w/l/t=382/516/102
1 RandomPlayer w/l/t=436/490/74 QPlayer w/l/t=490/436/74
2 RandomPlayer w/l/t=376/531/93 QPlayer w/l/t=531/376/93
3 RandomPlayer w/l/t=401/505/94 QPlayer w/l/t=505/401/94
4 RandomPlayer w/l/t=412/510/78 QPlayer w/l/t=510/412/78
5 RandomPlayer w/l/t=361/552/87 QPlayer w/l/t=552/361/87
6 RandomPlayer w/l/t=398/516/86 QPlayer w/l/t=516/398/86
7 RandomPlayer w/l/t=368/542/90 QPlayer w/l/t=542/368/90
8 RandomPlayer w/l/t=386/522/92 QPlayer w/l/t=522/386/92
9 RandomPlayer w/l/t=399/519/82 QPlayer w/l/t=519/399/82


In [18]:
p = PrettyGoodPlayer()
g = Game(p,q)
for m in range(10):
    p.reset_metrics()
    q.reset_metrics()
    for n in range(10000):
        g.play()
    print(m,p,q)

0 PrettyGoodPlayer w/l/t=8867/302/831 QPlayer w/l/t=302/8867/831
1 PrettyGoodPlayer w/l/t=8802/312/886 QPlayer w/l/t=312/8802/886
2 PrettyGoodPlayer w/l/t=8849/290/861 QPlayer w/l/t=290/8849/861
3 PrettyGoodPlayer w/l/t=8819/318/863 QPlayer w/l/t=318/8819/863
4 PrettyGoodPlayer w/l/t=8863/282/855 QPlayer w/l/t=282/8863/855
5 PrettyGoodPlayer w/l/t=8817/308/875 QPlayer w/l/t=308/8817/875
6 PrettyGoodPlayer w/l/t=8827/306/867 QPlayer w/l/t=306/8827/867
7 PrettyGoodPlayer w/l/t=8867/315/818 QPlayer w/l/t=315/8867/818
8 PrettyGoodPlayer w/l/t=8809/311/880 QPlayer w/l/t=311/8809/880
9 PrettyGoodPlayer w/l/t=8806/298/896 QPlayer w/l/t=298/8806/896


In [19]:
r = RandomPlayer()
g = Game(r,q)
for m in range(10):
    r.reset_metrics()
    q.reset_metrics()
    for n in range(1000):
        g.play()
    print(m,r,q)

0 RandomPlayer w/l/t=377/540/83 QPlayer w/l/t=540/377/83
1 RandomPlayer w/l/t=359/558/83 QPlayer w/l/t=558/359/83
2 RandomPlayer w/l/t=370/548/82 QPlayer w/l/t=548/370/82
3 RandomPlayer w/l/t=375/544/81 QPlayer w/l/t=544/375/81
4 RandomPlayer w/l/t=394/523/83 QPlayer w/l/t=523/394/83
5 RandomPlayer w/l/t=360/548/92 QPlayer w/l/t=548/360/92
6 RandomPlayer w/l/t=362/561/77 QPlayer w/l/t=561/362/77
7 RandomPlayer w/l/t=363/549/88 QPlayer w/l/t=549/363/88
8 RandomPlayer w/l/t=386/516/98 QPlayer w/l/t=516/386/98
9 RandomPlayer w/l/t=371/536/93 QPlayer w/l/t=536/371/93
