In [1]:
import numpy as np
from itertools import product
from IPython.display import clear_output
import pickle

from TicTacToeGame import TTTBoard, getkey
from RecursiveLearning import Recursivelearning
from QLearning import Qlearning
from PlayGame import ai_vs_random, ai_vs_ai, play_game

from time import time

In [2]:
# Baseline: both players pick random squares, to see how often each side wins by chance.

def random_game():
    """Play a single game in which every move is a randomly drawn square.

    Each iteration draws a coordinate pair with both components in 0..2.
    Pairs that the board rejects via ``possible`` are thrown away and a new
    pair is drawn on the next iteration.

    Returns:
        The board object once its ``done`` flag is set.
    """
    state = TTTBoard()
    while not state.done:
        move = (np.random.randint(0, 3), np.random.randint(0, 3))
        if not state.possible(move):
            continue  # square not playable: redraw
        state = state.update(move)
    return state

# for _ in random_game().history:
#     print(_['s'])
#     print('-----------------')

# Tally 10,000 random games. Per the labels below, a score of -1 is counted
# for the first player, 1 for the second player and 0 for a tie.
scores = [random_game().score() for _ in range(10000)]
print(f'1st player win: {scores.count(-1)}')
print(f'2nd player win: {scores.count(1)}')
print(f'Tie game: {scores.count(0)}')

1st player win: 5772
2nd player win: 2958
Tie game: 1270


# Recursive Learning

In [3]:
# Build the Recursivelearning model, run its training and report the elapsed
# wall-clock time in seconds.
t0 = time()
training_r = Recursivelearning()
training_r.train()
print('Training time: '+str(time()-t0))

# Persist the trained object. The context manager guarantees the file is
# closed even if pickling raises part-way through.
with open('RecursiveSolution', 'wb') as f:
    pickle.dump(training_r, f)

Training time: 95.59861707687378


In [4]:
# 10,000 games of the recursive AI against a random opponent with the default
# move order; per the labels below the random player is first (-1) and the AI
# is second (1).
scores = [ai_vs_random(training_r) for _ in range(10000)]
print(f'Random 1st player: {scores.count(-1)}')
print(f'AI 2nd player: {scores.count(1)}')
print(f'Tie game: {scores.count(0)}')

Random 1st player: 0
AI 2nd player: 8107
Tie game: 1893


In [5]:
# Same match-up with ai_first=True; per the labels below the AI is now the
# first player (-1) and the random opponent the second (1).
scores = [ai_vs_random(training_r, ai_first=True) for _ in range(10000)]
print(f'AI 1st player: {scores.count(-1)}')
print(f'Random 2nd player: {scores.count(1)}')
print(f'Tie game: {scores.count(0)}')

AI 1st player: 9944
Random 2nd player: 0
Tie game: 56


In [6]:
# Recursive AI against itself for 10,000 games, tallied by outcome
# (-1 first player, 1 second player, 0 tie, per the labels below).
scores = [ai_vs_ai(training_r) for _ in range(10000)]
print(f'AI 1st player: {scores.count(-1)}')
print(f'AI 2nd player: {scores.count(1)}')
print(f'Tie game: {scores.count(0)}')

AI 1st player: 0
AI 2nd player: 0
Tie game: 10000


# Semi-Random Learning (Q-Learning)

In [7]:
# Train the Qlearning model by simulating n_games games, then report the
# elapsed wall-clock time in seconds.
t0 = time()
training_q = Qlearning()
n_games = 300000
# A named index is used instead of `_`, which IPython uses for the previous
# cell output and which should not be rebound by a loop.
for game_idx in range(n_games):
    training_q.simulate_game()
    if game_idx % 1000 == 0:
        # Progress counter, refreshed every 1000 games. wait=True delays the
        # clear until the next print so the display does not flicker.
        clear_output(wait=True)
        print(game_idx)
print('Training time: '+str(time()-t0))

# Persist the trained object. The context manager guarantees the file is
# closed even if pickling raises part-way through.
with open('QLearnedSolution', 'wb') as f:
    pickle.dump(training_q, f)

299000
Training time: 331.6963872909546


In [8]:
# 10,000 games of the Q-learning AI against a random opponent with the default
# move order; per the labels below the random player is first (-1) and the AI
# is second (1).
scores = [ai_vs_random(training_q) for _ in range(10000)]
print(f'Random 1st player: {scores.count(-1)}')
print(f'AI 2nd player: {scores.count(1)}')
print(f'Tie game: {scores.count(0)}')

Random 1st player: 21
AI 2nd player: 8828
Tie game: 1151


In [9]:
# Same match-up with ai_first=True; per the labels below the AI is now the
# first player (-1) and the random opponent the second (1).
scores = [ai_vs_random(training_q, ai_first=True) for _ in range(10000)]
print(f'AI 1st player: {scores.count(-1)}')
print(f'Random 2nd player: {scores.count(1)}')
print(f'Tie game: {scores.count(0)}')

AI 1st player: 9959
Random 2nd player: 0
Tie game: 41


In [10]:
# AI vs AI: the Q-learning agent plays itself for 10,000 games, tallied by
# outcome (-1 first player, 1 second player, 0 tie, per the labels below).
scores = [ai_vs_ai(training_q) for _ in range(10000)]
print(f'AI 1st player: {scores.count(-1)}')
print(f'AI 2nd player: {scores.count(1)}')
print(f'Tie game: {scores.count(0)}')

AI 1st player: 0
AI 2nd player: 0
Tie game: 10000


# Play Against the AI Yourself

In [11]:
# Start an interactive game against the trained Q-learning agent; the session
# prompts for an x and y value on each of your turns.
play_game(training_q)

['_', '_', '_']
['_', '_', '_']
['_', '_', '_']

Your Move
x: 1
y: 1


['_', '_', '_']
['_', 'X', '_']
['_', '_', '_']

ai_move
(2, 0)

['_', '_', '_']
['_', 'X', '_']
['O', '_', '_']

Your Move
x: 0
y: 0


['X', '_', '_']
['_', 'X', '_']
['O', '_', '_']

ai_move
(2, 2)

['X', '_', '_']
['_', 'X', '_']
['O', '_', 'O']

Your Move
x: 2
y: 1


['X', '_', '_']
['_', 'X', '_']
['O', 'X', 'O']

ai_move
(0, 1)

['X', 'O', '_']
['_', 'X', '_']
['O', 'X', 'O']

Your Move
x: 1
y: 2


['X', 'O', '_']
['_', 'X', 'X']
['O', 'X', 'O']

ai_move
(1, 0)

['X', 'O', '_']
['O', 'X', 'X']
['O', 'X', 'O']

Your Move
x: 0
y: 2


['X', 'O', 'X']
['O', 'X', 'X']
['O', 'X', 'O']

['X', 'O', 'X']
['O', 'X', 'X']
['O', 'X', 'O']
Game Score: 0
