In [None]:
# Functions for setting up and playing TicTacToe
import numpy as numpy
import copy
import random

# Generate an empty board
def getEmptyBoard():
    """Return a new list of nine zeros, i.e. a board on which every cell is empty."""
    return [0] * 9


# Generate a random board whose mark counts fit alternating play with player 1 moving first (other rule violations, e.g. both players having a winning line, are not filtered out)
def getRandomBoard():
    """Draw random 9-cell boards until the mark counts are plausible, then return one.

    Each cell is drawn from {0, 1, 2} with numpy.random.choice. A candidate is
    accepted only when the number of 1s equals the number of 2s or exceeds it
    by exactly one. No other rule is checked, so an accepted board may still
    contain winning lines for both players.

    Returns
    -------
    list
        Nine values as produced by ``list(numpy.random.choice(...))``.
    """
    while True:
        candidate = list(numpy.random.choice([0, 1, 2], 9))
        surplus = candidate.count(1) - candidate.count(2)
        if 0 <= surplus <= 1:
            return candidate


# Show the board
def showBoard(board):
    """Print the board as three lines of space-separated symbols.

    Cell values index the symbol table: 0 -> "·", 1 -> "O", 2 -> "X".
    Every row ends with a newline and print() appends one more, so the board
    is followed by a blank line.
    """
    symbols = ["·", "O", "X"]
    rows = []
    for start in (0, 3, 6):
        cells = [symbols[board[start + offset]] for offset in range(3)]
        rows.append(" ".join(cells) + "\n")
    print("".join(rows))


# Check for winning line
def score(board):
    """Return the mark that fills a complete line, or 0 when no line is complete.

    The eight lines are checked in a fixed order: rows, then columns, then
    diagonals. If several lines are complete, the mark of the last one checked
    is returned. This only matters for boards that hold complete lines of both
    marks.
    """
    triples = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )
    winner = 0
    for a, b, c in triples:
        if board[a] != 0 and board[a] == board[b] == board[c]:
            # Deliberately no early exit: a later complete line overrides an earlier one.
            winner = board[a]
    return winner


# Check if the game is over
def gameOver(board):
    """Return True when the board has a winning line or no empty cell left, else False."""
    winner = score(board)
    boardIsFull = board.count(0) == 0
    return bool(boardIsFull or winner != 0)


# Make a move
def doMove(board,move,who):
    """Return a deep copy of ``board`` with cell ``move`` set to ``who``.

    The board passed in is left untouched. The function does not check whether
    the target cell is empty.
    """
    updated = copy.deepcopy(board)
    updated[move] = who
    return updated


# Get all legal moves
def getAllMoves(board):
    """Return, in ascending order, the indices 0-8 whose cell is empty (equal to 0)."""
    return [cell for cell in range(9) if board[cell] == 0]

In [None]:
# Smoke tests for the board helpers: print what each helper produces and
# assert the invariants that must hold, so a regression fails loudly instead
# of only changing the printed output.

# Test empty board
print("Empty board tests:")
testEmptyBoard = getEmptyBoard()
print(testEmptyBoard)
showBoard(testEmptyBoard)
print("Score:",score(testEmptyBoard))
assert testEmptyBoard == [0,0,0,0,0,0,0,0,0], "empty board must be nine zeros"
assert score(testEmptyBoard) == 0, "an empty board has no winning line"
assert getAllMoves(testEmptyBoard) == list(range(9)), "every cell of an empty board is a legal move"
assert not gameOver(testEmptyBoard), "an empty board is not a finished game"

# Test random boards
noOfTests = 3
print("\n%d random board tests:\n" % noOfTests)

for i in range(noOfTests):
    print("Run",i+1)
    testRandomBoard = getRandomBoard()
    showBoard(testRandomBoard)
    print("Score:",score(testRandomBoard),"\n")
    assert len(testRandomBoard) == 9, "a board has nine cells"
    # getRandomBoard only accepts boards where player 1 has as many marks as player 2, or one more
    assert 0 <= testRandomBoard.count(1) - testRandomBoard.count(2) <= 1, "mark counts out of balance"

# Test doMove
print("\nTest doMove():")
testDoMove=getEmptyBoard()
showBoard(testDoMove)
testDoMove=doMove(testDoMove,4,1)
showBoard(testDoMove)
assert testDoMove == [0,0,0,0,1,0,0,0,0], "doMove must set exactly the requested cell"

# Test getAllMoves
print("\nTest getAllMoves():")
testGetAllMoves=getRandomBoard()
showBoard(testGetAllMoves)
print(getAllMoves(testGetAllMoves))
assert getAllMoves(testGetAllMoves) == [k for k, cell in enumerate(testGetAllMoves) if cell == 0], \
    "getAllMoves must list exactly the empty cells"

# Test gameOver
print("\nTest gameOver():")
for i in range(4):
    testGameOver=getRandomBoard()
    showBoard(testGameOver)
    print(gameOver(testGameOver),"\n---")
    # A game is over exactly when somebody has a line or the board is full
    assert gameOver(testGameOver) == (score(testGameOver) != 0 or testGameOver.count(0) == 0)

In [None]:
# Generate a random strategy function
def addPossibleBoards(board,who,_visited=None):
    """Store a uniform move distribution for every unfinished board reachable from ``board``.

    The game tree is walked depth-first with ``who`` placing the next mark and
    the players alternating afterwards. For each board that has no winning line
    and at least one empty cell, the module-level dict ``randomStrategy`` gets
    an entry keyed by ``tuple(board)``. Its value is a numpy array of equal
    probabilities, one per move returned by getAllMoves() and in that order.

    The walk stops at finished boards and expands each board only once.
    Positions below a won board always still contain that line, so they would
    never be stored. Revisiting a board through another move order would only
    rewrite the same values. The resulting table and key insertion order are
    therefore the same as for an exhaustive walk of every move sequence, at a
    fraction of the work.

    Parameters
    ----------
    board : sequence of 9 cell values (0 empty, 1 or 2 for the players)
    who : int
        Mark (1 or 2) of the player to move on ``board``.
    _visited : set, optional
        Internal bookkeeping of boards already expanded during this walk.
        Callers should leave it unset. It assumes a board is always met with
        the same player to move, which holds when the walk starts from a
        single root.
    """
    if _visited is None:
        _visited=set()
    key=tuple(board)
    if key in _visited:
        return
    _visited.add(key)

    moves=getAllMoves(board)
    # Finished game (line completed or board full): nothing to store, nothing to expand
    if score(board)!=0 or len(moves)==0:
        return
    randomStrategy[key]=numpy.ones(len(moves))/len(moves)

    flipMove=[0,2,1]  # maps player 1 -> 2 and 2 -> 1
    for move in moves:
        addPossibleBoards(doMove(board,move,who),flipMove[who],_visited)


# Build the random strategy table.
# addPossibleBoards() writes into this module-level dict, so start from an empty one.
randomStrategy={}
addPossibleBoards(getEmptyBoard(),1)

# Inspect the table: its size, the entry for the empty board, and one mid-game entry
print(len(randomStrategy))
print(randomStrategy[tuple(getEmptyBoard())])
board=[1,1,2,
       2,1,0,
       0,0,2]
showBoard(board)
print(randomStrategy[tuple(board)])

# Persist the random strategy to disk (disabled)
#import pickle
#pickle.dump(randomStrategy,open("./inClassRandomPolicy.p","wb"))

In [None]:
# Q-learning algorithm

In [None]:
# Load policies from files
# NOTE: pickle.load can execute arbitrary code while loading; only open policy files you trust.
# NOTE(review): "./inClassRandomPolicy.p" is only written by a commented-out pickle.dump earlier
# in this notebook, so the file presumably has to exist already — confirm before a fresh run.
import pickle

# Context managers close the files even if unpickling fails
with open("./inClassRandomPolicy.p","rb") as policyFile:
    randomPolicy=pickle.load(policyFile)
print("randomPolicy length:", len(randomPolicy))
with open("./perfectPolicy.p","rb") as policyFile:
    perfectPolicy=pickle.load(policyFile)
print("perfectPolicy length:", len(perfectPolicy))
#with open("./reinforcementPolicy.p","rb") as policyFile:
#    reinforcementPolicy=pickle.load(policyFile)
#print("reinforcementPolicy length:", len(reinforcementPolicy))

In [None]:
# Discover policy files

## Perfect policy

#print(perfectPolicy)
#print(list(perfectPolicy.keys()))

# Test perfect policy
#for i in range(10):
#    keys=list(perfectPolicy.keys())
#    which=numpy.random.randint(0,4520)
#    board=keys[which]
#    showBoard(board)
#    print(perfectPolicy[board])
#    print("---")

## Random policy

#print(randomPolicy)
#print(list(randomPolicy.keys()))

# Test perfect policy
# NOTE(review): this loop sits under the "Random policy" heading but samples perfectPolicy —
# confirm which policy was meant to be shown here.
# Show 10 randomly picked boards together with the policy entry stored for each.
keys=list(perfectPolicy.keys())  # built once; the key list does not change between iterations
for i in range(10):
    # Sample over the actual number of entries rather than a hard-coded table size
    which=numpy.random.randint(0,len(keys))
    board=keys[which]
    showBoard(board)
    print(perfectPolicy[board])
    print("---")

In [None]:
# Test perfect policy
# Show 10 randomly picked boards together with the policy entry stored for each.
keys=list(perfectPolicy.keys())  # built once; the key list does not change between iterations
for i in range(10):
    # Sample over the actual number of entries rather than a hard-coded table size
    which=numpy.random.randint(0,len(keys))
    board=keys[which]
    showBoard(board)
    print(perfectPolicy[board])
    print("---")

In [None]:
# Functions for testing policies against each other

# Play a game between two policies
def playTwoPolicies(policyA,policyB,verbose=False):
    """Play one game between two policies and return the result.

    ``policyA`` chooses the moves of player 1, who moves first. ``policyB``
    chooses those of player 2. Each policy is looked up with ``tuple(board)``
    and must yield an array of non-negative weights, one per move returned by
    getAllMoves(board) and in the same order. The weights are normalised
    before a move is sampled with numpy.random.choice.

    The stored policy arrays are never modified: normalisation works on a new
    array. This also allows integer-valued weight arrays, which an in-place
    division would reject.

    Parameters
    ----------
    policyA, policyB : mapping from board tuple to weight array
        A board that is missing from the policy raises KeyError.
    verbose : bool
        When true, print the board before every move, the looked-up weights,
        and the final board.

    Returns
    -------
    int
        0 for a draw, otherwise the mark (1 or 2) of the winning player.
    """
    flipMove=[0,2,1]  # maps player 1 -> 2 and 2 -> 1
    who=1
    board=getEmptyBoard()
    while True:
        if verbose:
            showBoard(board)
        moves=getAllMoves(board)
        policy=policyA if who==1 else policyB
        p=policy[tuple(board)]
        if verbose:
            print(p)
        # Out-of-place division: leaves the caller's policy entry untouched
        p=p/p.sum()
        choice=numpy.random.choice(moves,p=p)
        board=doMove(board,choice,who)
        s=score(board)
        # Finished when a line was completed or the move just made filled the last empty cell
        if s!=0 or len(moves)==1:
            break
        who=flipMove[who]
    if verbose:
        showBoard(board)
    # Any newly completed line contains the mark just placed, so a non-zero score is the winner's mark
    return s


# Sample games between two policies
def sampleGames(policyA,policyB,nrOfGames=100):
    """Play ``nrOfGames`` games of policyA (player 1) against policyB (player 2).

    Returns a numpy array of three fractions indexed by the value that
    playTwoPolicies() returns: [draws, wins of player 1, wins of player 2].
    """
    tally=[0,0,0]
    outcomes=(playTwoPolicies(policyA,policyB) for _ in range(nrOfGames))
    for winner in outcomes:
        tally[winner]+=1
    counts=numpy.array(tally)
    return counts/counts.sum()

In [None]:
# Play a single game between two policies (the returned result is not used here)
playTwoPolicies(randomPolicy,randomPolicy,verbose=False)

# Sample games between policies.
# Each entry is (label, policy for player 1, policy for player 2). The printed array
# holds the fractions [draws, player-1 wins, player-2 wins].
matchups=[
    ("randomPolicy vs randomPolicy",randomPolicy,randomPolicy),
    ("perfectPolicy vs perfectPolicy",perfectPolicy,perfectPolicy),
    ("perfectPolicy vs randomPolicy",perfectPolicy,randomPolicy),
    ("randomPolicy vs perfectPolicy",randomPolicy,perfectPolicy),
    #("reinforcementPolicy vs reinforcementPolicy",reinforcementPolicy,reinforcementPolicy),
    #("reinforcementPolicy vs randomPolicy",reinforcementPolicy,randomPolicy),
    #("reinforcementPolicy vs perfectPolicy",reinforcementPolicy,perfectPolicy),
    #("randomPolicy vs reinforcementPolicy",randomPolicy,reinforcementPolicy),
    #("perfectPolicy vs reinforcementPolicy",perfectPolicy,reinforcementPolicy),
]
for label,firstPolicy,secondPolicy in matchups:
    print(label,sampleGames(firstPolicy,secondPolicy,nrOfGames=100000))