In [1]:
import sys

class tic_tac_toe_node:
    """
    One tic-tac-toe board position inside the game-search tree.

    The board is kept in ``state`` as a flat list of nine single-character
    cells in row-major order: 'X', 'O' or '.' for an empty cell.
    ``depth`` is the number of moves this node sits below the node the
    search started from, and ``winner`` holds the result of the most recent
    playerCounts() scan ('X', 'O' or None).
    """

    # Set of 3-index combinations that produce a valid three-in-a-row.
    #  We check these to see if we have any winners.
    winners = [[0, 1, 2], [3, 4, 5], [6, 7, 8],
               [0, 3, 6], [1, 4, 7], [2, 5, 8],
               [0, 4, 8], [2, 4, 6]]

    def __init__(self, state=None, depth=0):
        """
        Initializes a tic-tac-toe node.

        state: list of nine cells to use as the board (stored as-is, not
               copied); an empty board is created when omitted.
        depth: distance of this node from the search root.
        """
        self.depth = depth
        self.winner = None

        if state is None:
            self.state = ["."] * 9
        else:
            self.state = state

    def __str__(self):
        """
        Creates a string representation of the tic-tac-toe board state:
        three rows of three cells, each row preceded by a newline.
        """
        return "".join("\n" + "".join(self.state[start:start + 3])
                       for start in (0, 3, 6))

    def getWinner(self):
        """
        Rescans the board and returns the winner ('X', 'O' or None).
        """
        self.playerCounts()
        return self.winner

    def playerCounts(self):
        """
        For each win combination, counts the number of X's and O's in it.

        Side effect: refreshes self.winner for the current board.
        Returns a list of [xCount, oCount] pairs, one per entry of
        ``winners`` and in the same order.
        """
        # Start from a clean slate so that a winner recorded by an earlier
        # scan cannot survive a later change to the board.
        self.winner = None
        counts = []

        # Traverse all winning cell combinations (i.e. three-in-a-row combos)
        for win in self.winners:
            cells = [self.state[i] for i in win]
            xCount = cells.count('X')
            oCount = cells.count('O')

            # Since we are counting, we can easily identify a winner and
            #  save it on the node.
            if xCount == 3:
                self.winner = 'X'
            if oCount == 3:
                self.winner = 'O'

            # Append the x and o counts for the combo.
            counts.append([xCount, oCount])

        return counts

    def terminal_test(self, ply):
        """
        Returns True if
        a. Max depth reached (self.depth equals ply)
        b. No free spaces
        c. Winner exists
        """
        # Depth
        if self.depth == ply:
            return True

        # Free spaces
        if '.' not in self.state:
            return True

        # Winners?  (playerCounts refreshes self.winner as a side effect)
        self.playerCounts()
        return self.winner is not None

    def utility(self, player):
        """
        Determines the utility of a state relative to a given player.

        A decided game scores +9999 (player won) or -9999 (player lost),
        divided by the node depth so that results closer to the root weigh
        more.  Otherwise the score is a heuristic over the win combinations
        that the opponent has not yet entered.
        """
        count_to_score_us = {0: 0, 1: 1, 2: 10}
        count_to_score_them = {0: 0, 1: 1, 2: 100}

        # Gets the count per combo and determines if a winner
        #  exists (self.winner)
        counts = self.playerCounts()

        # If the player is the winner, maximum utility;
        # else, if the opponent wins, the major penalty.
        if self.winner is not None:
            # A node at depth 0 used to divide by zero here; treat it like
            # depth 1 so a decided root position still gets a finite score.
            scale = max(self.depth, 1)
            if self.winner == player:
                return 9999 / scale
            return -9999 / scale

        score = 0
        for xCount, oCount in counts:
            # Any player other than 'X' is scored from O's point of view.
            if player == 'X':
                ours, theirs = xCount, oCount
            else:
                ours, theirs = oCount, xCount

            # Only combos the opponent has not touched contribute.
            # NOTE(review): because of this guard the count_to_score_them
            # term is always 0, so opponent threats are never penalised.
            # That looks unintended -- confirm before changing the heuristic.
            if theirs == 0:
                score += count_to_score_us[ours] - count_to_score_them[theirs]

        return score

    def placePlayerMove(self, row, col):
        """
        Places the human player's piece ('X') at (row, col).

        Returns True if successful, False when the coordinates fall outside
        the 3x3 board or the cell is already occupied.
        """
        # Reject out-of-range coordinates explicitly: negative values would
        # otherwise wrap around to a different cell via negative indexing.
        if not (0 <= row < 3 and 0 <= col < 3):
            return False

        index = row * 3 + col
        if self.state[index] != '.':
            return False

        self.state[index] = 'X'
        return True

    def placeMove(self, index, player):
        """
        Places a player's piece at a particular index (no validation).
        """
        self.state[index] = player

    def getSuccessors(self, player):
        """
        Creates the board states reachable when player places a piece on
        one of the empty spaces.  They are returned as a list of new nodes,
        one level deeper than this node, each owning its own copy of the
        board.
        """
        successors = []
        for index, cell in enumerate(self.state):
            if cell != '.':
                continue
            successor = tic_tac_toe_node(self.state[:], self.depth + 1)
            successor.placeMove(index, player)
            successors.append(successor)

        return successors

In [2]:
class tic_tac_toe_game:

    """
    This class supports tic-tac-toe with either a minimax (default)
    or alpha-beta pruning algorithm.

    The human plays 'X' and moves first; the agent plays 'O'.
    """

    def __init__(self, ply=2, algorithm="minimax", dbg=False):
        """
        Initializes the game with a default ply.

        ply: search depth at which the agent stops expanding and evaluates
        algorithm=['minimax' | 'alphabeta'] (any value other than
                  'minimax' selects alpha-beta)
        dbg: when True, every evaluated terminal node is printed
        """
        self.current = tic_tac_toe_node()
        self.ply = ply
        self.algorithm = algorithm
        self.dbg = dbg

    def dbgPrint(self, state, util):
        """
        Prints a node, its utility and its depth when debugging is enabled.
        """
        if not self.dbg:
            return
        print(state)
        print("Utility: {}".format(util))
        print("Depth: {}".format(state.depth))

    def getPlayerMove(self):
        """
        Queries the player for their next move.

        Keeps prompting until a move has been placed on the current board.
        Non-numeric input, coordinates outside 0..2 and occupied cells are
        all reported as invalid and the prompt is repeated.
        """
        while True:
            print("Player: Make Your Move")
            try:
                print("Row:")
                row = int(input())
                print("Col:")
                col = int(input())
            except ValueError:
                # Non-numeric input used to abort the whole game with a
                # traceback; treat it like any other invalid move instead.
                print("Invalid input.  Try Again.")
                continue

            # Check if input is valid (in range, and the cell is open)
            in_range = 0 <= row < 3 and 0 <= col < 3
            if in_range and self.current.placePlayerMove(row, col):
                return

            # If the move is invalid
            print("Invalid input.  Try Again.")

    def getAgentMove(self):
        """
        Kicks off the search for the AI and makes its chosen successor the
        current board.
        """
        # Search from a fresh depth-0 copy so the live board is not touched.
        root = tic_tac_toe_node(self.current.state[:])
        if self.algorithm == "minimax":
            _, self.current = self.MM_Max_Value(root)
        else:
            _, self.current = self.AB_Max_Value(root, -9999, 9999)

    ##### Minimax Implementation of Max_Value and Min_value #####
    def MM_Max_Value(self, state):
        """
        Implements the Max method for Mini-Max (the agent, 'O', to move).

        Returns (value, best successor); the successor is None when state
        is terminal.
        """
        if state.terminal_test(self.ply):
            util = state.utility('O')
            self.dbgPrint(state, util)
            return util, None

        v = -sys.maxsize - 1
        best = None
        for s in state.getSuccessors('O'):
            new_v, _ = self.MM_Min_Value(s)
            # Strict comparison: the first of several equal successors wins.
            if new_v > v:
                best = s
                v = new_v

        # The chosen node may become the new current board, which the game
        # loop expects to sit at depth 0.
        best.depth = 0
        return v, best

    def MM_Min_Value(self, state):
        """
        Implements the Min method for Mini-Max (the human, 'X', to move).

        Utilities are still measured from the agent's ('O') point of view.
        Returns (value, best successor); the successor is None when state
        is terminal.
        """
        if state.terminal_test(self.ply):
            util = state.utility('O')
            self.dbgPrint(state, util)
            return util, None

        v = sys.maxsize
        best = None
        for s in state.getSuccessors('X'):
            new_v, _ = self.MM_Max_Value(s)
            if new_v < v:
                best = s
                v = new_v

        best.depth = 0
        return v, best

    ##### Alphabeta Pruning Implementation of Max_Value and Min_value #####
    def AB_Max_Value(self, state, alpha, beta):
        """
        Implements the Max method for alpha-beta pruning ('O' to move).

        alpha / beta: the current lower / upper bounds of the search window.
        Returns (value, best successor); the successor is None when state
        is terminal.
        """
        if state.terminal_test(self.ply):
            util = state.utility('O')
            self.dbgPrint(state, util)
            return util, None

        v = -sys.maxsize - 1
        best = None
        for s in state.getSuccessors('O'):

            # Implements v = max(v, min_value)
            new_v, _ = self.AB_Min_Value(s, alpha, beta)
            if new_v > v:
                best = s
                v = new_v

            # Determines if pruning is possible
            if v >= beta:
                print("pruning.")
                break

            # Update alpha
            alpha = max(v, alpha)

        # Return the result
        best.depth = 0
        return v, best

    def AB_Min_Value(self, state, alpha, beta):
        """
        Implements the Min method for alpha-beta pruning ('X' to move).

        alpha / beta: the current lower / upper bounds of the search window.
        Returns (value, best successor); the successor is None when state
        is terminal.
        """
        if state.terminal_test(self.ply):
            util = state.utility('O')
            self.dbgPrint(state, util)
            return util, None

        v = sys.maxsize
        best = None
        for s in state.getSuccessors('X'):

            # Implements v = min(v, max_value)
            new_v, _ = self.AB_Max_Value(s, alpha, beta)
            if new_v < v:
                best = s
                v = new_v

            # Determines if pruning is possible
            if v <= alpha:
                print("pruning.")
                break

            # Update beta
            beta = min(v, beta)

        best.depth = 0
        return v, best

    def playGame(self):
        """
        Main tic-tac-toe game loop: alternates human and agent moves until
        the current board is terminal, then reports the result.
        """
        move = -1

        # Iterate until the game end condition is reached
        #  (winner or stalemate)
        while not self.current.terminal_test(self.ply):
            move += 1

            # Even-numbered moves belong to the human, odd ones to the agent.
            if move % 2 == 0:
                print("\nCurrent State:")
                print(self.current)
                self.getPlayerMove()
            else:
                self.getAgentMove()

        # Determine the winner and print the result
        winner = self.current.getWinner()
        print("\n\nGame Over:")
        if winner is None:
            print("\tStalemate.")
        else:
            print("\t" + winner + " Wins!")

        print("Final State:")
        print(self.current)
                


In [3]:
# Minimax: play an interactive game against the plain minimax agent,
# searching to a ply of 4 with debug printing of evaluated nodes enabled.

game = tic_tac_toe_game(
    dbg=True,
    ply=4,
)
game.playGame()


Current State:

...
...
...
Player: Make Your Move
Row:
0
Col:
0

XOX
OX.
...
Utility: 0
Depth: 4

XOX
O.X
...
Utility: 1
Depth: 4

XOX
O..
X..
Utility: 2
Depth: 4

XOX
O..
.X.
Utility: 1
Depth: 4

XOX
O..
..X
Utility: 2
Depth: 4

XOX
XO.
...
Utility: 10
Depth: 4

XOX
.OX
...
Utility: 10
Depth: 4

XOX
.O.
X..
Utility: 11
Depth: 4

XOX
.O.
.X.
Utility: 1
Depth: 4

XOX
.O.
..X
Utility: 11
Depth: 4

XOX
X.O
...
Utility: 1
Depth: 4

XOX
.XO
...
Utility: 0
Depth: 4

XOX
..O
X..
Utility: 2
Depth: 4

XOX
..O
.X.
Utility: 1
Depth: 4

XOX
..O
..X
Utility: 2
Depth: 4

XOX
X..
O..
Utility: 2
Depth: 4

XOX
.X.
O..
Utility: 1
Depth: 4

XOX
..X
O..
Utility: 2
Depth: 4

XOX
...
OX.
Utility: 0
Depth: 4

XOX
...
O.X
Utility: 1
Depth: 4

XOX
X..
.O.
Utility: 11
Depth: 4

XOX
.X.
.O.
Utility: 1
Depth: 4

XOX
..X
.O.
Utility: 11
Depth: 4

XOX
...
XO.
Utility: 10
Depth: 4

XOX
...
.OX
Utility: 10
Depth: 4

XOX
X..
..O
Utility: 2
Depth: 4

XOX
.X.
..O
Utility: 1
Depth: 4

XOX
..X
..O
Utility: 2
Depth: 4

X

..X
Utility: 1
Depth: 4

XXO
O.X
...
Utility: 1
Depth: 4

X.O
OXX
...
Utility: 0
Depth: 4

X.O
O.X
X..
Utility: 0
Depth: 4

X.O
O.X
.X.
Utility: 1
Depth: 4

X.O
O.X
..X
Utility: 1
Depth: 4

XX.
OOX
...
Utility: 1
Depth: 4

X.X
OOX
...
Utility: 1
Depth: 4

X..
OOX
X..
Utility: 1
Depth: 4

X..
OOX
.X.
Utility: 1
Depth: 4

X..
OOX
..X
Utility: 2
Depth: 4

XX.
O.X
O..
Utility: 2
Depth: 4

X.X
O.X
O..
Utility: 1
Depth: 4

X..
OXX
O..
Utility: 1
Depth: 4

X..
O.X
OX.
Utility: 1
Depth: 4

X..
O.X
O.X
Utility: 1
Depth: 4

XX.
O.X
.O.
Utility: 1
Depth: 4

X.X
O.X
.O.
Utility: 2
Depth: 4

X..
OXX
.O.
Utility: 1
Depth: 4

X..
O.X
XO.
Utility: 1
Depth: 4

X..
O.X
.OX
Utility: 1
Depth: 4

XX.
O.X
..O
Utility: 1
Depth: 4

X.X
O.X
..O
Utility: 1
Depth: 4

X..
OXX
..O
Utility: 1
Depth: 4

X..
O.X
X.O
Utility: 0
Depth: 4

X..
O.X
.XO
Utility: 0
Depth: 4

XOX
O..
X..
Utility: 2
Depth: 4

XO.
OX.
X..
Utility: 0
Depth: 4

XO.
O.X
X..
Utility: 1
Depth: 4

XO.
O..
XX.
Utility: 1
Depth: 4

XO.
O..
X.X
Utilit

Depth: 4

XXO
X.O
...
Utility: 11
Depth: 4

X.O
XXO
...
Utility: 10
Depth: 4

X.O
X.O
X..
Utility: -2499.75
Depth: 4

X.O
X.O
.X.
Utility: 11
Depth: 4

X.O
X.O
..X
Utility: 1
Depth: 4

XX.
XOO
...
Utility: 2
Depth: 4

X.X
XOO
...
Utility: 1
Depth: 4

X..
XOO
X..
Utility: -2499.75
Depth: 4

X..
XOO
.X.
Utility: 2
Depth: 4

X..
XOO
..X
Utility: 2
Depth: 4

XX.
X.O
O..
Utility: 3
Depth: 4

X.X
X.O
O..
Utility: 1
Depth: 4

X..
XXO
O..
Utility: 2
Depth: 4

X..
X.O
OX.
Utility: 2
Depth: 4

X..
X.O
O.X
Utility: 1
Depth: 4

XX.
X.O
.O.
Utility: 2
Depth: 4

X.X
X.O
.O.
Utility: 2
Depth: 4

X..
XXO
.O.
Utility: 2
Depth: 4

X..
X.O
XO.
Utility: -2499.75
Depth: 4

X..
X.O
.OX
Utility: 1
Depth: 4

XX.
X.O
..O
Utility: 11
Depth: 4

X.X
X.O
..O
Utility: 1
Depth: 4

X..
XXO
..O
Utility: 11
Depth: 4

X..
X.O
X.O
Utility: -2499.75
Depth: 4

X..
X.O
.XO
Utility: 10
Depth: 4

XOX
.XO
...
Utility: 0
Depth: 4

XO.
XXO
...
Utility: 1
Depth: 4

XO.
.XO
X..
Utility: 1
Depth: 4

XO.
.XO
.X.
Utility: 1
Depth: 4


Utility: 1
Depth: 4

XOX
...
.OX
Utility: 10
Depth: 4

XO.
X..
.OX
Utility: 10
Depth: 4

XO.
.X.
.OX
Utility: -2499.75
Depth: 4

XO.
..X
.OX
Utility: 10
Depth: 4

XO.
...
XOX
Utility: 10
Depth: 4

XXO
...
.OX
Utility: 1
Depth: 4

X.O
X..
.OX
Utility: 2
Depth: 4

X.O
.X.
.OX
Utility: -2499.75
Depth: 4

X.O
..X
.OX
Utility: 2
Depth: 4

X.O
...
XOX
Utility: 1
Depth: 4

XX.
O..
.OX
Utility: 1
Depth: 4

X.X
O..
.OX
Utility: 2
Depth: 4

X..
OX.
.OX
Utility: -2499.75
Depth: 4

X..
O.X
.OX
Utility: 1
Depth: 4

X..
O..
XOX
Utility: 2
Depth: 4

XX.
.O.
.OX
Utility: 2
Depth: 4

X.X
.O.
.OX
Utility: 11
Depth: 4

X..
XO.
.OX
Utility: 11
Depth: 4

X..
.OX
.OX
Utility: 11
Depth: 4

X..
.O.
XOX
Utility: 11
Depth: 4

XX.
..O
.OX
Utility: 1
Depth: 4

X.X
..O
.OX
Utility: 2
Depth: 4

X..
X.O
.OX
Utility: 1
Depth: 4

X..
.XO
.OX
Utility: -2499.75
Depth: 4

X..
..O
XOX
Utility: 2
Depth: 4

XX.
...
OOX
Utility: 1
Depth: 4

X.X
...
OOX
Utility: 1
Depth: 4

X..
X..
OOX
Utility: 2
Depth: 4

X..
.X.
OOX
Utility

2
Col:
2

XOX
OOX
..X
Utility: -2499.75
Depth: 4

XOX
OO.
X.X
Utility: 20
Depth: 4

XOX
OO.
.XX
Utility: 10
Depth: 4

XOX
XOO
..X
Utility: 10
Depth: 4

XOX
.OO
X.X
Utility: 20
Depth: 4

XOX
.OO
.XX
Utility: 10
Depth: 4

XOX
XO.
O.X
Utility: 10
Depth: 4

XOX
.OX
O.X
Utility: -2499.75
Depth: 4

XOX
.O.
OXX
Utility: 1
Depth: 4

XOX
.O.
.OX
Utility: 3333.0
Depth: 3

XOO
XOX
..X
Utility: 20
Depth: 4

XOO
XO.
X.X
Utility: -2499.75
Depth: 4

XOO
XO.
.XX
Utility: 10
Depth: 4

XOX
XOO
..X
Utility: 10
Depth: 4

XO.
XOO
X.X
Utility: -2499.75
Depth: 4

XO.
XOO
.XX
Utility: 1
Depth: 4

XOX
XO.
O.X
Utility: 10
Depth: 4

XO.
XOX
O.X
Utility: 20
Depth: 4

XO.
XO.
OXX
Utility: 10
Depth: 4

XO.
XO.
.OX
Utility: 3333.0
Depth: 3

XOO
XOX
..X
Utility: 20
Depth: 4

XOO
.OX
X.X
Utility: 10
Depth: 4

XOO
.OX
.XX
Utility: 10
Depth: 4

XOX
OOX
..X
Utility: -2499.75
Depth: 4

XO.
OOX
X.X
Utility: 10
Depth: 4

XO.
OOX
.XX
Utility: 1
Depth: 4

XOX
.OX
O.X
Utility: -2499.75
Depth: 4

XO.
XOX
O.X
Utility: 20
Depth: 

2
Col:
1

XOO
XOO
XXX
Utility: -2499.75
Depth: 4

XOO
XO.
OXX
Utility: 3333.0
Depth: 3

XOO
OOX
XXX
Utility: -2499.75
Depth: 4

XOO
.OX
OXX
Utility: 3333.0
Depth: 3

XOO
.O.
XXX
Utility: -4999.5
Depth: 2

XOX
OOO
.XX
Utility: 3333.0
Depth: 3

XOX
OOX
OXX
Utility: -2499.75
Depth: 4

XOO
OOX
XXX
Utility: -2499.75
Depth: 4

XOX
OOX
OXX
Utility: -2499.75
Depth: 4

XO.
OO.
XXX
Utility: -4999.5
Depth: 2

XOX
OOO
.XX
Utility: 3333.0
Depth: 3

XOX
XOO
OXX
Utility: 0
Depth: 4

XOO
XOO
XXX
Utility: -2499.75
Depth: 4

XOX
XOO
OXX
Utility: 0
Depth: 4

XO.
.OO
XXX
Utility: -4999.5
Depth: 2

XOX
OOX
OXX
Utility: -2499.75
Depth: 4

XOX
XOO
OXX
Utility: 0
Depth: 4

XOO
XO.
OXX
Utility: 3333.0
Depth: 3

XOX
XOO
OXX
Utility: 0
Depth: 4

XOO
.OX
OXX
Utility: 3333.0
Depth: 3

XOX
OOX
OXX
Utility: -2499.75
Depth: 4

Current State:

XO.
.O.
OXX
Player: Make Your Move
Row:
0
Col:
2

XOX
OOX
OXX
Utility: -4999.5
Depth: 2

XOX
XOO
OXX
Utility: 0
Depth: 2

Current State:

XOX
.OO
OXX
Player: Make Your Move
Row:

In [None]:
# Alpha-Beta Pruning: same interactive game, but the agent searches with
# alpha-beta pruning (ply of 4, debug printing of evaluated nodes enabled).
game = tic_tac_toe_game(
    ply=4,
    dbg=True,
    algorithm="alphabeta",
)
game.playGame()


Current State:

...
...
...
Player: Make Your Move
Row:
