This is an application that attempts to simulate the game of k-NIM in the case k = 2. 
Future additions will employ Monte Carlo methods as we attempt to understand the game dynamics further.

Our next stage is building a dataset by implementing a SQL server and testing the model. 

Credits:  
        John Tan Chong Min for his Python tutorials and his work in writing a UCT / MCTS for classical NIM. 
        Gelly and Silver for their work on MC-RAVE in computer Go.



In [1138]:
#libraries that are used in the program
from copy import deepcopy
import numpy as np
import random 
from treelib import Node, Tree

In [1139]:
#helpers to avoid duplicates 

#check if two arrays are unordered matches:
def is_match(a, b):
    """Return True when ``a`` and ``b`` hold the same items, ignoring order.

    Both arguments must support ``len()`` and ``.count()`` (lists or tuples).
    Multiplicity matters: ``[1, 1, 2]`` does not match ``[1, 2, 2]``.
    """
    # Comparing counts only for the items of `a` would let `b` carry extra
    # elements unnoticed (e.g. [1] vs [1, 2]); equal lengths rule that out.
    if len(a) != len(b):
        return False
    return all(a.count(item) == b.count(item) for item in a)

#check if an array has such a match in a collection of other arrays 
def occurs(collection, element):
    """Report whether any array in ``collection`` is an unordered match of ``element``.

    Matching is delegated to ``is_match``; the scan stops at the first hit.
    """
    return any(is_match(candidate, element) for candidate in collection)

def all_even(vector):
    """Return False as soon as an entry of ``vector`` has remainder 1 modulo 2.

    An empty ``vector`` yields True.
    """
    return not any(entry % 2 == 1 for entry in vector)

def as_str(array):
    """Build a string key from the items of ``array``.

    Items are converted with ``str`` and joined with a comma.  The separator
    keeps keys unambiguous once an item needs more than one character:
    without it ``[1, 12]`` and ``[11, 2]`` would both collapse to ``"112"``
    and share one identifier.
    """
    return ','.join(str(item) for item in array)
 

In [1140]:
class two_NIM:
    """Mutable state of one game of 2-NIM.

    A position is a list of heap sizes.  A legal move removes one object from
    a single non-empty heap, or one object from each of two different
    non-empty heaps.  The game is over once the heap sizes sum to zero.

    Attributes:
        position: current heap sizes.
        original_position: heap sizes the game started from (used by reset()).
        sum: total number of objects left across all heaps.
        in_play: False once the game is over.
        turn: 1 or -1; starts at 1 and flips after every move.
        descendants: positions reachable in one move, with unordered
            duplicates removed.
        reward: 1 by default; set to 0 when the game ends with ``turn == 1``,
            i.e. when the player whose turn value is -1 made the final move.
    """

    def __init__(self, position):
        # Copy twice so that neither the caller's list nor the live position
        # can ever alias the snapshot that reset() restores.
        self.position = list(position)
        self.original_position = list(position)
        self.sum = sum(self.position)
        # A start position with nothing on the board is already finished.
        self.in_play = not self.over()
        self.turn = 1
        self.descendants = self.plays()
        self.reward = 1

    def move(self, play):
        """Advance the game to ``play``, which must be one of ``descendants``."""
        assert play in self.descendants, f"illegal move: {play}"

        self.position = list(play)       # update position (own copy)
        self.sum = sum(self.position)    # update sum
        self.descendants = self.plays()  # update descendants
        self.turn = -self.turn           # update turn

        if self.over():
            self.in_play = False  # end game
            if self.turn == 1:
                # The player now "to move" faces an empty board: punish losers.
                self.reward = 0

    def over(self):
        """Return True when the sum across all heaps is zero."""
        return self.sum == 0

    def reset(self):
        """Restore the starting position, turn, reward and derived fields."""
        self.position = list(self.original_position)
        self.sum = sum(self.position)
        self.descendants = self.plays()
        self.reward = 1
        self.turn = 1
        self.in_play = not self.over()

    def plays(self):
        """Return every position reachable in one move.

        Positions that are reorderings of one another count as the same move;
        only the first one generated is kept.  Single-heap moves come first,
        followed by two-heap moves in increasing index order.
        """
        heaps = self.position
        moves = []
        seen = set()  # order-insensitive keys of the moves collected so far

        def consider(child):
            key = tuple(sorted(child))
            if key not in seen:
                seen.add(key)
                moves.append(child)

        # Take one object from a single non-empty heap.
        for index, size in enumerate(heaps):
            if size > 0:
                child = list(heaps)
                child[index] -= 1
                consider(child)

        # Take one object from each of two different non-empty heaps.  Each
        # unordered pair is visited once; the mirrored pair would only
        # produce the identical child again.
        for first in range(len(heaps)):
            if heaps[first] <= 0:
                continue
            for second in range(first + 1, len(heaps)):
                if heaps[second] > 0:
                    child = list(heaps)
                    child[first] -= 1
                    child[second] -= 1
                    consider(child)

        return moves

In [1141]:
# a random player just selects a random position from an array of positions. 
def random_player(position):
    """Pick one entry of ``position`` uniformly at random.

    ``position`` is a sequence of candidate positions; an empty sequence
    raises ValueError from the random module.
    """
    pick = random.randrange(len(position))
    return position[pick]

In [1142]:
#play a random game and record the reward [1 if win, -1 if loss]
def random_expirament(position):
    """Play one game between two random players starting from ``position``.

    A position whose heaps sum to zero short-circuits to 1.  Otherwise the
    result of ``Game(random_player, random_player, position, True)`` is
    returned unchanged.
    NOTE(review): ``Game`` is not defined in the visible part of this file;
    confirm where it comes from and what it returns.
    """
    if sum(position) != 0:
        return Game(random_player, random_player, position, True)
    return 1  # just in case we start empty

In [1143]:
# a state (i.e. game state) can be thought of as a node in the game tree
# each node has a parent and a set of children
class State:
    """Statistics attached to one node of the game tree.

    Attributes:
        position: the game position this node represents.
        count: how many rewards have been folded in so far.
        value: running mean of those rewards (0 before the first update).
    """

    def __init__(self, position):
        self.position = position
        self.count = 0
        self.value = 0

    def update(self, reward):
        """Fold one more ``reward`` into the running mean."""
        self.count += 1
        # Incremental mean: new = old + (reward - old) / count.  Dividing
        # (old + reward) by count is only correct for the first two updates
        # and drags the value towards zero afterwards.
        self.value += (reward - self.value) / self.count

Implementing greedy monte carlo with an AMAF. 

The basic assumption that makes this algorithm effective is that any move in two_NIM is equally good wherever it is played in the game tree. 
That is why AMAF is a really effective mechanism here. 


In [1144]:
def BACKUP(game_tree: Tree, nodes: list, reward):
    """Propagate the outcome of one simulation to every visited node.

    ``nodes`` holds the identifiers visited, in order, starting with the
    root.  Entries at even indices are updated with ``reward``; entries at
    odd indices are updated with ``abs(reward - 1)``, which swaps 0 and 1.
    """
    for depth, node_id in enumerate(nodes):
        stats = game_tree.get_node(node_id).data
        if depth % 2:
            stats.update(abs(reward - 1))
        else:
            stats.update(reward)

In [1145]:
# as we learn more about the game, we can update default_policy to improve performance. 
# At the moment, the default policy is just random selection. 

def default_policy(game):
    """Choose the next move for a default (rollout) simulation.

    Currently this is a uniformly random pick among ``game.descendants``.
    """
    candidates = game.descendants
    return random_player(candidates)

In [1146]:
#evaluate each possible move and select the best or worst one depending on if it is your turn or not. 
def best_move(game : two_NIM, game_tree : Tree, turn: int):
    """Select a move from ``game.descendants`` using the values stored in the tree.

    Only descendants that already have a node in ``game_tree`` are compared.
    When ``turn == 1`` the descendant with the lowest stored value is
    returned, otherwise the one with the highest.  Ties go to the descendant
    that appears last.  If no descendant is in the tree yet, a random
    descendant is returned instead.

    NOTE(review): BACKUP appears to store every node's value from the point
    of view of the player about to move there, which would make "lowest
    value" the right choice for both players.  Confirm that maximising for
    ``turn != 1`` is intended.
    """
    # Starting bounds assume stored values lie between 0 and 1.
    highest, lowest = 0, 1
    top_pick = None
    bottom_pick = None
    seen_any = False

    for candidate in game.descendants:
        key = as_str(candidate)
        if not game_tree.contains(key):
            continue  # unexplored positions carry no statistics to compare

        seen_any = True
        score = game_tree.get_node(key).data.value
        if score >= highest:
            top_pick, highest = candidate, score
        if score <= lowest:
            bottom_pick, lowest = candidate, score

    if not seen_any:
        # Nothing below this position has been explored: fall back to chance.
        return random_player(game.descendants)

    # On our own turn we hand the opponent the position with the lowest value.
    return bottom_pick if turn == 1 else top_pick

In [1147]:
# a simulation in which both players follow the default policy in every move
# at the moment, this is pure Monte Carlo

#initial debugging complete

def default_sim(game: two_NIM, game_tree: Tree):
    """Play ``game`` to the end with the default policy and back up the result.

    Every position visited is recorded by its string ID.  Positions not yet
    in ``game_tree`` are added as children of the node visited just before
    them.  Once the game is over, BACKUP is called with the visited IDs and
    ``game.reward``.
    """
    current = game_tree.get_node("root")  # node we are currently standing on
    visited = ["root"]

    while game.in_play:
        chosen = default_policy(game)
        key = as_str(chosen)
        visited.append(key)

        if game_tree.contains(key):
            current = game_tree.get_node(key)
        else:
            # First visit: hang a fresh node under the one we came from.
            current = game_tree.create_node(
                None, key, parent=current.identifier, data=State(chosen)
            )

        game.move(chosen)

    BACKUP(game_tree, visited, game.reward)

In [1148]:
# the tree policy
def tree_sim(game: two_NIM, game_tree: Tree):
    """Play ``game`` to the end using ``best_move`` and back up the result.

    Each move is chosen by ``best_move`` for the player whose turn it is.
    Visited positions are recorded by string ID, and positions missing from
    ``game_tree`` are added under the previously visited node.  When the
    game ends, BACKUP is called with the visited IDs and ``game.reward``.
    """
    current = game_tree.get_node("root")  # node we are currently standing on
    path = ["root"]

    while game.in_play:
        chosen = best_move(game, game_tree, game.turn)
        key = as_str(chosen)  # identifies the node during the backup phase
        path.append(key)

        if game_tree.contains(key):
            current = game_tree.get_node(key)
        else:
            # Unseen position: create its node under the current one.
            current = game_tree.create_node(
                None, key, parent=current.identifier, data=State(chosen)
            )

        game.move(chosen)

    BACKUP(game_tree, path, game.reward)  # back-propagation

In [1149]:
def simulate(position, iterations):
    """Search from ``position`` and return the move the tree prefers.

    A fresh game and a fresh tree rooted at ``position`` are created.  Each
    of the ``iterations`` rounds runs one default (random) simulation, which
    builds out the tree and mitigates selection bias, followed by one
    tree-guided simulation; the game is reset after each.  The result is
    ``best_move`` evaluated at the root for the player with turn 1.

    Exactly ``iterations`` rounds are run; zero or a negative number runs
    none, in which case the returned move is a random legal move.

    Raises:
        ValueError: if ``position`` has no legal moves, since there is
            nothing to choose from.
    """
    game = two_NIM(position)
    if not game.descendants:
        # Fail early with a clear message instead of an "empty range" error
        # from deep inside the random module.
        raise ValueError("cannot simulate from a position with no legal moves")

    game_tree = Tree()
    game_tree.create_node(None, "root", None, State(position))

    remaining = iterations
    while remaining > 0:
        default_sim(game, game_tree)  # unbiased rollout
        game.reset()
        tree_sim(game, game_tree)     # rollout guided by the tree statistics
        game.reset()
        remaining -= 1

    return best_move(game, game_tree, 1)

