# Simulation
A class for training the learning agents and for testing how well the agents perform.

In [2]:
class Simulation:
    """Play two agents against each other to train them or measure win rates.

    Both players are expected to share one game object (obtained through
    ``getgame()``). ``p1`` is always assigned color ``1`` and moves first;
    ``p2`` is assigned color ``-1``.
    """

    def __init__(self, p1, p2, n):
        """Store the players, their game, and the default number of games.

        Args:
            p1: Player that moves first.
            p2: Player that moves second.
            n: Default number of games used when a method is called
                without an explicit ``n``.

        A mismatch between the two players' games is only reported with a
        printed message; construction still proceeds using ``p1``'s game.
        """
        if p1.getgame() != p2.getgame():
            print('Both players must be playing the same game')
        self.game = p1.getgame()
        self.n = n  # number of simulations
        self.p1 = p1
        self.p2 = p2

    def _assign_colors(self):
        """Give p1 color 1 and p2 color -1."""
        self.p1.setcolor(1)
        self.p2.setcolor(-1)

    def _play_game(self):
        """Alternate moves (p1 first) until the game reports it is finished.

        The game is NOT restarted here; callers inspect the result first.
        """
        # NOTE(review): p2.move() is still called when p1's move has just
        # ended the game; presumably move() tolerates a finished game
        # (or agents rely on seeing the final state) - confirm.
        while not self.game.gamefinished():
            self.p1.move()
            self.p2.move()

    def _plot_history(self, rounds, probs, lowers, highers):
        """Plot the win rate (blue) and its confidence bounds (green)."""
        # Imported lazily so the class can be used without matplotlib as
        # long as no plot is requested.
        import matplotlib.pyplot as plt

        plt.plot(rounds, probs, 'b', rounds, lowers, 'g', rounds, highers, 'g')
        plt.show()

    def _train_loop(self, learners, n, test_freq, plot_every_test):
        """Shared training loop behind ``train`` and ``train_both``.

        Plays ``n`` games; after each one every player in ``learners`` gets
        ``update_values()`` called. Whenever ``i % test_freq == 0`` the
        learners are switched to test mode, 100 evaluation games are played
        with ``simulate``, and the learners are switched back to train mode.
        """
        import time

        self._assign_colors()
        probs = []
        lowers = []
        highers = []
        rounds = []  # training-round index of each evaluation
        start = time.time()
        for i in range(n):
            self._play_game()
            for learner in learners:
                learner.update_values()
            self.game.restart()
            if i % test_freq == 0:
                for learner in learners:
                    learner.test_mode()
                prob, low, high = self.simulate(n=100)
                for learner in learners:
                    learner.train_mode()
                probs.append(prob)
                lowers.append(low)
                highers.append(high)
                rounds.append(i)
                if plot_every_test:
                    self._plot_history(rounds, probs, lowers, highers)
        # Stop the clock before the final plot so it is not counted.
        end = time.time()
        self._plot_history(rounds, probs, lowers, highers)
        print((end-start)/n, ' s per training round')

    def simulate(self, n=None, confint=95):
        """Play ``n`` games and estimate how often p1 wins.

        Args:
            n: Number of games; defaults to ``self.n``.
            confint: Confidence level in percent for the interval.

        Returns:
            Tuple ``(p, lower, higher)``: the fraction of games for which
            ``game.winner() == 1`` and the bounds returned by statsmodels'
            ``proportion_confint``.

        Also prints the win rate, the interval and the time per game.
        """
        if n is None:
            n = self.n

        from statsmodels.stats import proportion
        import time

        self._assign_colors()
        startwins = 0
        start = time.time()
        for _ in range(n):
            self._play_game()
            if self.game.winner() == 1:
                startwins += 1
            self.game.restart()

        end = time.time()
        p = startwins/n
        lower, higher = proportion.proportion_confint(startwins, n, alpha=1-confint/100)

        print('P1 (starting player) won ', p*100, '% of the time.')
        print(confint, '% confidence interval: ', (lower, higher) )
        print((end-start)/n, ' s per round')
        return p, lower, higher

    def train(self, learnerstart=True, plot_every_test=False, test_freq=500, n=None):
        """Train one learning agent for ``n`` games and plot its progress.

        Args:
            learnerstart: If true the learner is ``p1`` (moves first),
                otherwise it is ``p2``.
            plot_every_test: Also plot after every evaluation, not only at
                the end.
            test_freq: An evaluation runs on every round ``i`` for which
                ``i % test_freq == 0``.
            n: Number of training games; defaults to ``self.n``.

        The plotted win rate is the one reported by ``simulate``, i.e. p1's,
        regardless of which player is the learner.
        """
        if n is None:
            n = self.n
        learner = self.p1 if learnerstart else self.p2
        self._train_loop([learner], n, test_freq, plot_every_test)

    def train_both(self, test_freq=500, plot_every_test=False, n=1000):
        """Train both players simultaneously for ``n`` games.

        Args:
            test_freq: An evaluation runs on every round ``i`` for which
                ``i % test_freq == 0``.
            plot_every_test: Also plot after every evaluation, not only at
                the end.
            n: Number of training games; ``None`` falls back to ``self.n``.
        """
        if n is None:
            n = self.n
        # Use the instance's players; bare p1/p2 names are not defined in
        # this scope.
        self._train_loop([self.p1, self.p2], n, test_freq, plot_every_test)
        

        