In [None]:
import numpy as np
import pandas as pd
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
from TicTacToe.human import Human as human
from TicTacToe.duel import duel, play_n_duels

# Value iteration

## Value iteration player vs random player

In [None]:
from TicTacToe.agent_value_iteration import AgentVI as avi
from TicTacToe.agent_random import RandomAgent as random_agent

In [None]:
# Players shared by the experiments below: a value-iteration agent and a
# random baseline.
agent1 = avi()
agent2 = random_agent()
# `human` is imported as an alias of the Human class and then rebound to an
# instance.  Only instantiate while the name still refers to the class, so
# that re-running this cell does not try to call an existing instance.
if isinstance(human, type):
    human = human()

In [None]:
# Run 5000 duels with the random agent passed first and the value-iteration
# agent second; the returned series is plotted further down.
prob = play_n_duels(5000, agent2, agent1)

In [None]:
# Inspect the value-iteration agent's `wins` attribute after the duels above.
agent1.wins

In [None]:
# Inspect the `rewards` attribute of the same agent.
agent1.rewards

In [None]:
# Display the raw result returned by play_n_duels.
prob

In [None]:
# One-column frame holding the series returned by play_n_duels.
df = pd.DataFrame(dict(probabilidad=prob))

# Default figure size for the plot created right below.
figsize(11, 9)
df.plot(color='b', title='Juegos ganados por el agente VI ')
# Label the axes that df.plot has just made current.
plt.gca().set_xlabel("juegos simulados")
plt.gca().set_ylabel("Porcentaje de juegos ganados")

# Monte Carlo  

## Monte Carlo player vs random player

In [None]:
from TicTacToe.agent_mc import AgentMC as amc
from TicTacToe.human import Human as human

In [None]:
# Monte Carlo agent created with its constructor defaults.
player1 = amc()

In [None]:
# The random agent is passed as the first player and the Monte Carlo agent as
# the second one.
playerX = agent2
playerO = player1

In [None]:
# 10000 duels between the random agent and the Monte Carlo agent; the
# returned series is plotted below.
prob = play_n_duels(10000, playerX, playerO)

In [None]:
# Keep only the (key, value) entries of the Monte Carlo agent's `values`
# whose stored value is non-zero.
vals = [entry for entry in player1.values.items() if entry[1] != 0]

In [None]:
# Number of entries with a non-zero value.
len(vals)# +138

In [None]:
# Inspect the Monte Carlo agent's `wins` attribute after the duels above.
player1.wins

In [None]:
# One-column frame holding the series from the Monte Carlo duels.
df = pd.DataFrame(dict(probabilidad=prob))

# Default figure size for the plot created right below.
figsize(11, 9)
df.plot(color='r', title='Juegos ganados por el agente MC ')
# Label the axes that df.plot has just made current.
plt.gca().set_xlabel("juegos simulados ")
plt.gca().set_ylabel("Porcentaje de juegos ganados (%)")

# Q-Learning

## Q-Learning player vs random player

In [None]:
from TicTacToe.agent_tabular_qlearning import Agent_TQL as atql

In [None]:
# Hyper-parameter values for the tabular Q-learning sweep.  The three lists
# are consumed position-wise (zipped), so they describe three configurations
# rather than a full 3x3x3 grid.
epsilons, alphas, gammas = (
    [0.01, 0.4, 0.8],
    [0.001, 0.02, 0.3],
    [0.02, 0.1, 0.5],
)
# One result series per configuration gets appended here by the sweep.
frecs = []

In [None]:
# Train one tabular Q-learning agent per hyper-parameter configuration against
# the random agent and keep the series returned by each run.  After the loop
# `agent5` refers to the agent of the last configuration.
for eps, alf, gam in zip(epsilons, alphas, gammas):
    agent5 = atql()
    agent5.epsilon, agent5.alpha, agent5.gamma = eps, alf, gam

    playerX, playerO = agent2, agent5

    frecs.append(play_n_duels(18000, playerX, playerO))

In [None]:
# figsize only changes the default size of figures created afterwards, so it
# has to run before the first plt.plot call to apply to this figure.
figsize(11, 9)

legends = list()
# Pair every result series with the configuration that produced it.
for serie, e, a, g in zip(frecs, epsilons, alphas, gammas):
    # Derive the x axis from the data instead of hard-coding its length.
    x = np.arange(len(serie))
    plt.plot(x, serie)
    legends.append('e='+str(e)+', a='+str(a)+', g ='+str(g))

plt.legend(legends, loc='lower right')
plt.xlabel("juegos simulados ")
plt.ylabel("Frecuencia relativa de juegos ganados (%)")
plt.show()

In [None]:
# Q-learning agent with a hand-picked configuration, passed as the second
# player against the random agent for 25000 duels.
agent7 = atql()
agent7.epsilon, agent7.alpha, agent7.gamma = 0.2, 0.02, 0.7

playerX, playerO = agent2, agent7

frec = play_n_duels(25000, playerX, playerO)

In [None]:
# One-column frame holding the series from the Q-learning agent's duels.
df = pd.DataFrame({'probabilidad': frec})

figsize(11, 9)
# The series comes from the Q-learning agent (agent7), so the title names it
# instead of the Monte Carlo agent.
df.plot(title='Juegos ganados por el agente Q ', color='r')
plt.xlabel("juegos simulados ")
plt.ylabel("Porcentaje de juegos ganados (%)")

In [None]:
import copy

# Self-play: the trained Q-learning agent against a copy of itself.
# NOTE(review): copy.copy is shallow, so mutable attributes of agent7 (for
# example its value table) are shared with the copy -- confirm that this is
# intended, otherwise use copy.deepcopy.
# NOTE(review): agent7 was passed as the second player so far and no
# set_role call is made here, unlike in the chained-training cell -- confirm
# that play_n_duels does not rely on it.
playerX = agent7
playerO = copy.copy(agent7)

frec = play_n_duels(25000, playerX, playerO)
df = pd.DataFrame({'probabilidad': frec})

figsize(11, 9)
# Both players are Q-learning agents, so the title names that agent instead
# of the Monte Carlo one.
df.plot(title='Juegos ganados por el agente Q ', color='r')
plt.xlabel("juegos simulados ")
plt.ylabel("Porcentaje de juegos ganados (%)")

In [None]:
#df = pd.DataFrame({ 'probabilidad' :prob })

#figsize(11, 9)
#df.plot(title='Juegos ganados por el agente Q ',color='r')


In [None]:
# Chained training: each round a fresh Q-learning agent plays as 'O' against
# the agent of the previous round, which plays as 'X'.
n = 1
frecs = []
aux = agent5  # opponent of the first round: last agent of the sweep above

for _ in range(n):
    agentq = atql()
    agentq.epsilon, agentq.alpha, agentq.gamma = 0.4, 0.02, 0.1

    # The opponent's epsilon is lowered to 0.1 before it plays.
    aux.epsilon = 0.1
    playerX, playerO = aux, agentq
    playerX.set_role('X')
    playerO.set_role('O')

    frecs.append(play_n_duels(18000, playerX, playerO))
    # The agent trained in this round becomes the next round's opponent.
    aux = agentq

In [None]:
legends = list()

# One curve per training round.
for i in range(n):
    serie = frecs[i]
    # Derive the x axis from the data instead of hard-coding its length.
    x = np.arange(len(serie))
    plt.plot(x, serie)
    legends.append('entrenamiento #'+str(i+1))

plt.legend(legends, loc='lower right')
plt.xlabel("juegos simulados ")
plt.ylabel("Frecuencia relativa de juegos ganados (%)")
plt.show()

## Q-Learning player vs human player

In [None]:
# Re-import the class under the `human` alias right before instantiating it,
# because the next line rebinds the same name to an instance.
from TicTacToe.human import Human as human
human = human()

In [None]:
# Set epsilon to 0 (presumably disables exploration -- confirm in Agent_TQL)
# and reset the win counter before the games against the human.
agent7.epsilon = 0

agent7.wins = 0


In [None]:
# Three duels with the human passed first and agent7 second, with show=True.
play_n_duels(3, human, agent7, show=True)

In [None]:
# Inspect agent7's `wins` attribute, which was reset to 0 before these games.
agent7.wins

In [None]:
# Single duel between the human and `agentq`, the agent created in the
# chained-training loop.
duel(human, agentq, show=True)

In [None]:
# Inspect the `wins` attribute of agent5 (last agent of the sweep).
agent5.wins

In [None]:
# Display agent7's full `values` attribute.
agent7.values

In [None]:
# Convert key 910 with key_to_state and pass the result to state_to_matrix
# for inspection.
agent7.state_to_matrix(agent7.key_to_state(910))

# SARSA

## SARSA player vs random player

In [None]:
from TicTacToe.agent_sarsa import Agent_SARSA as asarsa

In [None]:
# SARSA agent created with its constructor defaults.
agent6 = asarsa()

In [None]:
# The random agent is passed as the first player and the SARSA agent as the
# second one.
playerX = agent2
playerO = agent6

In [None]:
# 15000 duels between the random agent and the SARSA agent; the returned
# series is plotted below.
prob = play_n_duels(15000, playerX, playerO)

In [None]:
# Keep only the (key, value) entries of the SARSA agent's `values` whose
# stored value is non-zero.
vals = [entry for entry in agent6.values.items() if entry[1] != 0]

In [None]:
# Number of entries with a non-zero value.
len(vals)

In [None]:
# Inspect the SARSA agent's `wins` attribute after the duels above.
agent6.wins

In [None]:
# One-column frame holding the series from the SARSA-vs-random duels.
df = pd.DataFrame(dict(probabilidad=prob))

# Default figure size for the plot created right below.
figsize(11, 9)
df.plot(color='r', title='Juegos ganados por el agente SARSA ')
# Label the axes that df.plot has just made current.
plt.gca().set_xlabel("juegos simulados ")
plt.gca().set_ylabel("Porcentaje de juegos ganados (%)")

In [None]:
# Q-learning agent (agent5) passed first against the SARSA agent (agent6).
# agent5 had set_role('X') called and epsilon set to 0.1 in the
# chained-training cell (through `aux`); no role is set on agent6 here.
playerX = agent5
playerO = agent6
prob = play_n_duels(15000, playerX, playerO)

In [None]:
# One-column frame holding the series from the Q-learning-vs-SARSA duels.
df = pd.DataFrame(dict(probabilidad=prob))

# Default figure size for the plot created right below.
figsize(11, 9)
df.plot(color='r', title='Juegos ganados por el agente SARSA ')
# Label the axes that df.plot has just made current.
plt.gca().set_xlabel("juegos simulados ")
plt.gca().set_ylabel("Porcentaje de juegos ganados (%)")

In [None]:
# Inspect the SARSA agent's `wins` attribute after the duels against agent5.
agent6.wins

In [None]:
# Switch the SARSA agent to the 'X' role and play one duel against the human,
# passing show=True.
agent6.set_role('X')
duel(agent6, human, show=True)

# Campeonato

In [None]:
from random import shuffle 

def campeonato(array, show=True):
    """Play a single-elimination tournament and return the winning player.

    Every round the remaining players are shuffled and paired off in order;
    each pair plays one ``duel`` with the first of the pair as 'X' and the
    second as 'O'.  Winners advance until a single player is left.  With an
    odd number of players the last one after shuffling advances without
    playing.

    Parameters
    ----------
    array : list
        Players taking part.  Each one must provide ``set_role`` and be
        accepted by ``duel``.  The list itself is not modified.
    show : bool, optional
        Forwarded to ``duel``.  Defaults to ``True``, the value that was
        previously hard-coded.

    Returns
    -------
    object
        The player that wins the final round.

    Raises
    ------
    ValueError
        If ``array`` is empty.
    """
    # Work on a copy so that shuffling does not reorder the caller's list.
    jugadores = list(array)
    if not jugadores:
        raise ValueError("campeonato necesita al menos un jugador")

    while len(jugadores) > 1:
        shuffle(jugadores)  # random pairing for this round
        pasan = []
        # Walk the pairs (0, 1), (2, 3), ... using integer steps only.
        for j in range(0, len(jugadores) - 1, 2):
            jugador_x, jugador_o = jugadores[j], jugadores[j + 1]
            jugador_x.set_role('X')
            jugador_o.set_role('O')
            ganador = duel(jugador_x, jugador_o, show=show)
            # NOTE(review): any result other than 1 advances the 'O' player,
            # which presumably includes draws -- confirm against duel().
            pasan.append(jugador_x if ganador == 1 else jugador_o)
        if len(jugadores) % 2:
            # Odd player out gets a bye instead of being silently dropped.
            pasan.append(jugadores[-1])
        jugadores = pasan

    return jugadores[0]


In [None]:
def campeonatos(n, puntos, agentes=None):
    """Run ``n`` tournaments and add one point per tournament to its winner.

    Parameters
    ----------
    n : int
        Number of tournaments to play.
    puntos : indexable
        Score table indexed by whatever ``campeonato`` returns; it must
        already hold an entry for every possible winner.  It is updated in
        place and also returned.
        NOTE(review): presumably a dict keyed by agent object -- confirm.
    agentes : list, optional
        Players taking part.  When omitted, the module-level ``Agentes`` is
        used, which was previously the only option.

    Returns
    -------
    The same ``puntos`` object, after the update.
    """
    if agentes is None:
        agentes = Agentes
    for _ in range(n):
        ganador = campeonato(agentes)
        puntos[ganador] = puntos[ganador] + 1

    return puntos

# Play 100 tournaments and print the resulting score table.
# NOTE(review): `puntuacion` and `Agentes` are not defined in any visible
# cell; they must exist before this cell runs on a fresh kernel.
print(campeonatos(100,puntuacion))