# Piedra-papel-tijera

Ejemplo simple de una red neuronal que aprende a jugar a piedra, papel o tijera.

In [13]:
import pandas as pd
import numpy as np

%matplotlib notebook

In [1]:
# Define the moves available in the game (rock, scissors, paper).
# The position of each move in this list is reused by later cells to map
# model output columns back to a move name.
options = ["piedra", "tijeras", "papel"]

In [2]:
# Game rules
def search_winner(p1, p2):
    """Decide the outcome of one rock-paper-scissors round.

    Args:
        p1: Move of player 1; one of "piedra", "tijeras" or "papel".
        p2: Move of player 2; same vocabulary as ``p1``.

    Returns:
        0 on a draw, 1 if player 1 wins, 2 if player 2 wins.

    Raises:
        ValueError: If either move is not one of the three known moves.
    """
    # Each move maps to the single move it defeats.
    beats = {"piedra": "tijeras", "tijeras": "papel", "papel": "piedra"}

    # Validate up front: an unknown move previously fell through every branch
    # and crashed with UnboundLocalError on the final `return result`.
    for move in (p1, p2):
        if move not in beats:
            raise ValueError("unknown move: %r" % (move,))

    if p1 == p2:
        return 0
    return 1 if beats[p1] == p2 else 2

In [3]:
# Try the function: "papel" vs "tijeras" should yield 2 (player 2 wins).
search_winner("papel", "tijeras")

2

In [4]:
# More checks: each entry is [player 1 move, player 2 move, expected result].
test = [
    ["piedra", "piedra", 0],
    ["piedra", "tijeras", 1],
    ["piedra", "papel", 2]
]

for partida in test:
    p1_move, p2_move, expected = partida
    outcome = search_winner(p1_move, p2_move)
    # Print the computed winner next to the expected value for eyeballing.
    print("player1: %s player2: %s Winner: %s Validation: %s" % (
        p1_move, p2_move, outcome, expected
    ))

player1: piedra player2: piedra Winner: 0 Validation: 0
player1: piedra player2: tijeras Winner: 1 Validation: 1
player1: piedra player2: papel Winner: 2 Validation: 2


In [5]:
# Random move generator used for further testing
from random import choice
def get_choice():
    """Return one entry of the module-level ``options`` list, picked at random."""
    move = choice(options)
    return move

In [7]:
# Run ten random matches and print who wins each one.
for i in range(10):
    # Draw player 1 first, then player 2, so the random sequence is consumed
    # in the same order on every run.
    player1 = get_choice()
    player2 = get_choice()
    outcome = search_winner(player1, player2)
    print("player1: %s player2: %s Winner: %s " % (player1, player2, outcome))

player1: papel player2: piedra Winner: 1 
player1: papel player2: piedra Winner: 1 
player1: tijeras player2: piedra Winner: 2 
player1: tijeras player2: tijeras Winner: 0 
player1: tijeras player2: papel Winner: 1 
player1: papel player2: papel Winner: 0 
player1: piedra player2: papel Winner: 2 
player1: papel player2: papel Winner: 0 
player1: papel player2: piedra Winner: 1 
player1: tijeras player2: piedra Winner: 2 


In [8]:
# Helper used to build the feature dataset
def str_to_list(option):
    """One-hot encode a move name.

    Args:
        option: One of "piedra", "tijeras" or "papel".

    Returns:
        A new 3-element list: [1,0,0] for "piedra", [0,1,0] for "tijeras",
        [0,0,1] for "papel".

    Raises:
        ValueError: If ``option`` is not one of the three known moves.
    """
    # Built on every call so each caller receives its own fresh lists.
    encoding = {
        "piedra": [1, 0, 0],
        "tijeras": [0, 1, 0],
        "papel": [0, 0, 1],
    }
    # Reject unknown moves explicitly: the previous catch-all `else` silently
    # encoded any typo as "papel".
    if option not in encoding:
        raise ValueError("unknown move: %r" % (option,))
    return encoding[option]

# Inputs are the three moves; targets are, in the same order, the move that
# beats each input (papel beats piedra, piedra beats tijeras, tijeras beats papel).
data_X = [str_to_list(move) for move in ("piedra", "tijeras", "papel")]
data_y = [str_to_list(move) for move in ("papel", "piedra", "tijeras")]

print(data_X)
print(data_y)

[[1, 0, 0], [0, 1, 0], [0, 0, 1]]
[[0, 0, 1], [1, 0, 0], [0, 1, 0]]


In [10]:
# Define the neural-network model.
from sklearn.neural_network import MLPClassifier

# Fit on the first sample only; later cells keep training this same model
# through partial_fit.
clf = MLPClassifier(warm_start=True, verbose=False)
model = clf.fit(data_X[:1], data_y[:1])
print(model)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=True)




In [11]:
def play_and_learn(iters=10, debug=False, threshold=0.95):
    """Play ``iters`` games of a random player 1 against the model as player 2.

    Player 1's move is drawn with ``get_choice()``. The module-level ``model``
    is asked for probabilities for the one-hot encoded move; the first entry of
    ``options`` whose probability reaches ``threshold`` becomes player 2's
    move, otherwise player 2 also plays a random move.

    Args:
        iters: Number of games to play.
        debug: When truthy, print each game's moves, probabilities and result.
        threshold: Minimum probability for the model's answer to be used
            instead of a random move. Defaults to 0.95.

    Returns:
        A tuple ``(score, data_X, data_y)``: ``score`` is a dict with the keys
        "win" (games where player 2 won) and "loose" (every other game, draws
        included); ``data_X`` / ``data_y`` hold the one-hot encoded player 1
        and player 2 moves of the won games only.
    """
    score = {"win": 0, "loose": 0}

    won_X = []
    won_y = []

    for _ in range(iters):
        player1 = get_choice()

        predict = model.predict_proba([str_to_list(player1)])[0]

        # Take the first option, in `options` order, that the model is
        # confident about; fall back to a random move when there is none.
        player2 = next(
            (option for option, proba in zip(options, predict) if proba >= threshold),
            None,
        )
        if player2 is None:
            player2 = get_choice()

        if debug:
            print("Player1: %s Player2 (modelo): %s --> %s" % (player1, predict, player2))

        winner = search_winner(player1, player2)
        if debug:
            print("Comprobamos: p1 VS p2: %s" % winner)

        if winner == 2:
            # Only winning answers are kept as training examples.
            won_X.append(str_to_list(player1))
            won_y.append(str_to_list(player2))
            score["win"] += 1
        else:
            score["loose"] += 1

    return score, won_X, won_y

In [12]:
# Play a single game with debug output and, if the model won it, feed that
# example back into the model.
score, data_X, data_y = play_and_learn(1, debug=True)
print(data_X)
print(data_y)
games_played = score["win"] + score["loose"]
print("Score: %s %s %%" % (score, score["win"] * 100 / games_played))
if data_X:
    model = model.partial_fit(data_X, data_y)

Player1: tijeras Player2 (modelo): [ 0.12758514  0.14295682  0.87690548] --> piedra
Comprobamos: p1 VS p2: 2
[[0, 1, 0]]
[[1, 0, 0]]
Score: {'win': 1, 'loose': 0} 100.0 %


In [13]:
# Train in rounds of 1000 games, feeding each round's winning examples back
# into the model, until it wins every game in nine consecutive rounds.
MAX_ROUNDS = 1000  # safety cap so the cell always terminates; raise it if needed
historic_pct = []
for i in range(1, MAX_ROUNDS + 1):
    score, data_X, data_y = play_and_learn(1000, debug=False)
    pct = (score["win"]*100/(score["win"]+score["loose"]))
    historic_pct.append(pct)
    print("Iter: %s - score: %s %s %%" % (i, score, pct))

    if data_X:
        model = model.partial_fit(data_X, data_y)

    # Nine rounds at 100 % sum to exactly 900; fewer rounds or any imperfect
    # round cannot reach that total.
    if sum(historic_pct[-9:])==900:
        break
else:
    # Only reached when the cap is hit without the stop condition being met.
    print("Stopped after %s rounds without nine consecutive 100%% rounds" % MAX_ROUNDS)

Iter: 1 - score: {'win': 555, 'loose': 445} 55.5 %
Iter: 2 - score: {'win': 537, 'loose': 463} 53.7 %
Iter: 3 - score: {'win': 553, 'loose': 447} 55.3 %
Iter: 4 - score: {'win': 529, 'loose': 471} 52.9 %
Iter: 5 - score: {'win': 556, 'loose': 444} 55.6 %
Iter: 6 - score: {'win': 555, 'loose': 445} 55.5 %
Iter: 7 - score: {'win': 551, 'loose': 449} 55.1 %
Iter: 8 - score: {'win': 560, 'loose': 440} 56.0 %
Iter: 9 - score: {'win': 578, 'loose': 422} 57.8 %
Iter: 10 - score: {'win': 543, 'loose': 457} 54.3 %
Iter: 11 - score: {'win': 543, 'loose': 457} 54.3 %
Iter: 12 - score: {'win': 569, 'loose': 431} 56.9 %
Iter: 13 - score: {'win': 545, 'loose': 455} 54.5 %
Iter: 14 - score: {'win': 520, 'loose': 480} 52.0 %
Iter: 15 - score: {'win': 330, 'loose': 670} 33.0 %
Iter: 16 - score: {'win': 347, 'loose': 653} 34.7 %
Iter: 17 - score: {'win': 321, 'loose': 679} 32.1 %
Iter: 18 - score: {'win': 311, 'loose': 689} 31.1 %
Iter: 19 - score: {'win': 338, 'loose': 662} 33.8 %
Iter: 20 - score: {'w

Iter: 160 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 161 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 162 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 163 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 164 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 165 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 166 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 167 - score: {'win': 1000, 'loose': 0} 100.0 %
Iter: 168 - score: {'win': 1000, 'loose': 0} 100.0 %


In [15]:
# Inspect the trained model's probabilities for the answer to "piedra";
# columns follow the one-hot order used by str_to_list (piedra, tijeras, papel).
model.predict_proba([str_to_list("piedra")])

array([[ 0.004804  ,  0.00580106,  0.98809566]])