# Q Learning project

Proyecto realizado para la asignatura de Inteligencia Artificial de la titulación Grado en Ingeniería Informática - Ingeniería del Software de la Universidad de Sevilla.

## Alumnos

- Muñoz Aranda, Alejandro José <alemunara@us.es>
- Ruano Fernández, Mario <mruano@us.es>

In [55]:
# Imports de módulos requeridos
import numpy as np
from random import sample
import random

In [56]:
class Casilla:
    """A single board cell: a pair of coordinates plus the state placed on it."""

    def __init__(self, x, y, state):
        """Store the cell coordinates (x, y) and the state assigned to it."""
        self.__x, self.__y, self.__state = x, y, state

    def get_x(self):
        """Return the first coordinate of the cell."""
        return self.__x

    def get_y(self):
        """Return the second coordinate of the cell."""
        return self.__y

    def get_coords(self):
        """Return both coordinates as an ``(x, y)`` tuple."""
        coords = self.__x, self.__y
        return coords

    def get_state(self):
        """Return the state stored in the cell."""
        return self.__state


class Tablero():
    """Rectangular board whose cells are labelled with randomly shuffled states.

    Every cell (i, j), with 0 <= i < n and 0 <= j < m, receives a distinct
    state in ``range(n * m)``. The assignment is drawn at random when the
    board is constructed.
    """

    def __init__(self, n, m, initial_state, goal_state):
        """Create the board and randomly assign one state to every cell.

        Args:
            n: Number of values taken by the first coordinate (i).
            m: Number of values taken by the second coordinate (j).
            initial_state: State stored as the starting state.
            goal_state: State that ``get_matrix_R`` rewards reaching.
        """
        self.__n = n
        self.__m = m
        self.__size = n * m
        self.__initial_state = initial_state
        self.__goal_state = goal_state
        self.__casillas = {}      # (i, j) -> Casilla
        self.__map_tablero = {}   # state -> (i, j)
        self.__init_tablero()

    def get_n(self):
        """Return the extent of the first coordinate."""
        return self.__n

    def get_m(self):
        """Return the extent of the second coordinate."""
        return self.__m

    def get_size(self):
        """Return the total number of cells/states (n * m)."""
        return self.__size

    def get_map_tablero(self):
        """Return the dict mapping each state to its (i, j) coordinates."""
        return self.__map_tablero

    def get_initial_state(self):
        """Return the initial state given at construction time."""
        # Fixed: the attribute name was misspelled (``__inital_state``), which
        # made every call raise AttributeError.
        return self.__initial_state

    def get_goal_state(self):
        """Return the goal state given at construction time."""
        return self.__goal_state

    def __init_tablero(self):
        """Shuffle the states and assign them to the cells in row-major order."""
        states = sample(list(range(self.__size)), self.__size)
        k = 0

        for i in range(self.__n):
            for j in range(self.__m):
                state = states[k]
                # NOTE: __casillas is never read inside this class; the
                # original authors marked it as a candidate for removal.
                self.__casillas[(i, j)] = Casilla(i, j, state)
                self.__map_tablero[state] = (i, j)
                k += 1

    def __es_vecino(self, casilla_src, casilla_dst):
        """Return True when the two cells are distinct and adjacent.

        Adjacency includes diagonals. For integer coordinates this equals
        "Euclidean distance in (0, sqrt(2)]", but it is evaluated without
        floating-point square roots so the comparison is exact.
        """
        d_row = abs(casilla_dst[0] - casilla_src[0])
        d_col = abs(casilla_dst[1] - casilla_src[1])
        return max(d_row, d_col) == 1

    def get_matrix_R(self):
        """Build the reward matrix R indexed by (current state, next state).

        Returns:
            A ``size x size`` float array where ``R[i, j]`` is:
            * 100 if cells i and j are neighbours and j is the goal state,
            * 0 if they are neighbours and j is not the goal state,
            * 100 for the (goal, goal) entry (staying on the goal),
            * -1 otherwise (transition not allowed).
        """
        R = np.zeros((self.__size, self.__size))

        for i in range(self.__size):
            for j in range(self.__size):
                if self.__es_vecino(self.__map_tablero[i], self.__map_tablero[j]):
                    R[i, j] = 100 if j == self.__goal_state else 0
                elif i == self.__goal_state and i == j:
                    R[i, j] = 100
                else:
                    R[i, j] = -1

        return R
        
    

In [57]:
class Entrenamiento():
    """Q-learning trainer that fills a Q matrix for a given board.

    The board object must provide ``get_size()``, ``get_goal_state()`` and
    ``get_matrix_R()``.
    """

    def __init__(self, tablero, n, gamma):
        """Set up the trainer with an all-zero Q matrix.

        Args:
            tablero: Board supplying the state count, goal and reward matrix.
            n: Number of training runs performed by ``episode``.
            gamma: Discount factor applied to the best Q value of the next
                state.
        """
        self.__tablero = tablero
        self.__n = n
        self.__gamma = gamma
        self.__Q = np.zeros((tablero.get_size(), tablero.get_size()))

    def get_Q(self):
        """Return the current Q matrix (the live array, not a copy)."""
        return self.__Q

    def episode(self):
        """Run ``n`` training runs, each from a uniformly random start state."""
        R = self.__tablero.get_matrix_R()
        goal_state = self.__tablero.get_goal_state()
        states = range(self.__tablero.get_size())
        for _ in range(self.__n):
            # Fixed: the original called a bare ``choice`` that is not
            # imported anywhere (only ``sample`` and the ``random`` module
            # are), so this raised NameError.
            init_state = random.choice(states)
            self.__q_learning(init_state, goal_state, R)

    def __q_learning(self, init_state, goal_state, R):
        """Random-walk from ``init_state`` until the goal, updating Q each step.

        At least one transition is always taken, even when ``init_state`` is
        already the goal. Each visited state is assumed to have at least one
        action with non-negative reward; the loop ends only when the goal
        state is reached.
        """
        current_state = init_state
        while True:
            # An action is the index of the next state; only transitions with
            # a non-negative reward are allowed.
            possible_actions = [
                action
                for action, reward in enumerate(R[current_state])
                if reward >= 0
            ]

            next_state = random.choice(possible_actions)
            self.__Q[current_state, next_state] = (
                R[current_state, next_state]
                + self.__gamma * self.__max_q_value(next_state)
            )
            current_state = next_state
            if current_state == goal_state:
                break

    def __max_q_value(self, next_state):
        """Return the largest Q value over the actions of ``next_state``."""
        return max(self.__Q[next_state])

In [77]:
# Build a 3 x 3 board with initial state 2 and goal state 8, then print the
# randomly generated mapping state -> (i, j).
tablero = Tablero(3, 3, 2, 8)
print(tablero.get_map_tablero())

{4: (0, 0), 3: (0, 1), 0: (0, 2), 7: (1, 0), 8: (1, 1), 5: (1, 2), 2: (2, 0), 6: (2, 1), 1: (2, 2)}


In [80]:
# Train on the board with n = 200 runs and gamma = 0.8, printing the Q matrix
# before (all zeros) and after training.
entrenamiento = Entrenamiento(tablero, 200, 0.8)
print(entrenamiento.get_Q())
entrenamiento.episode()
print(entrenamiento.get_Q())

[[0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[  0.           0.           0.         284.58341236   0.
  284.58341236   0.           0.         355.72926545]
 [  0.           0.           0.           0.           0.
  284.58341236 284.58341236   0.         355.72926545]
 [  0.           0.           0.           0.           0.
    0.         284.58341236 284.58341236 384.58341236]
 [284.58341236   0.           0.           0.         284.58341236
  284.58341236   0.         284.58341236 355.72926545]
 [  0.           0.           0.         284.58341236   0.
    0.           0.         284.58341236 355.72926545]
 [284.58341236 284.58341236   0.         284.58341236   0.
    0.         284.58341236   0.         355.72926545]
 [  0.         284.58341236 284.58341

In [49]:
# Scratch check: pick a random key from a dict.
x = {1:"uno", 2:"dos", 3:"tres"}
lista = list(x.keys())
# Use the imported ``random`` module; a bare ``choice`` is not imported in
# this file and would raise NameError.
random.choice(lista)

2

In [13]:
from tkinter import *
from tkinter import ttk

# Module-level board dimensions.
# NOTE(review): crear_tablero assigns local variables with these same names
# (it has no ``global`` statement), so it never updates these two values.
filas = 0
columnas = 0

def crear_tablero():
    """Build a board matrix from the dimensions typed into the GUI entries.

    Reads the row count from ``entry1`` and the column count from ``entry2``,
    shuffles the states ``0 .. rows*cols - 1`` and writes them row by row
    into a float matrix of shape (rows, cols).

    Returns:
        The filled ``numpy`` array.
    """
    n_rows = int(entry1.get())
    n_cols = int(entry2.get())

    total = n_rows * n_cols
    shuffled = iter(sample(list(range(total)), total))

    board = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        for col in range(n_cols):
            # Consume the shuffled states in order, one per cell.
            board[row, col] = next(shuffled)

    return board

# --- GUI construction: a single window with four labelled entries and a button.
root = Tk()
root.title("Q Learning AI")

# Container frame with padding, placed at the top of the window.
# NOTE(review): grid_propagate(0) concerns the grid geometry manager, while
# every child below is placed with pack() - confirm this call is needed.
frame = ttk.Frame(root, padding="50")
frame.grid_propagate(0)
frame.pack(side="top")

# Number of board rows (read by crear_tablero through entry1).
label1 = Label(frame, text="Filas del tablero")
label1.pack()
label1.config(justify=CENTER)

entry1 = Entry(frame, width=30)
entry1.pack()

# Number of board columns (read by crear_tablero through entry2).
label2 = Label(frame, text="Columnas del tablero")
label2.pack()
label2.config(justify=CENTER)

entry2 = Entry(frame, width=30)
entry2.pack()

# Initial state. entry3 is not read by any code visible in this cell.
label3 = Label(frame, text="Estado inicial")
label3.pack()
label3.config(justify=CENTER)

entry3 = Entry(frame, width=30)
entry3.pack()

# Goal state. entry4 is not read by any code visible in this cell.
label4 = Label(frame, text="Estado objetivo")
label4.pack()
label4.config(justify=CENTER)

entry4 = Entry(frame, width=30)
entry4.pack()

# Button that triggers crear_tablero; the matrix it returns is discarded
# because Tk ignores the return value of a command callback.
button = Button(frame)
button["text"] = "Obtener tablero"
button["command"] = crear_tablero
button.pack()

# Enter the Tk event loop (returns when the window is closed).
root.mainloop()
