## Imports

In [1]:
import os
import numpy as np
import time
import gym

## Carga del modelo construido desde el archivo .npy

In [2]:
# Load the trained Q-table from disk. Fail fast with a clear message when the
# file is missing, instead of silently skipping the load and hitting a
# NameError (or reusing a stale `trainedQ` left in the kernel) further down.
if not os.path.exists('TrainedQ.npy'):
    raise FileNotFoundError(
        "TrainedQ.npy not found in the working directory; "
        "run the training notebook first to generate it."
    )
trainedQ = np.load('TrainedQ.npy')
# Display the table's shape as the cell output.
trainedQ.shape

(5, 4, 9, 6, 2)

## Pruebas sobre el modelo entrenado.

### Contenedores utilizados para discretizar

In [3]:
# Bin edges used by get_state to discretize each of the four observation
# components. np.digitize against k edges yields k + 1 possible indices.
cart_position_bins = np.linspace(start=-2.4, stop=2.4, num=4)
cart_acc_bins = np.linspace(start=-100, stop=100, num=3)
pole_angle_bins = np.linspace(start=-0.2, stop=0.2, num=8)
angular_acc_bins = np.linspace(start=-5000, stop=5000, num=5)

In [4]:
def get_state(obs):
    """Map a continuous observation to a tuple of discrete bin indices.

    The first four entries of ``obs`` are bucketed with ``np.digitize``, each
    against its own module-level bin edges, in this order:
    ``cart_position_bins``, ``cart_acc_bins``, ``pole_angle_bins``,
    ``angular_acc_bins``.

    Returns:
        A 4-tuple of bin indices, suitable for indexing the Q-table.
    """
    bin_edges = (cart_position_bins, cart_acc_bins, pole_angle_bins, angular_acc_bins)
    return tuple(np.digitize(obs[idx], edges) for idx, edges in enumerate(bin_edges))

### Resultado promedio de recompensas de la ejecución del modelo entrenado en 100 episodios

In [5]:
def optimal_policy(state, Q):
    """Return the greedy action for ``state``.

    Looks up ``Q[state]`` and returns the index of its largest value
    (``np.argmax``, so the first maximum wins on ties).
    """
    return np.argmax(Q[state])

In [None]:
# Evaluate the greedy policy derived from trainedQ over `tries` episodes and
# report the mean total reward. Each episode is seeded with its index so the
# evaluation is reproducible.
env = gym.make('CartPole-v1', new_step_api=True)
tries = 100
rewards = np.zeros(tries)
try:
    for i in range(tries):
        episode_reward = 0
        obs = env.reset(seed=i)
        done = False
        while not done:
            state = get_state(obs)
            action = optimal_policy(state, trainedQ)
            # step() is unpacked as a 5-tuple here (termination and truncation
            # reported separately); the episode ends when either flag is set.
            obs, reward, termination, truncation, info = env.step(action)
            done = termination or truncation
            episode_reward += reward
        rewards[i] = episode_reward
finally:
    # Always release the environment, even if an episode raises (e.g. a state
    # index falling outside the Q-table), so it is not leaked in the kernel.
    env.close()
print("Average reward: ", np.mean(rewards))

## Visualización de una corrida con la política óptima obtenida, con renderizado en modo humano

In [6]:
# Run a single episode with the greedy policy while rendering it on screen,
# then print the total reward collected.
env = gym.make('CartPole-v1', render_mode='human', new_step_api=True)
episode_reward = 0
try:
    obs = env.reset(seed=0)
    done = False
    while not done:
        state = get_state(obs)
        action = optimal_policy(state, trainedQ)
        # The episode ends when either the termination or truncation flag is set.
        obs, reward, termination, truncation, info = env.step(action)
        done = termination or truncation
        episode_reward += reward
finally:
    # Always close the environment so the render window does not stay open
    # if the episode raises part-way through.
    env.close()
print('Reward: ', episode_reward)

Reward:  500.0
