In [None]:
#!git clone https://gitlab.lrz.de/arl-ws2021/gym-crawlingrobot.git
#!pip install -e ../.

# Manual control (keyboard)

In [None]:
import sys
import numpy as np
import pygame
import os
#os.environ['SDL_VIDEODRIVER']='dummy'
import gym
import gym_crawlingrobot

In [None]:
# Manual keyboard control of the discrete crawling robot.
#   UP / W    -> action 0        RIGHT / D -> action 1
#   DOWN / S  -> action 2        LEFT / A  -> action 3
#   R         -> reset the episode
#   SPACE     -> toggle rendering of intermediate steps
#   ESC or closing the window -> leave the loop
env = gym.make('crawlingrobot-discrete-v1', rotation_angles=5, goal_distance=1300, window_size=(1500, 800), render_intermediate_steps=True, plot_steps_per_episode=True)
# NOTE(review): the meaning of mode 2 is defined inside gym_crawlingrobot; confirm it is the mode intended for manual control.
env.robot.mode = 2
done = False
terminate = False
action = None
env.reset()

# Keyboard key -> discrete action index.
KEY_TO_ACTION = {
    pygame.K_UP: 0, pygame.K_w: 0,
    pygame.K_RIGHT: 1, pygame.K_d: 1,
    pygame.K_DOWN: 2, pygame.K_s: 2,
    pygame.K_LEFT: 3, pygame.K_a: 3,
}

while not terminate:
    for event in pygame.event.get():
        wants_quit = event.type == pygame.QUIT or (
            event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE
        )
        if wants_quit:
            pygame.quit()
            terminate = True
            # Stop handling queued events: pygame is shut down, so stepping
            # the environment for a later key press in the same batch is unsafe.
            break
        if event.type != pygame.KEYDOWN:
            continue

        if event.key in KEY_TO_ACTION:
            action = KEY_TO_ACTION[event.key]
        elif event.key == pygame.K_r:
            env.reset()
            action = 3
        elif event.key == pygame.K_SPACE:
            env.robot.render_intermediate_steps = not env.robot.render_intermediate_steps

        # Compare against None: action 0 is a valid action but is falsy, so a
        # plain truthiness test would skip it and lose its `done` result.
        if action is not None:
            obs, rewards, done, info = env.step(action)
            action = None
        if done:
            env.reset()
            # Clear the flag so the next key press does not reset again.
            done = False
            # NOTE(review): kept from the original - action 3 stays pending and is
            # executed on the next key press that does not select another action.
            action = 3

    if not terminate:
        env.render()
    else:
        print ("exiting ... ")


# Q-Learning

In [None]:
# Tabular Q-learning on the discrete crawling robot with an epsilon-greedy policy.
np.set_printoptions(threshold=sys.maxsize)

alpha = 0.3       # learning rate of the Q-update
gamma = 0.9       # discount factor
epsilon = 0.15    # probability of taking a random (exploratory) action
maxSteps = 10000  # step limit per episode
episodes = 200    # number of training episodes


env = gym.make('crawlingrobot-discrete-v1', rotation_angles=5, goal_distance=2500, window_size=(1500, 800), plot_steps_per_episode=True)
# Number of distinct values per observation dimension (currently 5).
# Named n_values instead of `max` so the builtin max() is not shadowed.
n_values = int(env.observation_space.high[0]) + 1
n_dims = len(env.observation_space.high)


def encode_state(obs):
    """Map an observation to its row index in the Q-table.

    The components of `obs` are read as the digits of a base-`n_values`
    number, most significant first. For a two-component observation this is
    `obs[0] * n_values + obs[1]`.

    Parameters:
        obs: observation returned by `env.reset()` or `env.step()`; a scalar
            or a sequence of per-dimension values.

    Returns:
        int: row index into `Q`.
    """
    index = 0
    for value in np.ravel(obs):
        index = index * n_values + value
    return int(index)


Q = np.zeros([n_values ** n_dims, env.action_space.n])
terminate = False

for episode in range(episodes):
    done = False
    # Encode the initial observation the same way as every later one, so
    # Q[state] always addresses exactly one row of the table.
    state = encode_state(env.reset())
    step = 0

    print("Episode", episode, "started...")

    while not done and step < maxSteps and not terminate:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                terminate = True
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    pygame.quit()
                    terminate = True
                if event.key == pygame.K_SPACE:
                    env.robot.render_intermediate_steps = not env.robot.render_intermediate_steps

        if terminate:
            print ("exiting ... ")
            break

        # action selection (epsilon-greedy)
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(Q[state])

        # perform action in environment
        nextObs, reward, done, _ = env.step(action)
        env.render()
        nextState = encode_state(nextObs)

        # Q-learning update
        Q[state, action] += alpha * (reward + gamma * np.max(Q[nextState]) - Q[state, action])

        # time transition
        state = nextState
        step += 1

    if not terminate:
        res = 0
        if len(env.robot.episode_time_results) > 0:
            res = env.robot.episode_time_results[-1]
        print("Episode", episode, "took", step, "steps, took", res,"simulation seconds")
    else:
        pygame.quit()
        break


# PPO2



In [None]:
#!pip install stable-baselines
#!pip install tensorflow==1.15

In [None]:
import pygame
import sys
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines.bench import Monitor
from stable_baselines import PPO2
import gym
import gym_crawlingrobot

In [None]:
# Path prefix passed to Monitor by learn() and show().
log_dir = ""

# Raw (unwrapped) continuous-action environment. `robot_env` and `env`
# both refer to this same object at this point.
robot_env = gym.make(
    'crawlingrobot-continuous-v1',
    goal_distance=2500,
    window_size=(1500, 800),
    render_intermediate_steps=False,
)
env = robot_env

# Output directory; created up front, no error if it already exists.
ppo2 = "ppo2"
os.makedirs(ppo2, exist_ok=True)

def learn():
    """Train a PPO2 agent on the crawling robot and save the results.

    Wraps `robot_env` in Monitor -> DummyVecEnv -> VecNormalize (observations
    normalised, rewards not), trains for 30000 timesteps with `callback`
    invoked by the learner, then writes the model to
    "ppo2/ppo2_crawling_robot" and the normalisation wrapper to
    "ppo2/vec_normalize.pkl".

    The wrapped environment is kept in a local variable. The notebook-level
    `env` is neither rebound nor deleted, so `callback` and `show()` can
    still be used after training.
    """
    vec_env = VecNormalize(
        DummyVecEnv([lambda: Monitor(robot_env, log_dir)]),
        norm_obs=True,
        norm_reward=False,
    )
    # NOTE(review): learning_rate=0.15 is kept from the original; it is far above
    # the learning rates usually used with PPO - confirm it is intended.
    model = PPO2(MlpPolicy, vec_env, verbose=1, learning_rate=0.15)
    model.learn(total_timesteps=30000, callback=callback)
    model.save("ppo2/ppo2_crawling_robot")
    vec_env.save("ppo2/vec_normalize.pkl")

def callback(a, b):
    """Handle window/keyboard events and render one frame.

    Passed to `model.learn` as the training callback and called directly by
    `show()` once per step.

    Parameters:
        a, b: unused. They exist because the callback is invoked with two
            positional arguments (presumably the learner's local and global
            variable dicts - confirm against stable-baselines).

    Returns:
        None.

    Raises:
        SystemExit: via `sys.exit(0)` when the window is closed or ESC is
            pressed (after `pygame.quit()`).
    """
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit(0)
        elif event.type == pygame.KEYDOWN:
            if event.key == pygame.K_ESCAPE:
                pygame.quit()
                sys.exit(0)
            if event.key == pygame.K_SPACE:
                robot_env.robot.render_intermediate_steps = not robot_env.robot.render_intermediate_steps

    # Render through the raw environment rather than the global `env`, whose
    # binding is changed by learn() and show(); `robot_env` is never rebound.
    robot_env.render()

def show():
    """Replay the saved PPO2 policy in the environment until the user quits.

    Loads the normalisation wrapper from "ppo2/vec_normalize.pkl" and the
    model from "ppo2/ppo2_crawling_robot", then loops forever: predict an
    action, print it, step the environment, and call `callback` to process
    events and render. The loop only ends when `callback` calls
    `sys.exit(0)` (window closed or ESC).

    The wrapped environment is held in a local variable built from
    `robot_env`, so calling show() repeatedly or after learn() does not
    depend on, or change, the notebook-level `env`.
    """
    vec_env = DummyVecEnv([lambda: Monitor(robot_env, log_dir)])
    vec_env = VecNormalize.load("ppo2/vec_normalize.pkl", vec_env)
    model = PPO2.load("ppo2/ppo2_crawling_robot")

    # Evaluation only: the wrapper flags are switched so that training-time
    # updates and reward normalisation are off.
    vec_env.training = False
    vec_env.norm_reward = False

    obs = vec_env.reset()
    done = False

    while True:
        if done:
            # Keep the observation returned by reset() so the next prediction
            # is made from the environment's current state.
            obs = vec_env.reset()

        action, _states = model.predict(obs)
        print(action)
        obs, rewards, done, info = vec_env.step(action)
        callback(None, None)


# Entry point of the PPO2 cell: train and save the agent. To replay a saved
# agent instead, enable the show() line below.
learn()
#show()
# Shut pygame down once the selected run has returned.
pygame.quit()
