<a href="https://colab.research.google.com/github/partlygloudy/joint-attention/blob/master/experiments/FlatlandTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install flatland tasks and learning agent architectures from GitHub repo
# NOTE: when the "joint_attention" directory already exists (e.g. this cell is
# re-run in the same session), git prints "fatal: destination path ... already
# exists" instead of cloning again.
!git clone https://github.com/partlygloudy/joint-attention.git "joint_attention"

# Install pygame
# NOTE(review): presumably required by the flatland package imported in the
# next cell (pygame prints its greeting on that import) -- confirm.
!pip install pygame

# Configure to use preferred tensorflow version
# (%tensorflow_version is a Colab line magic; it is issued before tensorflow
# is imported so that the import below picks up the 1.x release.)
%tensorflow_version 1.x
import tensorflow as tf

# Import other requirements
import time
import random

fatal: destination path 'joint_attention' already exists and is not an empty directory.


In [2]:
from joint_attention.flatland import flatland_tasks
from joint_attention.learning_agents import dqn
import importlib

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [0]:
# -- EXPERIMENT PARAMETERS -- #

# Run length
trials = 1                       # Number of times to run the whole experiment
training_frames = 2500000        # Total environment steps to train over
epoch_size = 5000                # Environment steps per reporting epoch

# Replay memory and learning updates
mem_capacity = 250000            # Replay memory size (passed to the agent as memsize)
random_buffer_len = 5000         # Number of initial random actions to take
batch_size = 32                  # Passed to agent.learn_from_memory on each update
k = 1                            # Number of steps between training batches

# Epsilon-greedy schedule: starting value, final value, and the number of
# steps epsilon decreases over
e, e_min, e_steps = 1.0, 0.05, 150000

# Passed to the agent as gamma
discount = 0.99

In [0]:
# -- DATA COLLECTION -- #

# Counters advanced by the training loop
game_counter = 0                 # Games completed so far
epoch_counter = 1                # Number of the next epoch to report (1-based)

# Fixed-size window of the most recent per-game reward totals
reward_last_10 = [0.0] * 10

# History lists, all starting out empty (three independent list objects)
eval_hist, training_eval_hist, e_hist = [], [], []

In [5]:
# importlib.reload(dqn)
# importlib.reload(flatland_tasks)

# -- TRAINING LOOP -- #
#
# Builds a fresh TaskFood200 environment and DQN agent, then plays games until
# `training_frames` environment steps have been taken. Actions are chosen
# epsilon-greedily: purely at random for the first `random_buffer_len` steps,
# afterwards at random with probability `e`, which is annealed down to `e_min`.
# Every transition is stored in the agent's replay memory, and one summary
# line is printed each time an epoch boundary is crossed at the end of a game.
#
# NOTE(review): this cell resets `current_frame` and the agent, but it keeps
# mutating `e`, `epoch_counter`, `game_counter` and `reward_last_10`, which are
# initialised in earlier cells. Re-run those cells before re-running this one,
# otherwise a new agent starts with an already-decayed epsilon and stale
# counters.

# Create environment
env = flatland_tasks.TaskFood200()

# Initialize agent
agent = dqn.DQNAgent(memsize=mem_capacity, gamma=discount)

# Amount epsilon is lowered per step once the random warm-up is over.
# Loop-invariant, so computed once. The literal 1.0 assumes epsilon starts
# at 1.0; keep it in sync with the starting value of `e`.
e_decay = (1.0 - e_min) / e_steps

# Train agent
current_frame = 0
while current_frame < training_frames:

    # Reset the game
    game_reward = 0.0
    s = env.reset()

    # Run until game ends
    done = False
    while not done:

        # Anneal epsilon (never below e_min) after the warm-up phase
        if current_frame >= random_buffer_len:
            e = max(e_min, e - e_decay)

        # Choose randomly with probability e, and always during warm-up.
        # random.randint is inclusive on both ends, so this draws from 0..3.
        if random.random() < e or current_frame < random_buffer_len:
            a = random.randint(0, 3)

        # Otherwise choose best action as predicted by agent
        else:
            a = agent.choose_action(s)

        # Take the action
        # NOTE(review): action 1 is rewritten to 0 before it reaches the
        # environment and the replay memory, so neither ever sees action 1.
        # The video-capture cell does not apply this remapping -- confirm
        # whether this is intentional or leftover debugging.
        if a == 1:
            a = 0
        s_prime, r, done = env.step(a)

        # Add experience to memory
        # NOTE(review): the terminal flag `done` is not stored; verify that
        # learn_from_memory does not need it.
        agent.memory.add(s, a, r, s_prime)

        # Update network every k timesteps, once the memory holds at least
        # random_buffer_len experiences
        if agent.memory.size >= random_buffer_len and current_frame % k == 0:
            agent.learn_from_memory(batch_size)

        # Update current state
        s = s_prime

        # Update tracking info
        game_reward += r
        current_frame += 1

    # Game finished, log data (the reward window is used as a ring buffer)
    reward_last_10[game_counter % 10] = game_reward
    game_counter += 1

    if current_frame >= epoch_counter * epoch_size:

        # Average only over slots that hold a finished game: until 10 games
        # have been played the remaining slots are still the 0.0 placeholders
        # and would drag the average down. game_counter is >= 1 here.
        games_in_window = min(game_counter, len(reward_last_10))

        # Print epoch results
        out_str = "Epoch #" + str(epoch_counter)
        out_str += "\tTotal Games:" + str(game_counter)
        out_str += "\tReward avg: " + str(sum(reward_last_10) / float(games_in_window))
        if len(eval_hist) > 0:
            out_str += "\tEval. Avg.: " + str(eval_hist[-1])
        out_str += "\te: " + str(e)

        print(out_str)

        # Increment epoch counter
        epoch_counter += 1


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch #1	Total Games:24	Reward avg: 2.3	e: 0.9996833333333335
Epoch #2	Total Games:52	Reward avg: 1.4	e: 0.9680166666666814
Epoch #3	Total Games:75	Reward avg: 2.4	e: 0.9344500000000302
Epoch #4	Total Games:101	Reward avg: 1.9	e: 0.9043666666667107
Epoch #5	Total Games:126	Reward avg: 2.3	e: 0.8727000000000587
Epoch #6	Total Games:149	Reward avg: 2.8	e: 0.8416666666667396
Epoch #7	Total Games:173	Reward avg: 2.4	e: 0.809050000000088
Epoch #8	Total Games:197	Reward avg: 1.6	e: 0.7780166666667689
Epoch #9	Total Games:221	Reward avg: 2.9	e: 0.7466666666667834
Epoch #10	Total Games:241	Reward avg: 3.3	e: 0.7143666666667983
Epoch #11	Total Games:262	Reward avg: 3.0	e: 0.6823833333334797
Epoch #12	Total Games:285	Reward avg: 2.8	e: 0.6507166666668276
Epoch #13	Total Games:310	Reward avg: 1.8	e: 0.6200000000001751
Epoch 

KeyboardInterrupt: ignored

In [6]:
# ### TRIAL VIDEO CAPTURE STUFF ###
#
# Plays several games using only agent.choose_action (no random exploration)
# and writes each game to video/test_vid_<n>.avi, one rendered frame per step.

import cv2
import numpy as np
import glob
import os

# Create directory for storing frames and video. Done unconditionally but
# idempotently, so the cell works on a fresh runtime and on re-runs alike.
video_dir = "video"
if not os.path.isdir(video_dir):
    os.makedirs(video_dir)

# Number of sample videos to record. Kept under its own name so that the
# experiment parameter `trials` is not silently overwritten by this cell.
video_trials = 5

for t in range(1, video_trials + 1):

    # Create instance of environment for trial
    env = flatland_tasks.TaskFood200()
    s = env.reset()

    # Render one frame up front just to learn the frame size; the writer is
    # given (shape[1], shape[0]) of that frame as its frame size.
    sample_frame = env.render(mode="return")
    dims = (sample_frame.shape[1], sample_frame.shape[0])

    # Video writer object (10 frames per second)
    vid = cv2.VideoWriter(video_dir + '/test_vid_' + str(t) + '.avi', cv2.VideoWriter_fourcc(*'DIVX'), 10, dims)

    # Frame counter
    frames = 0

    try:
        done = False
        while not done:

            a = agent.choose_action(s)
            s_prime, r, done = env.step(a)
            s = s_prime

            # Save rendered game state
            vid.write(env.render(mode="return"))
            frames += 1
    finally:
        # Release VideoWriter object even if the rollout fails or is
        # interrupted, so the file handle is not leaked.
        vid.release()

    # Print info
    print("Trial #" + str(t) + " recorded\t-\t" + str(frames) + " frames captured")


Trial #1 recorded	-	250 frames captured
Trial #2 recorded	-	100 frames captured
Trial #3 recorded	-	250 frames captured
Trial #4 recorded	-	350 frames captured
Trial #5 recorded	-	200 frames captured
