In [3]:
import cv2
import gym
from plangym.wrappers import FireResetEnv, FrameStack, MaxAndSkipEnv, NoopResetEnv

class ProcessFrame84(gym.ObservationWrapper):
    """Observation wrapper that converts raw frames into 84x84x1 uint8 grayscale images.

    NOTE: this cell uses ``np`` and ``spaces``, which are only imported in a later
    cell of the notebook. Both names are looked up at call time, so that cell has to
    be executed before the wrapper is instantiated.
    """

    def __init__(self, env=None):
        super(ProcessFrame84, self).__init__(env)
        # ``process`` returns uint8 pixels in [0, 255]; declare the matching dtype
        # explicitly instead of leaving Box to pick a floating-point one.
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(84, 84, 1), dtype=np.uint8
        )

    def observation(self, obs):
        """Return ``obs`` converted by :meth:`process`."""
        return ProcessFrame84.process(obs)

    @staticmethod
    def process(frame):
        """Convert one colour frame into an 84x84x1 uint8 grayscale image.

        Args:
            frame: Array whose ``size`` is ``210 * 160 * 3`` or ``250 * 160 * 3``;
                it is reshaped to ``(height, 160, 3)`` before processing.

        Returns:
            ``numpy`` array of shape ``(84, 84, 1)`` and dtype ``uint8``.

        Raises:
            AssertionError: If ``frame.size`` matches neither supported resolution.
        """
        if frame.size == 210 * 160 * 3:
            img = np.reshape(frame, [210, 160, 3]).astype(np.float32)
        elif frame.size == 250 * 160 * 3:
            img = np.reshape(frame, [250, 160, 3]).astype(np.float32)
        else:
            # Raised explicitly (rather than ``assert False``) so the check is not
            # stripped when Python runs with -O, which would leave ``img`` unbound.
            raise AssertionError("Unknown resolution.")
        # Weighted sum of the three colour channels -> single-channel image.
        img = img[:, :, 0] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 2] * 0.114
        # cv2 takes dsize as (width, height): the result has 110 rows and 84 columns.
        resized_screen = cv2.resize(img, (84, 110), interpolation=cv2.INTER_AREA)
        # Keep rows 18..101 so the final image is square (84x84).
        x_t = resized_screen[18:102, :]
        x_t = np.reshape(x_t, [84, 84, 1])
        return x_t.astype(np.uint8)

def wrap(env):
    """Stack the Atari preprocessing wrappers on top of ``env`` and return the result.

    Wrappers are applied in this order: ``NoopResetEnv`` (``noop_max=30``),
    ``FireResetEnv`` (only when the unwrapped environment lists a ``'FIRE'``
    action), ``ProcessFrame84``, ``MaxAndSkipEnv`` (``skip=4``) and finally
    ``FrameStack`` with 4 frames.
    """
    wrapped = NoopResetEnv(env, noop_max=30)
    action_names = wrapped.unwrapped.get_action_meanings()
    if 'FIRE' in action_names:
        wrapped = FireResetEnv(wrapped)
    frames = ProcessFrame84(wrapped)
    skipped = MaxAndSkipEnv(frames, skip=4)
    return FrameStack(skipped, 4)

In [4]:
import numpy as np
from collections import deque
import gym
from gym import spaces
import cv2

from gym import logger as gymlogger
from gym.wrappers import Monitor
# Raise gym's logging threshold to level 40 so that only errors are reported.
gymlogger.set_level(40) #error only

import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay

from pyvirtualdisplay import Display
# Start a hidden (visible=0) 1400x900 virtual display.
# NOTE(review): presumably required so frames can be rendered for video recording
# on a machine without a screen — confirm.
# NOTE: the name ``display`` shadows IPython's own ``display`` helper in this
# notebook; show_video() goes through ``ipythondisplay.display`` instead.
display = Display(visible=0, size=(1400, 900))
display.start()


def show_video():
  """Embed the first recorded ``video/*.mp4`` file in the notebook output.

  The pattern ``video/*.mp4`` is resolved relative to the current working
  directory. When at least one file matches, the first path returned by
  ``glob`` is read, base64-encoded and displayed as an inline, looping HTML
  ``<video>`` element. When nothing matches, "Could not find video" is printed.
  """
  mp4list = glob.glob('video/*.mp4')
  if not mp4list:
    print("Could not find video")
    return

  mp4 = mp4list[0]
  # Read-only binary mode inside a context manager: the handle is closed
  # deterministically and read-only files no longer fail to open (the previous
  # 'r+b' mode requested write access that was never used).
  with io.open(mp4, 'rb') as video_file:
    video = video_file.read()
  encoded = base64.b64encode(video)
  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env_video(env):
  """Return ``env`` wrapped in a gym ``Monitor`` that writes to ``./video``.

  ``force=True`` is forwarded to ``Monitor`` unchanged.
  """
  return Monitor(env, './video', force=True)

In [5]:
from fragile.learning.imitation_atari.network import ConvolutionalNeuralNetwork, ModelTrainer

In [6]:
import gym
import argparse
import numpy as np
import atari_py
from IPython.display import clear_output
import time
from plangym import AtariEnvironment

from fragile.core import DiscreteEnv, DiscreteUniform, GaussianDt
from fragile.core.tree import HistoryTree
from fragile.core.swarm import Swarm
from fragile.distributed import ParallelEnv

import tensorflow.compat.v1 as tf
# Use TensorFlow through its v1 compatibility API with v2 behaviour switched off.
tf.disable_v2_behavior()

# Number of frames per observation; same value as the FrameStack depth used in wrap().
FRAMES_IN_OBSERVATION = 4
# Side length in pixels of one frame; same value as the 84x84 output of ProcessFrame84.
FRAME_SIZE = 84
# Shape handed to ModelTrainer as ``input_shape``: (height, width, stacked frames).
INPUT_SHAPE = (FRAME_SIZE, FRAME_SIZE, FRAMES_IN_OBSERVATION)
# NOTE(review): not referenced in the visible cells — presumably a replay-memory
# capacity consumed elsewhere; confirm or remove.
MEMORY_SIZE = 90000
# Number of times FragileRunner.run() executes the swarm per call.
EXPLORE_MEMORY_STEPS = 2


class FragileRunner:
    """Explores an Atari game with a fragile ``Swarm``.

    The constructor builds a plangym ``AtariEnvironment`` (preprocessed with
    ``wrap``) and a ``Swarm`` configured from the attributes set below. Call
    :meth:`run` to execute the swarm; the swarm itself stays available as
    ``self.swarm``.
    """

    def __init__(self, game_name):
        """Create the environment and the swarm.

        Args:
            game_name: Name passed to ``AtariEnvironment`` to select the game.
        """
        self.env = AtariEnvironment(
            name=game_name,
            clone_seeds=True,
            wrappers=[wrap],
        )

        self.game_name = game_name
        self.env_callable = lambda: DiscreteEnv(env=self.env)
        # NOTE(review): the ``env`` argument supplied by the swarm is ignored here and
        # ``self.env`` (the plangym environment) is used instead — confirm intended.
        self.model_callable = lambda env: DiscreteUniform(env=self.env)
        self.prune_tree = True
        # A bigger number will increase the quality of the trajectories sampled.
        self.n_walkers = 16
        self.max_epochs = 64  # Increase to sample longer games.
        self.reward_scale = 2  # Rewards are more important than diversity.
        self.distance_scale = 1
        self.minimize = False  # We want to get the maximum score possible.
        self.swarm = Swarm(
            model=self.model_callable,
            env=self.env_callable,
            tree=lambda: HistoryTree(names=["observs", "actions"], prune=True),
            n_walkers=self.n_walkers,
            max_epochs=self.max_epochs,
            prune_tree=self.prune_tree,
            reward_scale=self.reward_scale,
            distance_scale=self.distance_scale,
            minimize=self.minimize,
        )

    def run(self):
        """Run the swarm ``EXPLORE_MEMORY_STEPS`` times and return it.

        After every run the maximum of ``self.swarm.best_reward`` is printed.

        Returns:
            ``self.swarm``, i.e. the object the training loop hands to the model
            trainer. (The previous ``return self.memory`` referenced an attribute
            that was never assigned and therefore raised ``AttributeError``.)
        """
        print("Creating fractal replay memory...")
        for _ in range(EXPLORE_MEMORY_STEPS):
            self.swarm.run()
            print("Max. fractal cum_rewards:", max(self.swarm.best_reward))
        return self.swarm




Instructions for updating:
non-resource variables are not supported in the long term


In [7]:
class FractalExplorationImitationLearning:
    """Alternates swarm exploration, model training and a recorded test episode.

    Instantiating the class runs the whole experiment: ``__init__`` builds the
    explorer and the model trainer and then enters :meth:`_main_loop`.
    """

    def __init__(self):
        # We choose a game
        game_name = "SpaceInvaders"

        # Choose after how many runs we should stop
        total_run_limit = 100
        print("Selected game: " + str(game_name))
        print("Total run limit: " + str(total_run_limit))

        env_name = game_name + "Deterministic-v4"
        # This environment is only needed to read the number of actions, so it is
        # closed right away instead of being left open for the whole experiment.
        env = wrap_env_video(wrap(gym.make(env_name)))
        try:
            n_actions = env.action_space.n
        finally:
            env.close()
        explorer = FragileRunner(env_name)

        # Game model
        game_model = ModelTrainer(input_shape=INPUT_SHAPE, action_space=n_actions)

        # model training
        self._main_loop(env_name, explorer, game_model, total_run_limit)

    def _main_loop(self, env_name, explorer, game_model, total_run_limit):
        """Run ``total_run_limit`` rounds of explore -> train -> test.

        Args:
            env_name: Gym environment id used to create the test environment.
            explorer: Object exposing ``run()`` and a ``swarm`` attribute.
            game_model: Object exposing ``train(swarm)`` and ``move(state)``.
            total_run_limit: Number of rounds to execute.
        """
        for run in range(1, total_run_limit + 1):
            print("Training run:", run)

            # We explore the game space state using fragile framework
            explorer.run()

            # Training a run
            game_model.train(explorer.swarm)

            # Testing model
            clear_output()
            print("Testing Neural Network...")
            score = self._play_test_episode(env_name, game_model)

            print("Neural Network score:", score)
            show_video()

    def _play_test_episode(self, env_name, game_model):
        """Play one recorded episode with ``game_model`` and return the summed reward."""
        env = wrap_env_video(wrap(gym.make(env_name)))
        score = 0
        try:
            terminal = False
            current_state = env.reset()
            while not terminal:
                action = game_model.move(current_state)
                current_state, reward, terminal, _ = env.step(action)
                score += reward
        finally:
            # Always close the environment, even when the episode is interrupted
            # or the model raises.
            env.close()
        return score

# Constructing the class runs the whole experiment: its __init__ calls _main_loop.
if __name__ == "__main__":
    FractalExplorationImitationLearning()

Testing Neural Network...
Neural Network score: 10.0


Training run: 2
Creating fractal replay memory...


HBox(children=(FloatProgress(value=0.0, description='Swarm', max=64.0, style=ProgressStyle(description_width='…

HTML(value='')


Max. fractal cum_rewards: 185.0


HBox(children=(FloatProgress(value=0.0, description='Swarm', max=64.0, style=ProgressStyle(description_width='…

HTML(value='')


Max. fractal cum_rewards: 140.0


KeyboardInterrupt: 