**POZNÁMKA: Tento notebook je určený pre platformu Google Colab, ktorá zdarma poskytuje hardvérovú akceleráciu. Je však možné ho spustiť (možno s drobnými úpravami) aj ako štandardný Jupyter notebook, pomocou lokálnej grafickej karty.** 



In [None]:
#@title -- Installation of Packages -- { display-mode: "form" }
# Installs everything the notebook needs. All pip commands are run through
# `sys.executable -m pip` so the packages land in the interpreter that is
# running this kernel.
import sys
import shutil
USE_NBCAP = False

# The virtual display (xvfb) and the nbcap screen-capture tooling are only
# installed when the `apt` package manager is found on the PATH; on other
# systems USE_NBCAP stays False.
if not shutil.which('apt') is None:
    !apt update
    !apt install -y xvfb x11-utils
    !{sys.executable} -m pip install pyvirtualdisplay
    !{sys.executable} -m pip install --upgrade pyglet
    !{sys.executable} -m pip install git+https://github.com/michalgregor/nbcap.git

    USE_NBCAP = True

# as of 15th September 2022 OpenAI gym is using the new step interface, which
# returns a 5-tuple; so we are restricting gym to 0.23.1
!{sys.executable} -m pip install gym==0.23.1
# !{sys.executable} -m pip install gym

# Simulation backend and the RL libraries imported by the following cells.
!{sys.executable} -m pip install pybullet
!{sys.executable} -m pip install class_utils[tensorboard]@git+https://github.com/michalgregor/class_utils.git
!{sys.executable} -m pip install tianshou
!{sys.executable} -m pip install git+https://github.com/michalgregor/tianshou_agents.git

In [None]:
#@title -- Import of Necessary Packages -- { display-mode: "form" }
%load_ext tensorboard

import shutil
# Screen capture through nbcap is enabled exactly when `apt` is found on
# the PATH -- the same condition the installation cell uses to decide
# whether nbcap gets installed at all.
USE_NBCAP = shutil.which('apt') is not None

if USE_NBCAP:
    from nbcap import (
        ShowVideoCallback, ScreenRecorder, OutputManager, DisplayProcess
    )

import pybullet_envs # this sets up the environments; don't delete it
from tianshou_agents.utils import VectorEnvRenderWrapper
from tianshou_agents.methods.sac import sac_pybullet
from tianshou.data import Collector
from tianshou.env import BaseVectorEnv
from tianshou_agents.components.preset import AgentPresetWrapper
from tianshou_agents.components.env import setup_envs
from functools import partial
import gym

In [None]:
#@title -- Auxiliary Functions -- { display-mode: "form" }

if not USE_NBCAP:
    from contextlib import suppress

    # suppress() with no exception types swallows nothing; it only serves
    # as a do-nothing context manager so that `with SCREEN_RECORDER:`
    # works whether or not recording is available.
    SCREEN_RECORDER = suppress()
else:
    display_size = (700, 500)
    show_video = ShowVideoCallback(dimensions=display_size)

    # Re-running this cell must not create a second display: DISP_PROC is
    # only created when the name is not bound yet.
    try:
        DISP_PROC
    except NameError:
        DISP_PROC = DisplayProcess(display_size=display_size)

    def make_screen_recorder(max_gui_outputs=1):
        """Create a ScreenRecorder bound to the shared display process.

        The recorder is given the id of DISP_PROC, the module-level
        display_size, the video path "output" and segment_time=10; its
        video callback is show_video wrapped by a fresh OutputManager.

        Args:
            max_gui_outputs: Forwarded to OutputManager.

        Returns:
            The newly constructed ScreenRecorder.
        """
        manager = OutputManager(max_gui_outputs=max_gui_outputs)
        callback = manager(show_video)

        return ScreenRecorder(
            DISP_PROC.id, display_size, "output",
            segment_time=10, video_callback=callback
        )

    SCREEN_RECORDER = make_screen_recorder()

class RenderCollector(Collector):
    """Proxy around an existing collector that collects under SCREEN_RECORDER.

    The wrapped collector keeps all the state: ``Collector.__init__`` is not
    called here, and every public attribute that is not defined on this
    class is looked up on the wrapped collector via ``__getattr__``.

    Args:
        collector: The collector to wrap. If its ``env`` is a
            ``BaseVectorEnv``, that env is replaced in place by a
            ``VectorEnvRenderWrapper`` around it.
        render: Default ``render`` argument passed on by ``collect`` when
            the caller does not supply a truthy one.
    """

    def __init__(self, collector, render=0.01):
        self.collector = collector

        if isinstance(self.collector.env, BaseVectorEnv):
            self.collector.env = VectorEnvRenderWrapper(
                self.collector.env)

        self.render = render

    @property
    def collect_time(self):
        """Collect time of the wrapped collector, floored at 1e-20."""
        # presumably the floor protects callers that divide by this value
        # -- TODO confirm against the trainer code
        return max(self.collector.collect_time, 1e-20)

    @collect_time.setter
    def collect_time(self, val):
        self.collector.collect_time = val

    def collect(
        self, n_step = None, n_episode = None, random = False,
        render = None, no_grad = True,
    ):
        """Run the wrapped collector's ``collect`` inside SCREEN_RECORDER.

        Any falsy ``render`` (``None`` as well as ``0``) is replaced by the
        default given at construction time. All arguments are forwarded
        positionally and the wrapped collector's result is returned as is.
        """
        with SCREEN_RECORDER:
            render = render or self.render
            return self.collector.collect(n_step, n_episode, random, render, no_grad)

    def __getattr__(self, name):
        """Delegate missing public attributes to the wrapped collector.

        Raises:
            AttributeError: For names starting with an underscore, and for
                ``collector`` itself when it has not been set.
        """
        if name.startswith('_'):
            raise AttributeError("attempted to get missing private attribute '{}'".format(name))
        # __getattr__ only runs when normal lookup failed, so reaching here
        # with name == 'collector' means the instance has no wrapped
        # collector; reading self.collector below would then re-enter this
        # method until RecursionError.
        if name == 'collector':
            raise AttributeError(
                "'{}' object has no attribute 'collector'".format(type(self).__name__))
        return getattr(self.collector, name)

    def __str__(self):
        return '<{}{}>'.format(type(self).__name__, self.collector)

    def __repr__(self):
        return str(self)

class AddRenderCollector:
    """Callable that wraps an agent's test collector in a RenderCollector.

    Args:
        render: ``render`` default handed to every RenderCollector created
            by this object.
    """

    def __init__(self, render=0.01):
        # test envs of the agent processed by the previous call, if any
        self._prev_test_envs = None
        self.render = render

    def __call__(self, agent):
        """Patch ``agent.test_collector`` in place and return the agent.

        The test envs remembered from the previous call are closed first.
        """
        # we close the previous pyglet window before
        # opening a new one to work around a bug on Windows
        if self._prev_test_envs is not None:
            self._prev_test_envs.close()

        # Wrap the agent's existing test collector (the attribute name was
        # previously misspelled, which raised AttributeError on every call).
        agent.test_collector = RenderCollector(
            agent.test_collector, render=self.render
        )

        self._prev_test_envs = agent.test_envs

        return agent
    
class AgentPresetPatch(AgentPresetWrapper):
    """Preset wrapper that builds the envs itself and adds rendering.

    Calling an instance creates the train and test envs, calls ``render()``
    and then ``reset()`` on the first test worker, builds the agent through
    the wrapped preset and finally replaces the agent's test collector by a
    RenderCollector.

    Args:
        preset: The preset to wrap; handed to ``AgentPresetWrapper``.
        render: ``render`` default given to the RenderCollector.
    """

    def __init__(self, preset, render=None):
        super().__init__(preset)
        self.render = render
        # test envs of the most recently built agent, closed on the next call
        self._prev_test_envs = None

    def _resolve_tasks(self, train_task, test_task, task_name):
        """Fill in missing tasks and return ``(train_task, test_task)``.

        A missing train task becomes ``partial(gym.make, task_name)``; a
        missing test task falls back to the (resolved) train task.
        """
        resolved_train = (
            partial(gym.make, task_name) if train_task is None else train_task
        )
        resolved_test = resolved_train if test_task is None else test_task
        return resolved_train, resolved_test

    def __call__(self, task_name, *args, **kwargs):
        """Build an agent for ``task_name`` with a rendering test collector.

        Keyword arguments override the configuration obtained from
        ``derive_conf()``; the ``train_envs`` and ``test_envs`` entries are
        then always replaced by freshly set-up envs.
        """
        # we close the previous GUI before opening a new one
        previous = self._prev_test_envs
        if previous is not None:
            previous.close()

        conf = self.derive_conf()
        conf.update(kwargs)

        train_task, test_task = self._resolve_tasks(
            conf.get('task'), conf.get('test_task'), task_name
        )

        train_cfg = conf['train_collector']
        conf['train_envs'] = setup_envs(
            train_task, train_cfg['env_class'], train_cfg['env']
        )

        test_cfg = conf['test_collector']
        test_envs = setup_envs(
            test_task, test_cfg['env_class'], test_cfg['env']
        )
        conf['test_envs'] = test_envs

        # render() is called before reset() on the first test worker;
        # presumably PyBullet envs need this order to open their GUI
        # -- TODO confirm
        first_worker = test_envs.workers[0]
        first_worker.render()
        first_worker.reset()

        agent = self._preset(task_name, *args, **conf)
        agent.test_collector = RenderCollector(
            agent.test_collector, render=self.render
        )

        self._prev_test_envs = agent.test_envs
        return agent
        
# Rebind the imported preset to its patched wrapper, so that agents created
# through `sac_pybullet(...)` get their test collector wrapped in a
# RenderCollector.
sac_pybullet = AgentPresetPatch(sac_pybullet)

## Učiť sa kráčať pomocou RL so spojitými akciami

Ako ďalší príklad sa pokúsime vyriešiť o čosi náročnejšiu úlohu: ukážeme, ako sa dá pomocou metódy soft actor-critic (SAC) natrénovať chôdza štvornohého robota v simulácii fyziky založenej na balíčku PyBullet. Notebook má slúžiť hlavne ako demonštrácia – znovu použijeme predpripravenú predvoľbu, tentoraz `sac_pybullet`. Tréning však bude trvať podstatne dlhšie.



In [None]:
# Build the agent for the 'AntBulletEnv-v0' task through the patched
# sac_pybullet preset. Note that AgentPresetPatch.__call__ replaces the
# 'test_envs' entry with envs it sets up itself.
agent = sac_pybullet('AntBulletEnv-v0', seed=0,
                     test_envs=1, episode_per_test=3)

In [None]:
# Train the agent; presumably at most 250 epochs of 1000 steps each
# -- confirm against the tianshou_agents documentation.
agent.train(max_epoch=250, step_per_epoch=1000)

In [None]:
# Evaluate the agent. Presumably this collects through agent.test_collector
# (the RenderCollector installed by the patched preset), i.e. inside
# SCREEN_RECORDER -- confirm against tianshou_agents.
test_results = agent.test()