# Automatic Evaluator

In [9]:
%%capture
#########################
# Requirements and Imports
#########################
!pip install ipywidgets
!pip install widgetsnbextension

from os import path
import ipywidgets as widgets
from pathlib import Path

import torch
import numpy as np
import random

In [10]:
# Checkbox that lets the user choose whether the run should be seeded.
reproducibility_field = widgets.Checkbox(value=True, description="Reproducibility", disabled=False)
display(reproducibility_field)

Checkbox(value=True, description='Reproducibility')

In [11]:
# Read the checkbox. The seed input is only created (and shown) when
# reproducibility is requested, so `seed_field` is not defined otherwise.
reproducibility = reproducibility_field.value
if reproducibility:
    seed_field = widgets.IntText(value=0, placeholder=0, description="Seed:", disabled=False)
    display(seed_field)

IntText(value=0, description='Seed:')

In [12]:
# Seed the pseudo-random number generators (PRNGs)
# NOTE(review): this seeds the notebook kernel only; the `python ...` command
# executed later runs in a separate process — verify that the scripts seed themselves.

reproducibility = reproducibility_field.value
if reproducibility:
    seed = seed_field.value
    print(seed)

    # Python, PyTorch and NumPy each keep their own generator state.
    for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
        seed_fn(seed)

    # Ask PyTorch for deterministic algorithms and deterministic filling of
    # uninitialized memory.
    torch.use_deterministic_algorithms(True)
    torch.utils.deterministic.fill_uninitialized_memory = True

0


## Game Selection

In [13]:
import ocatari
# List of game names taken from ocatari.core.AVAILABLE_GAMES.
all_games = ocatari.core.AVAILABLE_GAMES
print(all_games)

['Adventure', 'AirRaid', 'Alien', 'Amidar', 'Assault', 'Asterix', 'Asteroids', 'Atlantis', 'BankHeist', 'BattleZone', 'BeamRider', 'Berzerk', 'Bowling', 'Boxing', 'Breakout', 'Carnival', 'Centipede', 'ChopperCommand', 'CrazyClimber', 'DemonAttack', 'DonkeyKong', 'DoubleDunk', 'Enduro', 'FishingDerby', 'Freeway', 'Frogger', 'Frostbite', 'Galaxian', 'Gopher', 'Hero', 'IceHockey', 'Jamesbond', 'Kangaroo', 'KeystoneKapers', 'KingKong', 'Krull', 'KungFuMaster', 'MarioBros', 'MontezumaRevenge', 'MsPacman', 'NameThisGame', 'Pacman', 'Phoenix', 'Pitfall', 'Pong', 'Pooyan', 'PrivateEye', 'Qbert', 'Riverraid', 'RoadRunner', 'Seaquest', 'Skiing', 'SpaceInvaders', 'StarGunner', 'Tennis', 'TimePilot', 'UpNDown', 'Venture', 'VideoPinball', 'YarsRevenge', 'Zaxxon']


In [14]:
# Module names (file stems) of the game files in ../hackatari/games,
# ignoring the package's __init__.py.
hack_games = [
    game_file.stem
    for game_file in Path('../hackatari/games').glob('*.py')
    if game_file.is_file() and game_file.name != '__init__.py'
]

# Offer only the games whose lower-cased name has a matching module,
# keeping the order of all_games.
hack_game_names = set(hack_games)
games_option = [g for g in all_games if g.lower() in hack_game_names]

# Preselect "Pong" when it is available; otherwise fall back to the first
# option (or no selection) instead of failing on an invalid default value.
if "Pong" in games_option:
    default_game = "Pong"
elif games_option:
    default_game = games_option[0]
else:
    default_game = None

game_selector = widgets.Select(
    options = games_option,
    value = default_game,
    description = 'Game:',
    disabled = False,
)
game_selector.layout.width = '30%'
display(game_selector)

Select(description='Game:', index=21, layout=Layout(width='30%'), options=('Amidar', 'Assault', 'Asterix', 'At…

In [15]:
# Current value of the game selector widget; echoed as the cell output.
selected_game = game_selector.value
selected_game

'Pong'

# Model Selection

In [16]:
# Collect every *.gz file below ../models whose path mentions the selected game
# (case-insensitive). Sorted so the option order is stable across runs.
# NOTE(review): this is a substring match on the whole path, so e.g. "Pacman"
# would also match "MsPacman" — confirm this is acceptable.
models = sorted(
    m for m in Path('../models').glob('**/*.gz')
    if m.is_file() and selected_game.lower() in str(m).lower()
)

# The flex settings are Layout properties, so they have to be passed through
# `layout=`; given as plain widget keyword arguments they are not applied.
model_selector = widgets.SelectMultiple(
    options=models,
    value=[],
    description="Select Models",
    style={"description_width": "initial"},
    disabled=False,
    layout=widgets.Layout(
        display="flex",
        flex_flow="column",
        align_items="stretch",
        width="30%",
    ),
)
display(model_selector)


SelectMultiple(description='Select Models', layout=Layout(width='30%'), options=(PosixPath('../models/Pong/0/c…

In [17]:
# Copy the current multi-selection into a list and stop early when it is empty.
models_list = list(model_selector.value)
assert models_list, "Please select at least one model"
models_list

[PosixPath('../models/Pong/1/mdqn_modern_50M.gz'),
 PosixPath('../models/Pong/2/c51_classic_50M.gz'),
 PosixPath('../models/Pong/2/dqn_modern_50M.gz')]

# Add Run Arguments

In [18]:
# Human-player switch: a read-only text box naming the option next to a
# True/False dropdown. When True, the game waits for user input to play.
human_player = widgets.HBox(
    children=[
        widgets.Text(value='--human', description='Let user play', disabled=True),
        widgets.Dropdown(options=[True, False], value=False, description='Type:', disabled=False),
    ]
)

display(human_player)

HBox(children=(Text(value='--human', description='Let user play', disabled=True), Dropdown(description='Type:'…

In [19]:
# '-hu' when the dropdown is set to True, otherwise None.
human_opt = None
if human_player.children[1].value:
    human_opt = '-hu'

In [20]:
import hackatari.core as core

# The text returned for the game is split on '*'; whatever precedes the first
# '*' is dropped.
# NOTE(review): _available_modifications is a private helper of hackatari.core —
# its output format is assumed here, confirm against the library.
avail_mod_for_game = core._available_modifications(selected_game).split('*')[1:]

# Split each entry at its FIRST ':' only, so a description that itself contains
# colons (or an entry without any colon) still yields exactly one [name, info] pair.
modifies_list = [list(mod.partition(':')[::2]) for mod in avail_mod_for_game]

# One row per modification: name, description and a True/False dropdown.
# justify_content is a Layout property, so it is passed via `layout`; as a
# plain widget keyword argument it would not be applied.
modifies_selector = widgets.VBox([
        widgets.HBox([
            widgets.Label(value=mod_name, layout={'width': '10%'}),
            widgets.Label(value=mod_info, layout={'width': '50%'}),
            widgets.Dropdown(options=[True, False], value=False, description='Value:',
                             layout={'width': '15%', 'justify_content': 'flex-end'})
        ], layout={'justify_content': 'space-between'}) for mod_name, mod_info in modifies_list
    ], layout={'width': '75%'})

display(modifies_selector)

VBox(children=(HBox(children=(Label(value=' ball_x_prev', layout=Layout(width='10%')), Label(value="\n\tint([x…

In [21]:
# Space-separated names of all modifications whose dropdown is set to True.
enabled_mods = [
    row.children[0].value.strip()
    for row in modifies_selector.children
    if row.children[2].value
]
mod_args = ' '.join(enabled_mods)
mod_args

'down_drift lazy_enemy speed_ball'

# Run Game

In [23]:
## prepare the command
import shlex

eval_script = Path(r"scripts/eval.py")
run_script = Path(r"scripts/run.py")

# The project root is taken to be the parent of the current working directory.
notebook_dir = path.abspath("")
project_root = path.dirname(notebook_dir)
print(f"Project Root: {project_root}")

# use POSIX-style paths compatible with the OS
eval_path = (Path(project_root) / eval_script).as_posix()
run_path = (Path(project_root) / run_script).as_posix()

# Shell-quote every path so that a location containing spaces or other shell
# metacharacters is still passed as a single argument.
models_path = ' '.join(shlex.quote(model.as_posix()) for model in models_list)

if human_opt:
    command = f"python {shlex.quote(run_path)} -g {selected_game} {human_opt}"
else:
    command = f"python {shlex.quote(eval_path)} -g {selected_game} -a {models_path}"
    # Only pass -m when at least one modification is selected; otherwise a
    # bare "-m" without any value would be sent to the script.
    if mod_args:
        command += f" -m {mod_args}"

print("command: " + command)


Project Root: /mnt/c/Users/peng_/workspace/HackAtari
command: python /mnt/c/Users/peng_/workspace/HackAtari/scripts/eval.py -g Pong -a ../models/Pong/1/mdqn_modern_50M.gz ../models/Pong/2/c51_classic_50M.gz ../models/Pong/2/dqn_modern_50M.gz -m down_drift lazy_enemy speed_ball


In [25]:
# execute the command in a system shell; IPython substitutes {command} with the value of the variable
!{command}

A.L.E: Arcade Learning Environment (version 0.10.2+c9d4b19)
[Powered by Stella]
Loaded agent from ../models/Pong/0/c51_classic_50M.gz
Episode 1: Reward = -21.0
Episode 2: Reward = -21.0
Episode 3: Reward = -21.0
Episode 4: Reward = -21.0
Episode 5: Reward = -20.0
Episode 6: Reward = -21.0
Episode 7: Reward = -21.0
Episode 8: Reward = -21.0
Episode 9: Reward = -21.0
Episode 10: Reward = -21.0

Summary:
Agent: ../models/Pong/0/c51_classic_50M.gz
Total Episodes: 10
Average Reward: -20.90
Standard Deviation: 0.30
Min Reward: -21.0
Max Reward: -20.0
--------------------------------------
Loaded agent from ../models/Pong/0/dqn_modern_50M.gz
Episode 1: Reward = -21.0
Episode 2: Reward = -21.0
Episode 3: Reward = -21.0
Episode 4: Reward = -21.0
Episode 5: Reward = -21.0
Episode 6: Reward = -21.0
Episode 7: Reward = -21.0
Episode 8: Reward = -21.0
Episode 9: Reward = -21.0
Episode 10: Reward = -21.0

Summary:
Agent: ../models/Pong/0/dqn_modern_50M.gz
Total Episodes: 10
Average Reward: -21.00
St

# Evaluation

In [24]:
# Put the parent of the current working directory on sys.path (only once)
# so that modules located there can be imported.
import os
import sys

module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    

In [25]:
from itertools import combinations
# split() without an argument ignores repeated whitespace and returns [] when
# nothing is selected; split(" ") would return [''] and thereby create a bogus
# empty-named modification.
mod_list = mod_args.split()

#mod_list = ['A','B','C']

# Return all non-empty subsets
def subsets(fullset):
    """Return every non-empty subset of ``fullset`` as a list of lists.

    Subsets are ordered by size (smallest first); within one size they follow
    the order produced by ``itertools.combinations``.
    """
    return [
        list(combo)
        for size in range(1, len(fullset) + 1)
        for combo in combinations(fullset, size)
    ]

# All non-empty combinations of the selected modifications
# (2**n - 1 entries for n selected modifications).
mod_sublists = subsets(mod_list)

print(mod_sublists)

[['down_drift'], ['lazy_enemy'], ['speed_ball'], ['down_drift', 'lazy_enemy'], ['down_drift', 'speed_ball'], ['lazy_enemy', 'speed_ball'], ['down_drift', 'lazy_enemy', 'speed_ball']]


In [26]:
# Load (or reload) the IPython extension provided by aim.
%reload_ext aim

# NOTE(review): according to the recorded output this re-initializes the Aim
# repository in the current directory — confirm that existing runs are not lost.
!aim init -y # it will re-initialize empty aim repo.

# NOTE(review): presumably starts the Aim UI inside the notebook — confirm.
%aim up

Re-initialized empty Aim repository at /mnt/c/Users/peng_/workspace/HackAtari/automatic_evaluator


In [27]:
# Enable autoreload in mode 1 and register `utils` via %aimport, so that edits
# to the utils module are picked up without restarting the kernel.
%reload_ext autoreload

%autoreload 1

%aimport utils

# agent_paths = [model.as_posix() for model in models_list]
# for agent_path in agent_paths:
#     agent_name = "/".join(agent_path.split("/")[-2:])
#     print(agent_name)
        
# One utils.eval_withAimRun call per modification subset; each call receives
# the selected game, the POSIX paths of all selected models and the subset.
for modifs in mod_sublists:
    utils.eval_withAimRun(selected_game, [ model.as_posix() for model in models_list], modifs)


A.L.E: Arcade Learning Environment (version 0.10.2+c9d4b19)
[Powered by Stella]


Loaded agent from ../models/Pong/1/mdqn_modern_50M.gz
--------------------------------------
Loaded agent from ../models/Pong/2/c51_classic_50M.gz
--------------------------------------
Loaded agent from ../models/Pong/2/dqn_modern_50M.gz
--------------------------------------
Done.
Loaded agent from ../models/Pong/1/mdqn_modern_50M.gz
--------------------------------------
Loaded agent from ../models/Pong/2/c51_classic_50M.gz
--------------------------------------
Loaded agent from ../models/Pong/2/dqn_modern_50M.gz
--------------------------------------
Done.
Loaded agent from ../models/Pong/1/mdqn_modern_50M.gz
--------------------------------------
Loaded agent from ../models/Pong/2/c51_classic_50M.gz
--------------------------------------
Loaded agent from ../models/Pong/2/dqn_modern_50M.gz
--------------------------------------
Done.
Loaded agent from ../models/Pong/1/mdqn_modern_50M.gz
--------------------------------------
Loaded agent from ../models/Pong/2/c51_classic_50M.gz
-