In [1]:
import sys
import os
os.chdir('..')

import argparse
import logging.config
import os

import numpy as np
import ray
import torch
from torch.utils.tensorboard import SummaryWriter

from core.test import test
from core.train import train
from core.utils import init_logger, make_results_dir, set_seed

  from .autonotebook import tqdm as notebook_tqdm
2024-07-09 08:51:02,482	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
parser = argparse.ArgumentParser(description='EfficientZero')

In [3]:
parser

ArgumentParser(prog='ipykernel_launcher.py', usage=None, description='EfficientZero', formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)

In [10]:
args = parser.parse_args([])
args.device = 'cpu'
args.env = 'BreakoutNoFrameskip-v4'
args.case = 'atari'
args.opr = 'train'
args.amp_type = 'none' #'torch_amp'
args.no_cuda = True
args.render = True
args.save_video = True
args.num_gpus = 0
args.num_cpus = 4
args.result_dir = '../results'
args.model_path = '../results/test_model.p'
args.debug = True
args.cpu_actor = 4
args.gpu_actor = 0
args.p_mcts_num = 2

# defaults
args.seed = 0
args.revisit_policy_search_rate = .99
args.use_root_value = False
args.use_priority = False
args.use_max_priority = False
args.test_episodes = 10
args.use_augmentation = True
args.augmentation = ['shift', 'intensity']
args.info = 'none'
args.load_model = False
args.object_store_memory = 150 * 1024 * 1024 * 1024

In [11]:
if args.opr == 'train':
    ray.init(num_gpus=args.num_gpus, num_cpus=args.num_cpus,
            ignore_reinit_error=True,
            # object_store_memory=args.object_store_memory
            )
else:
    ray.init()

RuntimeError: Maybe you called ray.init twice by accident? This error can be suppressed by passing in 'ignore_reinit_error=True' or by calling 'ray.shutdown()' prior to 'ray.init()'.

In [6]:
# ray.init()
# ray.shutdown()

In [7]:
# seeding random iterators
set_seed(args.seed)

# import corresponding configuration , neural networks and envs
if args.case == 'atari':
    from config.atari import game_config
else:
    raise Exception('Invalid --case option')

# set config as per arguments
exp_path = game_config.set_config(args)
exp_path, log_base_path = make_results_dir(exp_path, args)

# set-up logger
init_logger(log_base_path)
logging.getLogger('train').info('Path: {}'.format(exp_path))
logging.getLogger('train').info('Param: {}'.format(game_config.get_hparams()))

device = game_config.device

A.L.E: Arcade Learning Environment (version 0.8.1+ba84c14)
[Powered by Stella]
[2024-07-09 08:51:05,989][train][INFO][3914971446.py><module>] ==> Path: ../results/atari/none/BreakoutNoFrameskip-v4/seed=0/Tue Jul  9 08:51:05 2024
INFO:train:Path: ../results/atari/none/BreakoutNoFrameskip-v4/seed=0/Tue Jul  9 08:51:05 2024
[2024-07-09 08:51:05,989][train][INFO][3914971446.py><module>] ==> Path: ../results/atari/none/BreakoutNoFrameskip-v4/seed=0/Tue Jul  9 08:51:05 2024
[2024-07-09 08:51:05,990][train][INFO][3914971446.py><module>] ==> Param: {'action_space_size': np.int64(4), 'num_actors': 1, 'do_consistency': True, 'use_value_prefix': True, 'off_correction': True, 'gray_scale': False, 'auto_td_steps_ratio': 0.3, 'episode_life': True, 'change_temperature': True, 'init_zero': True, 'state_norm': False, 'clip_reward': True, 'random_start': True, 'cvt_string': True, 'image_based': True, 'max_moves': 3000, 'test_max_moves': 3000, 'history_length': 400, 'num_simulations': 50, 'discount': 0.9

In [8]:
try:
    if args.opr == 'train':
        summary_writer = SummaryWriter(exp_path, flush_secs=10)
        if args.load_model and os.path.exists(args.model_path):
            model_path = args.model_path
        else:
            model_path = None
        model, weights = train(game_config, summary_writer, model_path)
        model.set_weights(weights)
        total_steps = game_config.training_steps + game_config.last_steps
        test_score, _, test_path = test(game_config, model.to(device), total_steps, game_config.test_episodes, device, render=False, save_video=args.save_video, final_test=True, use_pb=True)
        mean_score = test_score.mean()
        std_score = test_score.std()

        test_log = {
            'mean_score': mean_score,
            'std_score': std_score,
        }
        for key, val in test_log.items():
            summary_writer.add_scalar('train/{}'.format(key), np.mean(val), total_steps)

        test_msg = '#{:<10} Test Mean Score of {}: {:<10} (max: {:<10}, min:{:<10}, std: {:<10})' \
                    ''.format(total_steps, game_config.env_name, mean_score, test_score.max(), test_score.min(), std_score)
        logging.getLogger('train_test').info(test_msg)
        if args.save_video:
            logging.getLogger('train_test').info('Saving video in path: {}'.format(test_path))
    elif args.opr == 'test':
        assert args.load_model
        if args.model_path is None:
            model_path = game_config.model_path
        else:
            model_path = args.model_path
        assert os.path.exists(model_path), 'model not found at {}'.format(model_path)

        model = game_config.get_uniform_network().to(device)
        model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
        test_score, _, test_path = test(game_config, model, 0, args.test_episodes, device=device, render=args.render, save_video=args.save_video, final_test=True, use_pb=True)
        mean_score = test_score.mean()
        std_score = test_score.std()
        logging.getLogger('test').info('Test Mean Score: {} (max: {}, min: {})'.format(mean_score, test_score.max(), test_score.min()))
        logging.getLogger('test').info('Test Std Score: {}'.format(std_score))
        if args.save_video:
            logging.getLogger('test').info('Saving video in path: {}'.format(test_path))
    else:
        raise Exception('Please select a valid operation(--opr) to be performed')
    ray.shutdown()
except Exception as e:
    logging.getLogger('root').error(e, exc_info=True)




[36m(autoscaler +7s)[0m Tip: use `ray status` to view detailed cluster status. To disable these messages, set RAY_SCHEDULER_EVENTS=0.
[33m(autoscaler +7s)[0m Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.25}. Add suitable node types to this cluster to resolve this issue.
[33m(autoscaler +7s)[0m Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.125}. Add suitable node types to this cluster to resolve this issue.
[33m(autoscaler +42s)[0m Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.25}. Add suitable node types to this cluster to resolve this issue.
[33m(autoscaler +42s)[0m Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.125}. Add suitable node types to this cluster to resolve this issue.
[33m(autoscaler +1m17s)[0m Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.25}. Add suitable node types to this cluster 

KeyboardInterrupt: 

In [9]:
! ray status

Node status
---------------------------------------------------------------
Active:
 1 node_08bcb64f6d23aab40bc8335d8d222aa016ebd7c198871993bac8147f
Pending:
 (no pending nodes)
Recent failures:
 (no failures)

Resources
---------------------------------------------------------------
Usage:
 0.0/4.0 CPU
 0B/15.05GiB memory
 64.58MiB/2.00GiB object_store_memory

Demands:
 {'CPU': 1.0, 'GPU': 0.125}: 1+ pending tasks/actors
 {'CPU': 1.0, 'GPU': 0.25}: 1+ pending tasks/actors
[0m