In [1]:
from causal_world.evaluation.evaluation import EvaluationPipeline
from causal_world.intervention_actors import RandomInterventionActorPolicy, GoalInterventionActorPolicy
from causal_world.benchmark.benchmarks import PUSHING_BENCHMARK, PICKING_BENCHMARK, \
    PICK_AND_PLACE_BENCHMARK, STACKING2_BENCHMARK
from causal_world.task_generators.task import generate_task
import causal_world.viewers.task_viewer as viewer
import causal_world.evaluation.visualization.visualiser as vis

from util import utils as utils, utils_baselines as utils_baselines
import argparse
import os

  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


In [2]:
def baseline_model(model_num, task):
    """Pick one experiment configuration out of the baseline sweep.

    The sweep axes are, in order: benchmark, world parameters, task
    configuration, algorithm (PPO and SAC), curriculum, random seed, world
    seed and network layers. They are combined with ``utils.outer_product``
    and the entry at position ``model_num`` is returned.

    Args:
        model_num: subscript applied to the result of
            ``utils.outer_product`` (presumably an integer list index --
            confirm against ``utils``).
        task: task name. ``'pushing'``, ``'picking'``, ``'pick_and_place'``
            and ``'stacking2'`` select their own benchmark and dense reward
            weights; any other value falls back to the pushing setup.

    Returns:
        Whatever ``utils.outer_product(...)[model_num]`` yields for the
        selected combination.
    """
    # (task name, benchmark, dense reward weights), compared with ``==`` in
    # this order. The first row doubles as the fallback for unknown tasks.
    task_table = [
        ('pushing', PUSHING_BENCHMARK, [750, 250, 0]),
        ('picking', PICKING_BENCHMARK, [250, 0, 125, 0, 750, 0, 0, 0.005]),
        ('pick_and_place', PICK_AND_PLACE_BENCHMARK,
         [750, 50, 250, 0, 0.005]),
        ('stacking2', STACKING2_BENCHMARK, [750, 250, 250, 125, 0.005]),
    ]
    _, chosen_benchmark, chosen_weights = task_table[0]
    for known_task, candidate_benchmark, candidate_weights in task_table:
        if task == known_task:
            chosen_benchmark = candidate_benchmark
            chosen_weights = candidate_weights
            break

    benchmarks = utils.sweep('benchmarks', [chosen_benchmark])
    task_settings = {
        'variables_space': 'space_a',
        'fractional_reward_weight': 1,
        'dense_reward_weights': chosen_weights
    }
    task_configs = [{'task_configs': task_settings}]

    world_settings = {
        'skip_frame': 3,
        'enable_visualization': False,
        'observation_mode': 'structured',
        'normalize_observations': True,
        'action_mode': 'joint_positions'
    }
    world_params = [{'world_params': world_settings}]

    net_layers = utils.sweep('NET_LAYERS', [[256, 256]])
    world_seed = utils.sweep('world_seed', [0])
    NUM_RANDOM_SEEDS = 5
    random_seeds = utils.sweep('seed', list(range(NUM_RANDOM_SEEDS)))

    # PPO: 20 environments, 1200 / 20 = 60 steps per update.
    ppo_train_configs = {
        "gamma": 0.99,
        "n_steps": 1200 // 20,
        "ent_coef": 0.01,
        "learning_rate": 0.00025,
        "vf_coef": 0.5,
        "max_grad_norm": 0.5,
        "nminibatches": 40,
        "noptepochs": 4
    }
    ppo = {
        'num_of_envs': 20,
        'algorithm': 'PPO',
        'validate_every_timesteps': 20000,
        'total_time_steps': 10000000,
        'train_configs': ppo_train_configs
    }

    sac_train_configs = {
        "gamma": 0.95,
        "tau": 1e-3,
        "ent_coef": 1e-3,
        "target_entropy": 'auto',
        "learning_rate": 1e-4,
        "buffer_size": 1000000,
        "learning_starts": 1000,
        "batch_size": 256
    }
    sac = {
        'num_of_envs': 1,
        'algorithm': 'SAC',
        'validate_every_timesteps': 500000,
        'total_time_steps': 10000000,
        'train_configs': sac_train_configs
    }

    # TD3 settings are defined but not part of the sweep below; add ``td3``
    # to ``algorithms`` to include them.
    td3_train_configs = {
        "gamma": 0.96,
        "tau": 0.02,
        "learning_rate": 1e-4,
        "buffer_size": 500000,
        "learning_starts": 1000,
        "batch_size": 128
    }
    td3 = {
        'num_of_envs': 1,
        'algorithm': 'TD3',
        'validate_every_timesteps': 500000,
        'total_time_steps': 10000000,
        'train_configs': td3_train_configs
    }

    algorithms = [ppo, sac]

    # Three curricula: no interventions, goal interventions, random
    # interventions. The two intervening ones share the same 'actives' tuple.
    no_intervention = {'intervention_actors': [], 'actives': []}
    goal_intervention = {
        'intervention_actors': [GoalInterventionActorPolicy()],
        'actives': [(0, 1e9, 1, 0)]
    }
    random_intervention = {
        'intervention_actors': [RandomInterventionActorPolicy()],
        'actives': [(0, 1e9, 1, 0)]
    }
    curriculum_kwargs = [
        no_intervention, goal_intervention, random_intervention
    ]

    sweep_axes = [
        benchmarks, world_params, task_configs, algorithms, curriculum_kwargs,
        random_seeds, world_seed, net_layers
    ]
    all_settings = utils.outer_product(sweep_axes)
    return all_settings[model_num]


In [3]:
# Command-line interface for running this experiment as a script.
# The parser is only configured here; this cell does not call parse_args().
parser = argparse.ArgumentParser()
# type=int: argparse would otherwise hand back a string, and baseline_model
# uses model_num as a subscript into the sweep result.
# NOTE(review): with required=True the default below never takes effect; it
# only matters if the flag is later made optional.
parser.add_argument("--model_num",
                    required=True,
                    default=0,
                    type=int,
                    help="model number")
# NOTE(review): same required/default interaction as --model_num.
parser.add_argument("--task",
                    required=True,
                    default='pushing',
                    help="possible tasks: pushing, picking, pick_and_place, stacking2")
parser.add_argument("--output_path", required=True, help="output path")
# parser.add_argument('--tensorboard', help="tensorboard logging")

# Tensorboard logging is switched on unconditionally (the CLI flag above is
# commented out).
tensorboard_logging = True


In [4]:
# Run configuration for this notebook session (set by hand rather than taken
# from `parser`).
model_num = 0
task = "pushing"
# Derive the experiment folder from `task` so that changing the task does not
# silently point at another task's folder, where existing checkpoints may be
# picked up (see the message printed below).
output_path = "experiments-{}".format(task)
output_path = os.path.join(output_path, str(model_num))
try:
    os.makedirs(output_path)
except FileExistsError:
    # An existing folder is not an error; the run continues with it.
    print("Folder '{}' already exists. Will try to load existing checkpoints".format(output_path))

# Look up the settings for this model index and task, then train with them.
model_settings = baseline_model(model_num, task)

model = utils_baselines.train_model(model_settings, output_path, tensorboard_logging)

Folder 'experiments-pushing\0' already exists. Will try to load existing checkpoints
Folder 'experiments-pushing\0\model' already exists
-------------------------------------
| approxkl           | 0.008247341  |
| clipfrac           | 0.11430417   |
| ep_len_mean        | 834          |
| ep_reward_mean     | -0.769       |
| explained_variance | -0.0653      |
| fps                | 1274         |
| n_updates          | 1            |
| policy_entropy     | 12.793945    |
| policy_loss        | -0.008667563 |
| serial_timesteps   | 6000         |
| time_elapsed       | 0            |
| total_timesteps    | 120000       |
| value_loss         | 0.06555984   |
-------------------------------------
-------------------------------------
| approxkl           | 0.007918061  |
| clipfrac           | 0.10838954   |
| ep_len_mean        | 834          |
| ep_reward_mean     | -0.553       |
| explained_variance | 0.112        |
| fps                | 1265         |
| n_updates          | 2   