In [1]:
from baselines import deepq
from baselines import bench
from baselines import logger
from baselines.common.atari_wrappers import make_atari

def callback(lcl, _glb):
    # stop training if reward exceeds 199
    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 10
    return is_solved


def main():
    logger.configure()
    env = make_atari('PongNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)

    model = deepq.learn(
        env,
        "conv_only",
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True,
        lr=1e-4,
        total_timesteps=int(1e7),
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        checkpoint_path="pong",
        print_freq=10,
        callback=callback
    )
    
    print("Saving model to pong_model.pkl")
    model.save('pong_model.pkl')
    env.close()


In [2]:
# https://github.com/openai/baselines/blob/master/baselines/deepq/deepq.py

from baselines.common.atari_wrappers import LazyFrames
import numpy as np
import random

def sp_noise(lazy_frames, prob):
    '''
    Add salt and pepper noise to image
    prob: Probability of the noise
    '''
    images = []
    for image in lazy_frames._frames:
        output = np.zeros(image.shape,np.uint8)
        thres = 1 - prob
        for i in range(image.shape[0]):
            for j in range(image.shape[1]):
                rdn = random.random()
                if rdn < prob:
                    output[i][j] = 0
                elif rdn > thres:
                    output[i][j] = 255
                else:
                    output[i][j] = image[i][j]
        images.append(output)
    
    return LazyFrames(list(images))

In [3]:
import gym
from baselines import deepq


def callback(lcl, _glb):
    # stop training if reward exceeds 199
    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
    return is_solved


def train_cartpole():
    env = gym.make("CartPole-v0")
    act = deepq.learn(
        env,
        network='mlp',
        lr=1e-3,
        total_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        checkpoint_path="cartpole",
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")

In [5]:
#train_cartpole()

In [None]:
import sys
sys.path.append('rl-generalization')
import sunblaze_envs

In [8]:
import gym
import sunblaze_envs

# Deterministic: the default version with fixed parameters
fixed_env = sunblaze_envs.make('SunblazeCartPole-v0')

# Random: parameters are sampled from a range nearby the default settings
random_env = sunblaze_envs.make('SunblazeCartPoleRandomNormal-v0')

# Extreme: parameters are sampled from an `extreme' range
extreme_env = sunblaze_envs.make('SunblazeCartPoleRandomExtreme-v0')

ModuleNotFoundError: No module named 'sunblaze_envs'

In [9]:
def callback(lcl, _glb):
    # stop training if reward exceeds 199
    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
    return is_solved


def train_cartpole_random():
    env = sunblaze_envs.make('SunblazeCartPole-v0')
    act = deepq.learn(
        env,
        network='mlp',
        lr=1e-3,
        total_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        checkpoint_path="cartpole",
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")

In [10]:
train_cartpole_random()

NameError: name 'sunblaze_envs' is not defined