In [1]:
%load_ext tensorboard

import math
import gym 
import plotly.express as px
import numpy as np
import warnings
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import random
from collections import Counter, deque
import pandas as pd
from sklearn.model_selection import train_test_split
import datetime
from sklearn.metrics import confusion_matrix
import plotly.figure_factory as ff

import os
import glob
from IPython import display as ipythondisplay
from tqdm.notebook import tqdm
from gym.wrappers import Monitor
from IPython.display import HTML
import base64
import io
import pickle
import torch
from torch import nn
import kornia.augmentation as kaug
import imageio
warnings.filterwarnings("ignore")

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
from pyvirtualdisplay import Display
display = Display(visible=False, size=(1400, 900))
if torch.cuda.is_available():
    display.start()

In [4]:
import os, sys, copy, argparse, shutil
def parse_arguments():
    parser = argparse.ArgumentParser(description='Deep Q Network Argument Parser')
    parser.add_argument('--seed', dest='seed', type=int, default=1)
    parser.add_argument('--env', dest='env', type=str, default='CartPole-v0')
    parser.add_argument('--save_interval', type=int, default=50, help='save model every n episodes')
    parser.add_argument('--log_interval', type=int, default=10, help='logging every n episodes')
    parser.add_argument('--render', help='render', type=int, default=1)
    parser.add_argument('--batch_size', help='batch_size', type=int, default=32)
    parser.add_argument('--train_freq', help='train_frequency', type=int, default=1)
    parser.add_argument('--max_episode', help='maximum episode', type=int, default=None)
    parser.add_argument('--max_timesteps', help='maximum timestep', type=int, default=100000000)
    parser.add_argument('--lr', dest='lr', type=float, default=0.00025)
    parser.add_argument('--lr_decay', action='store_true', help='decay learning rate')
    parser.add_argument('--gamma', help='discount_factor', type=float, default=0.99)
    parser.add_argument('--warmup_mem', type=int, help='warmup memory size', default=1000)
    parser.add_argument('--frame_skip', type=int, help='number of frames to skip for each action', default=3)
    parser.add_argument('--frame_stack', type=int, help='number of frames to stack', default=4)
    parser.add_argument('--memory', help='memory size', type=int, default=1000000)
    parser.add_argument('--initial_epsilon', '-ie', help='initial_epsilon', type=float, default=0.5)
    parser.add_argument('--final_epsilon', '-fe', help='final_epsilon', type=float, default=0.05)
    parser.add_argument('--max_epsilon_decay_steps', '-eds', help='maximum steps to decay epsilon', type=int, default=100000)
    parser.add_argument('--max_grad_norm', type=float, default=None, help='maximum gradient norm')
    parser.add_argument('--soft_update', '-su', action='store_true', help='soft update target network')
    parser.add_argument('--double_q', '-dq', action='store_true', help='enabling double DQN')
    parser.add_argument('--dueling_net', '-dn', action='store_true', help='enabling dueling network')
    parser.add_argument('--test', action='store_true', help='test the trained model')
    parser.add_argument('--tau', type=float, default=0.01, help='tau for soft target network update')
    parser.add_argument('--hard_update_freq', '-huf', type=int, default=2000, help='hard target network update frequency')
    parser.add_argument('--save_dir', type=str, default='./data')
    parser.add_argument('--resume_step', '-rs', type=int, default=None)
    return parser.parse_args()

In [5]:
#@title Set up constants for env and training
test = False 
save_dir = './data'
render = False
max_episode = None
max_timesteps = 100000000


In [6]:
#@title Augmentations
'''
color_jitter
random_elastic_transform
random_fisheye
random_color_equalize
random_gaussian_blur
random_gaussian_noise
random_horizontal_flip
random_color_invert
random_perspective_shift
random_shift
'''
color_jitter = kaug.ColorJitter(
        brightness=np.random.random(),
        contrast=np.random.random(),
        saturation=np.random.random(),
        hue=np.random.random(),
        p=0.5
        )
random_elastic_transform = kaug.RandomElasticTransform()
random_fisheye = kaug.RandomFisheye(
        center_x=torch.tensor([-.3, .3]).to(device),
        center_y=torch.tensor([-.3, .3]).to(device),
        gamma=torch.tensor([.9, 1.]).to(device),
        )
# need to divide by 255.0
random_color_equalize = lambda obs: kaug.RandomEqualize()(obs / 255.) * 255
random_gaussian_blur = kaug.RandomGaussianBlur(
        kernel_size=(9, 9),
        sigma = (5., 5.)
        )
random_gaussian_noise = kaug.RandomGaussianNoise()
random_horizontal_flip = kaug.RandomHorizontalFlip()
random_color_invert = kaug.RandomInvert()
random_perspective_shift = kaug.RandomPerspective()
get_random_shift = lambda h, w, shift_by: nn.Sequential(kaug.RandomCrop((h - shift_by, w - shift_by)), nn.ReplicationPad2d(20), kaug.RandomCrop((h - shift_by, w - shift_by)))

In [7]:
def tie_weights(src, trg):
    assert type(src) == type(trg)
    trg.weight = src.weight
    trg.bias = src.bias

def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    from math import floor
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    h = floor( ((h_w[0] + (2 * pad) - ( dilation * (kernel_size[0] - 1) ) - 1 )/ stride) + 1)
    w = floor( ((h_w[1] + (2 * pad) - ( dilation * (kernel_size[1] - 1) ) - 1 )/ stride) + 1)
    return h, w

# for 84 x 84 inputs
OUT_DIM = {2: 39, 4: 35, 6: 31}
# for 64 x 64 inputs
OUT_DIM_64 = {2: 29, 4: 25, 6: 21}

''' TODO change the layer parameters ''' 
class PixelEncoder(nn.Module):
    """Convolutional encoder of pixels observations."""
    def __init__(self, obs_shape, feature_dim=48, num_layers=3, num_filters=64, output_logits=False):
        super().__init__()

        assert len(obs_shape) == 3
        self.obs_shape = (obs_shape[2], obs_shape[0], obs_shape[1])
        self.feature_dim = feature_dim
        self.num_layers = num_layers
        
        # 160, 210, 3
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, stride=5)
        conv1_shape = conv_output_shape(self.obs_shape[1:], kernel_size=5, stride=5)
        # Input to conv2: 32, 42, 32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        conv2_shape = conv_output_shape(conv1_shape, kernel_size=4, stride=2)
        # Input to conv3: 15, 20, 64
        self.conv3 = nn.Conv2d(64, 64, kernel_size=4, stride=1)
        conv3_shape = conv_output_shape(conv2_shape, kernel_size=4, stride=1)
        # Output from conv3: 12, 17, 64

        # out_dim = OUT_DIM_64[num_layers] if obs_shape[-1] == 64 else OUT_DIM[num_layers]
        out_dims = conv3_shape
        self.fc = nn.Linear(num_filters * out_dims[0] * out_dims[1], self.feature_dim)
        self.ln = nn.LayerNorm(self.feature_dim)

        self.outputs = dict()
        self.output_logits = output_logits

    def reparameterize(self, mu, logstd):
        std = torch.exp(logstd)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward_conv(self, obs):
        self.outputs['obs'] = obs
        conv1 = torch.relu(self.conv1(obs))
        self.outputs['conv1'] = conv1
        conv2 = torch.relu(self.conv2(conv1))
        self.outputs['conv2'] = conv2
        conv3 = torch.relu(self.conv3(conv2))
        self.outputs['conv3'] = conv3

        h = conv3.reshape(conv3.size(0), -1)
        return h

    def forward(self, obs, detach=False):
        h = self.forward_conv(obs)

        if detach:
            h = h.detach()

        h_fc = self.fc(h)
        self.outputs['fc'] = h_fc

        h_norm = self.ln(h_fc)
        self.outputs['ln'] = h_norm

        if self.output_logits:
            out = h_norm
        else:
            out = torch.tanh(h_norm)
            self.outputs['tanh'] = out

        return out

    def copy_conv_weights_from(self, source):
        """Tie convolutional layers"""
        # only tie conv layers
        for i in range(self.num_layers):
            tie_weights(src=source.convs[i], trg=self.convs[i])

    def log(self, L, step, log_freq):
        if step % log_freq != 0:
            return

        for k, v in self.outputs.items():
            L.log_histogram('train_encoder/%s_hist' % k, v, step)
            if len(v.shape) > 2:
                L.log_image('train_encoder/%s_img' % k, v[0], step)

        for i in range(self.num_layers):
            L.log_param('train_encoder/conv%s' % (i + 1), self.convs[i], step)
        L.log_param('train_encoder/fc', self.fc, step)
        L.log_param('train_encoder/ln', self.ln, step)

In [8]:
def wrap_env(env, train=True):
    suffix = 'train' if train else 'test'
    monitor_dir = os.path.join(save_dir, 'monitor_%s' % suffix)
    os.makedirs(monitor_dir, exist_ok=True)
    if not train:
        video_save_interval = 10
        env = Monitor(env, directory=monitor_dir,
                      video_callable=lambda episode_id: episode_id % video_save_interval == 0,
                      force=True)
    else:
        if render:
            if max_episode is not None:
                video_save_interval = int(max_episode / 3)
            else:
                video_save_interval = int(max_timesteps / float(env._max_episode_steps) / 3)
            env = Monitor(env, directory=monitor_dir,
                          video_callable=lambda episode_id: episode_id % video_save_interval == 0,
                          force=True)
        else:
            env = Monitor(env, directory=monitor_dir, video_callable=False, force=True)
    return env

In [9]:
class ReplayMemory(object):
    def __init__(self, max_epi_num=2000, max_epi_len=200, obs_shape=(210, 160)):
        # capacity is the maximum number of steps in memory
        self.max_epi_num = max_epi_num
        self.max_epi_len = max_epi_len
        # saves each tuple of (state, action, next state, reward, speeds)
        self.capacity = 250 # self.max_epi_num * max_epi_len
        self.idx = 0
        self.obs_memory = np.zeros((self.capacity, *obs_shape, 3)) # deque(maxlen=self.max_epi_num * max_epi_len)
        self.next_memory = np.zeros((self.capacity, *obs_shape, 3))
        self.act_memory = np.zeros((self.capacity, 1))
        self.reward_memory = np.zeros((self.capacity, 1))
        self.speeds_memory = np.zeros((self.capacity, 2))
        self.is_av = False
        self.current_epi = 0

    def reset(self):
        self.current_epi = 0
        self.memory.clear()

    ''' deprecated for tuple buffer '''
    def create_new_epi(self):
        pass

    def remember(self, state, next_state, action, reward, speeds):
        idx = self.idx % self.capacity
        self.obs_memory[idx] = state.copy()
        self.next_memory[idx] = next_state.copy()
        self.act_memory[idx] = action
        self.reward_memory[idx] = reward
        self.speeds_memory[idx] = speeds
        self.idx += 1
        
        '''
        if len(self.memory) < self.capacity:
            new_sample = np.array([state, action, reward, next_state])
            if len(self.memory) == 0:
                self.memory = [new_sample]
            else:
                length = len(self.memory)
                self.memory.append(new_sample)
        '''
                
    # samples batch_size
    def sample(self, batch_size):
        if batch_size < self.idx:
            idx = np.random.randint(0, min(self.idx, self.capacity) - 1, batch_size)
            return self.obs_memory[idx], self.next_memory[idx], self.act_memory[idx], self.reward_memory[idx], self.speeds_memory[idx]
        return self.obs_memory[:self.idx], self.next_memory[:self.idx], self.act_memory[:self.idx], self.reward_memory[:self.idx], self.speeds_memory[:self.idx]

    def size(self):
        return self.idx

    def is_available(self):
        self.is_av = True
        if self.idx <= 1:
            self.is_av = False
        return self.is_av

    def print_info(self):
        pass

In [10]:
#@title Create a training conv agent
import torch.nn.functional as F

class DQNetworkConv(nn.Module):
    def __init__(self, in_channels, act_dim, dueling=False):
        super(DQNetworkConv, self).__init__()
        self.act_dim = act_dim
        self.dueling = dueling
        # 160, 210, 3 
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=5, stride=5)
        # Input to conv2: 32, 42, 32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        # Input to conv3: 15, 20, 64
        self.conv3 = nn.Conv2d(64, 64, kernel_size=4, stride=1)
        # Output from conv3: 12, 17, 64
        if self.dueling:
            self.v_fc4 = nn.Linear(12 * 17 * 64, 512)
            self.adv_fc4 = nn.Linear(12 * 17 * 64, 512)
            self.v_fc5 = nn.Linear(512, 1)
            self.adv_fc5 = nn.Linear(512, self.act_dim)
        else:
            self.fc4 = nn.Linear(12 * 17 * 64, 512)
            self.fc5 = nn.Linear(512, self.act_dim)
        self.parameters = (list(self.conv1.parameters())) + (list(self.conv2.parameters())) + (list(self.conv3.parameters())) + (list(self.fc4.parameters())) + (list(self.fc5.parameters()))

    def forward(self, st):
        out = F.relu(self.conv1(st))
        out = F.relu(self.conv2(out))
        out = F.relu(self.conv3(out))
        out = out.view(out.size(0), -1)
        if self.dueling:
            val = F.relu(self.v_fc4(out))
            adv = F.relu(self.adv_fc4(out))
            val = self.v_fc5(val)
            adv = self.adv_fc5(adv)
            out = val.expand_as(adv) + adv - adv.mean(-1, keepdim=True).expand_as(adv)
        else:
            out = F.relu(self.fc4(out))
            out = self.fc5(out)
        return out

In [11]:
#@title Create a training FC agent
import torch.nn.functional as F

class DQNetworkFC(nn.Module):
    def __init__(self, z_dim, act_dim, dueling=False):
        super(DQNetworkFC, self).__init__()
        self.act_dim = act_dim
        self.input_dim = z_dim 
        self.dueling = dueling
        if self.dueling:
            self.v_fc1 = nn.Linear(z_dim, 512)
            self.adv_fc1 = nn.Linear(z_dim, 512)
            self.v_fc2 = nn.Linear(512, 1)
            self.adv_fc2 = nn.Linear(512, 256)
            self.v_fc3 = nn.Linear(256, 1)
            self.adv_fc3 = nn.Linear(256, self.act_dim)
        else:
            self.fc1 = nn.Linear(z_dim, 512)
            self.fc2 = nn.Linear(512, 256)
            self.fc3 = nn.Linear(256, self.act_dim)

    def forward(self, st):
        out = F.relu(self.fc1(st))
        out = F.relu(self.fc2(out))
        ''' Do we need a relu on the last layer if the output is probability over action space? '''
        out = F.relu(self.fc3(out))
        return out

In [12]:
def process_obs(obs, divide=True, unsqueeze_not_4=True):
    obs = torch.Tensor(obs / 255. if divide else obs)
    if len(obs.shape) < 4:
        if unsqueeze_not_4:
            obs = obs.unsqueeze(0)
            obs = obs.permute(0, 3, 1, 2)
        else:
            obs = obs.permute(2, 0, 1)
    else:
        obs = obs.permute(0, 3, 1, 2)
    return obs.to(device)

In [13]:
def take_action(env, action):
    state, rew, done, _ = env.step(action)
    obs = env.render(mode='rgb_array')
    return obs, rew, done, state

In [14]:
MAX_STEPS = 200

In [15]:
class CURL(nn.Module):
    """
    CURL
    """

    def __init__(self, obs_shape, z_dim, batch_size, encoder, output_type="continuous", critic=None, critic_target=None):
        super(CURL, self).__init__()
        self.obs_shape = obs_shape
        self.batch_size = batch_size

        # self.encoder = critic.encoder
        self.encoder = encoder 

        # self.encoder_target = critic_target.encoder 
        self.fc1 = nn.Linear(100, 50)
        self.fc2 = nn.Linear(50, 1)

        # self.W = nn.Parameter(torch.rand(z_dim, z_dim))
        self.output_type = output_type

    def encode(self, x, detach=False, ema=False):
        """
        Encoder: z_t = e(x_t)
        :param x: x_t, x y coordinates
        :return: z_t, value in r2
        """
        if ema:
            with torch.no_grad():
                z_out = self.encoder_target(x)
        else:
            z_out = self.encoder(x)

        if detach:
            z_out = z_out.detach()
        return z_out

    def compute_logits(self, z_a, z_mod):
        """
        Uses logits trick for CURL:
        - compute (B,B) matrix z_a (W z_pos.T)
        - positives are all diagonal elements
        - negatives are all other elements
        - to compute loss use multiclass cross entropy with identity matrix for labels
        """
#         Wz = torch.matmul(self.W, z_mod.T)  # (z_dim,B)
#         logits = torch.matmul(z_a, Wz)  # (B,B)
#         logits = logits - torch.max(logits, 1)[0][:, None]
#         return logits
        input_zs = torch.cat([z_a, z_mod], 1)
        logits = F.relu(self.fc1(input_zs))
        logits = F.sigmoid(self.fc2(logits))
        return logits

In [16]:
#@title Generate a batch of negatively labelled examples given observations

def generate_negatives(obs):
    neg_idx = np.random.randint(len(obs), size=len(obs))
    pos_idx = np.arange(len(obs))
    resample = (neg_idx == pos_idx)
    for (i, r) in enumerate(resample):
        if r:
            idx = neg_idx[i]
        else:
            idx = np.random.randint(0, len(obs), 1)[0]
            while idx == i:
                idx = np.random.randint(0, len(obs), 1)[0]
        neg_idx[i] = idx
    return (obs[neg_idx]).copy()


In [17]:
#@title Create a training agent (wrapper for conv agent)

GAMMA = 0.99

class Agent(object):
    def __init__(self, act_dim, in_channels=3, max_epi_num=50, max_epi_len=300, CURL=None, aug=None, conv_net=False, random_shift=None):
        self.N_action = act_dim
        self.max_epi_num = max_epi_num
        self.max_epi_len = max_epi_len
        ''' To decide when to copy weights to the target network '''
        self.num_param_updates = 0
        self.CURL = CURL
        self.aug = aug
        self.random_shift = random_shift
        if conv_net:
            self.conv_net = DQNetworkConv(in_channels, act_dim).to(device)
            self.target = DQNetworkConv(in_channels, act_dim).to(device)
        else:
            ''' if using the encoder head for contrastive loss '''
            self.conv_net = DQNetworkFC(self.CURL.encoder.feature_dim + 2, act_dim).to(device)
            self.target = DQNetworkFC(self.CURL.encoder.feature_dim + 2, act_dim).to(device)
        self.buffer = ReplayMemory(max_epi_num=self.max_epi_num, max_epi_len=self.max_epi_len, obs_shape=CURL.obs_shape[:2])
        self.gamma = 0.99
        self.loss_fn = torch.nn.MSELoss()
        self.optimizer = torch.optim.Adam(
            list(self.conv_net.parameters()) + 
            list(self.CURL.parameters()) + 
            list(self.CURL.encoder.parameters()), lr=1e-3)

    def remember(self, state, action, reward, next_state, speeds):
        self.buffer.remember(state, next_state, action, reward, speeds)

    ''' Copy the weights to the target network every 100 updates '''
    def train(self, batch_size=32, target_update_freq=100, use_encoder=True):
        if self.buffer.is_available():
            obs, next_obs, action_list, reward_list, speeds = self.buffer.sample(batch_size)
            
            ''' Pass through the encoder to get encodings
             If also training the contrastive loss
             include that here! 
             1. data augmentation to create pos and negative pairs
             2. encoder 
             3. update encoder loss function (using a separate optimizer) or add to the loss computed below
            '''

            losses = []

            # check if obs is a numpy or a torch tensor
            if use_encoder:
                speeds = torch.Tensor(speeds).to(device)
                obs_anchor = self.random_shift(process_obs(obs.copy()))
                obs_pos = self.random_shift(process_obs(obs.copy()))
                mixed_obs = generate_negatives(obs)
                mixed_obs = process_obs(mixed_obs)
                obs_neg = self.random_shift(mixed_obs)
                if self.aug is not None:
                    obs_pos = self.aug(obs_pos)
                    obs_neg = self.aug(obs_neg)
                z_a = torch.cat([self.CURL.encode(obs_anchor), speeds], dim=1)
                z_pos = torch.cat([self.CURL.encode(obs_pos), speeds], dim=1)
                # Mix pairs to generate negative labels
                z_neg = torch.cat([self.CURL.encode(obs_neg), speeds], dim=1)
                next_obs = self.random_shift(process_obs(next_obs.copy()))
                z_next = torch.cat([self.CURL.encode(next_obs), speeds], dim=1)

                # logits = self.CURL.compute_logits(z_a, z_pos)
                # labels = torch.arange(logits.shape[0]).long().to(device)
                pos_logits = self.CURL.compute_logits(z_a, z_pos)
                neg_logits = self.CURL.compute_logits(z_a, z_neg)
                # [32, 32]
                pos_labels = torch.ones((pos_logits.shape[0], 1))# .long()
                neg_labels = torch.zeros((neg_logits.shape[0], 1))# .long() 
                # TODO: stack pos and neg logits and labels (double check dim)
                logits = torch.cat([pos_logits, neg_logits], 0)
                labels = torch.cat([pos_labels, neg_labels], 0)
                
                # pass into the loss function
                # encoding_loss = nn.CrossEntropyLoss()(logits, labels)
                encoding_loss = nn.BCELoss()(logits, labels)

                ''' Combine encoding loss with rl loss below '''
                losses.append(encoding_loss)
                # Then pass that encoding through the conv_net to get Q value estimates
                Qs = self.conv_net(z_a)
                next_Qs = self.target(z_next).detach().max(1)[0]
            
            else:
                ''' If not using the encoder, pass the obs directly to the CNN '''
                obs = process_obs(obs)
                # estimate current q values from observations
                Qs = self.conv_net(obs)
                # find next max q values based on next observations
                next_Qs = self.target(next_obs).detach().max(1)[0]
            
            ''' find target q values ''' 
            next_Qs = next_Qs.cpu().numpy() 
            Qs = torch.gather(Qs, dim=1, index=torch.tensor(action_list, dtype=torch.int64).to(device)).float().to(device)
            target_Qs = torch.tensor(reward_list.squeeze(-1) + GAMMA * next_Qs).float().to(device)
            ''' try to set Qs equal to target_Qs '''
            q_loss = self.loss_fn(Qs, target_Qs)
            losses.append(q_loss)
            
            ''' Loss update for q network and encoder head '''
            losses = torch.stack(losses).sum()
            self.optimizer.zero_grad()
            losses.backward()
            self.optimizer.step()

            self.num_param_updates += 1
            if self.num_param_updates % target_update_freq == 0:
                self.target.load_state_dict(self.conv_net.state_dict())

    # TODO: check the sizes of inputs and outputs
    def get_action(self, obs, epsilon, speeds, use_encoding=True):
        ''' 
         If using an encoder, need to pass that thorugh the encoder
         then use the encoding to pass through self.conv_net
        '''
        # obs = torch.tensor(obs)
        if use_encoding:
            speed_tensor = torch.Tensor((speeds,)).to(device)
            obs = self.random_shift(process_obs(obs.copy()))
            obs = torch.cat([self.CURL.encode(obs, detach=True), speed_tensor], dim=1)

        # Dividing obs by 255 is handled in encoder forward (only needed for use_encoding=False)
        if len(obs.shape) == 1:
            obs = obs.unsqueeze(0)

        # epsilon greedy for selecting which action to take
        if random.random() > epsilon:
            qs = self.conv_net(obs)
            action = qs[0].argmax().data.item()
        else:
            action = random.randint(0, self.N_action-1)

        return action

def get_decay(epi_iter):
    decay = math.pow(0.999, epi_iter)
    if decay < 0.2:
        decay = 0.2
    return decay

In [18]:
def main(aug=None, train_curve_filename_prefix="default_curl"):
    env = gym.make('CartPole-v1')
    env.reset()
    max_epi_iter = 1000
    max_MC_iter = 200
    obs = env.render(mode='rgb_array')
    obs_shape = obs.shape
    shift_by = 100
    random_shift = get_random_shift(*obs_shape[:2], shift_by)
    cropped_obs_shape = (obs_shape[0] - shift_by, obs_shape[1] - shift_by, obs_shape[2])
    encoder = PixelEncoder(obs_shape=cropped_obs_shape, num_layers=2, num_filters=64, output_logits=False).to(device)
    CURL_encoder = CURL(obs_shape=obs_shape, z_dim=50, batch_size=1, encoder=encoder, output_type="continuous").to(device)
    agent = Agent(act_dim=env.action_space.n, max_epi_num=max_epi_iter, max_epi_len=max_MC_iter, CURL=CURL_encoder, aug=aug, random_shift=random_shift)
    train_curve = []
    steps_curve = []
    for epi_iter in range(max_epi_iter):
        random.seed()
        env.reset()
        obs = env.render(mode='rgb_array')
        returns = 0.0
        steps = 1
        speeds = (0, 0)
        for MC_iter in range(max_MC_iter):
            action = agent.get_action(obs, get_decay(epi_iter), speeds)
            next_obs, reward, done, state = take_action(env, action)
            returns += reward * agent.gamma ** (MC_iter)
            speeds = (state[1], state[3])
            agent.remember(obs, action, reward, next_obs, speeds)
            obs = next_obs.copy()
            if done or MC_iter >= max_MC_iter-1:
                agent.buffer.create_new_epi()
                steps = MC_iter
                break
        print('Episode', epi_iter, 'returns', returns, 'after', steps, 'timesteps')
        if epi_iter % 1 == 0:
            train_curve.append(returns)
            steps_curve.append(steps)
        if epi_iter % 100 == 0:
            print(f"Saving at episode {epi_iter}")
            np.save(f'{train_curve_filename_prefix}_speed-scaffold_cartpole_{max_MC_iter}MC_{max_epi_iter}E_returns', np.array(train_curve))
            np.save(f'{train_curve_filename_prefix}_speed-scaffold_cartpole_{max_MC_iter}MC_{max_epi_iter}E_steps', np.array(steps_curve))
        if agent.buffer.is_available():
            for _ in range(1):
                agent.train()
    env.close()
    np.save(f'{train_curve_filename_prefix}_speed-scaffold_cartpole_{max_MC_iter}MC_{max_epi_iter}E_returns', np.array(train_curve))
    np.save(f'{train_curve_filename_prefix}_speed-scaffold_cartpole_{max_MC_iter}MC_{max_epi_iter}E_steps', np.array(steps_curve))
    print(train_curve)

In [19]:
main()
main(color_jitter, "color_jitter")
# main(random_elastic_transform, "random_elastic_transform")
main(random_fisheye, "random_fisheye")
main(random_color_equalize, "random_color_equalize")
main(random_gaussian_blur, "random_gaussian_blur")
main(random_gaussian_noise, "random_gaussian_noise")
main(random_horizontal_flip, "random_horizontal_flip")
main(random_color_invert, "random_color_invert")
main(random_perspective_shift, "random_perspective_shift")

Episode 0 returns 10.466174574128356 after 10 timesteps
Saving at episode 0
Episode 1 returns 18.20930624027691 after 19 timesteps
Episode 2 returns 43.6094809547612 after 56 timesteps
Episode 3 returns 12.247897700103202 after 12 timesteps
Episode 4 returns 9.561792499119552 after 9 timesteps
Episode 5 returns 20.63857163563444 after 22 timesteps
Episode 6 returns 19.836941046095397 after 21 timesteps
Episode 7 returns 10.466174574128356 after 10 timesteps
Episode 8 returns 8.64827525163591 after 8 timesteps
Episode 9 returns 13.994164535871148 after 14 timesteps
Episode 10 returns 18.20930624027691 after 19 timesteps
Episode 11 returns 10.466174574128356 after 10 timesteps
Episode 12 returns 20.63857163563444 after 22 timesteps
Episode 13 returns 17.383137616441324 after 18 timesteps
Episode 14 returns 14.854222890512437 after 15 timesteps
Episode 15 returns 16.548623854991238 after 17 timesteps
Episode 16 returns 9.561792499119552 after 9 timesteps
Episode 17 returns 9.5617924991195

Episode 144 returns 19.836941046095397 after 21 timesteps
Episode 145 returns 19.02721317787414 after 20 timesteps
Episode 146 returns 20.63857163563444 after 22 timesteps
Episode 147 returns 19.02721317787414 after 20 timesteps
Episode 148 returns 19.02721317787414 after 20 timesteps
Episode 149 returns 22.995685419484463 after 25 timesteps
Episode 150 returns 17.383137616441324 after 18 timesteps
Episode 151 returns 10.466174574128356 after 10 timesteps
Episode 152 returns 11.361512828387072 after 11 timesteps
Episode 153 returns 32.42709509397165 after 38 timesteps
Episode 154 returns 12.247897700103202 after 12 timesteps
Episode 155 returns 28.94467727277075 after 33 timesteps
Episode 156 returns 10.466174574128356 after 10 timesteps
Episode 157 returns 8.64827525163591 after 8 timesteps
Episode 158 returns 36.38145139361286 after 44 timesteps
Episode 159 returns 18.20930624027691 after 19 timesteps
Episode 160 returns 22.995685419484463 after 25 timesteps
Episode 161 returns 31.05

Episode 287 returns 10.466174574128356 after 10 timesteps
Episode 288 returns 13.12541872310217 after 13 timesteps
Episode 289 returns 22.995685419484463 after 25 timesteps
Episode 290 returns 16.548623854991238 after 17 timesteps
Episode 291 returns 13.994164535871148 after 14 timesteps
Episode 292 returns 15.705680661607312 after 16 timesteps
Episode 293 returns 13.994164535871148 after 14 timesteps
Episode 294 returns 19.02721317787414 after 20 timesteps
Episode 295 returns 18.20930624027691 after 19 timesteps
Episode 296 returns 15.705680661607312 after 16 timesteps
Episode 297 returns 13.994164535871148 after 14 timesteps
Episode 298 returns 12.247897700103202 after 12 timesteps
Episode 299 returns 31.744540498961264 after 37 timesteps
Episode 300 returns 13.12541872310217 after 13 timesteps
Saving at episode 300
Episode 301 returns 10.466174574128356 after 10 timesteps
Episode 302 returns 9.561792499119552 after 9 timesteps
Episode 303 returns 8.64827525163591 after 8 timesteps
E

Episode 429 returns 18.20930624027691 after 19 timesteps
Episode 430 returns 9.561792499119552 after 9 timesteps
Episode 431 returns 26.029962661171947 after 29 timesteps
Episode 432 returns 10.466174574128356 after 10 timesteps
Episode 433 returns 10.466174574128356 after 10 timesteps
Episode 434 returns 19.02721317787414 after 20 timesteps
Episode 435 returns 13.994164535871148 after 14 timesteps
Episode 436 returns 13.994164535871148 after 14 timesteps
Episode 437 returns 13.12541872310217 after 13 timesteps
Episode 438 returns 11.361512828387072 after 11 timesteps
Episode 439 returns 21.432185919278098 after 23 timesteps
Episode 440 returns 13.12541872310217 after 13 timesteps
Episode 441 returns 12.247897700103202 after 12 timesteps
Episode 442 returns 12.247897700103202 after 12 timesteps
Episode 443 returns 10.466174574128356 after 10 timesteps
Episode 444 returns 10.466174574128356 after 10 timesteps
Episode 445 returns 18.20930624027691 after 19 timesteps
Episode 446 returns 1

Episode 572 returns 16.548623854991238 after 17 timesteps
Episode 573 returns 11.361512828387072 after 11 timesteps
Episode 574 returns 13.12541872310217 after 13 timesteps
Episode 575 returns 12.247897700103202 after 12 timesteps
Episode 576 returns 9.561792499119552 after 9 timesteps
Episode 577 returns 21.432185919278098 after 23 timesteps
Episode 578 returns 14.854222890512437 after 15 timesteps
Episode 579 returns 10.466174574128356 after 10 timesteps
Episode 580 returns 13.994164535871148 after 14 timesteps
Episode 581 returns 7.72553055720799 after 7 timesteps
Episode 582 returns 13.12541872310217 after 13 timesteps
Episode 583 returns 8.64827525163591 after 8 timesteps
Episode 584 returns 44.17338614521359 after 57 timesteps
Episode 585 returns 19.02721317787414 after 20 timesteps
Episode 586 returns 16.548623854991238 after 17 timesteps
Episode 587 returns 12.247897700103202 after 12 timesteps
Episode 588 returns 9.561792499119552 after 9 timesteps
Episode 589 returns 11.36151

Episode 715 returns 10.466174574128356 after 10 timesteps
Episode 716 returns 13.12541872310217 after 13 timesteps
Episode 717 returns 8.64827525163591 after 8 timesteps
Episode 718 returns 13.12541872310217 after 13 timesteps
Episode 719 returns 12.247897700103202 after 12 timesteps
Episode 720 returns 9.561792499119552 after 9 timesteps
Episode 721 returns 8.64827525163591 after 8 timesteps
Episode 722 returns 10.466174574128356 after 10 timesteps
Episode 723 returns 9.561792499119552 after 9 timesteps
Episode 724 returns 9.561792499119552 after 9 timesteps
Episode 725 returns 11.361512828387072 after 11 timesteps
Episode 726 returns 21.432185919278098 after 23 timesteps
Episode 727 returns 10.466174574128356 after 10 timesteps
Episode 728 returns 9.561792499119552 after 9 timesteps
Episode 729 returns 13.12541872310217 after 13 timesteps
Episode 730 returns 14.854222890512437 after 15 timesteps
Episode 731 returns 31.055091413092185 after 36 timesteps
Episode 732 returns 14.85422289

Episode 858 returns 10.466174574128356 after 10 timesteps
Episode 859 returns 15.705680661607312 after 16 timesteps
Episode 860 returns 14.854222890512437 after 15 timesteps
Episode 861 returns 8.64827525163591 after 8 timesteps
Episode 862 returns 13.994164535871148 after 14 timesteps
Episode 863 returns 14.854222890512437 after 15 timesteps
Episode 864 returns 10.466174574128356 after 10 timesteps
Episode 865 returns 13.994164535871148 after 14 timesteps
Episode 866 returns 18.20930624027691 after 19 timesteps
Episode 867 returns 10.466174574128356 after 10 timesteps
Episode 868 returns 11.361512828387072 after 11 timesteps
Episode 869 returns 11.361512828387072 after 11 timesteps
Episode 870 returns 19.02721317787414 after 20 timesteps
Episode 871 returns 13.12541872310217 after 13 timesteps
Episode 872 returns 9.561792499119552 after 9 timesteps
Episode 873 returns 7.72553055720799 after 7 timesteps
Episode 874 returns 13.994164535871148 after 14 timesteps
Episode 875 returns 21.43

Episode 0 returns 20.63857163563444 after 22 timesteps
Saving at episode 0
Episode 1 returns 31.055091413092185 after 36 timesteps
Episode 2 returns 22.217864060085315 after 24 timesteps
Episode 3 returns 22.995685419484463 after 25 timesteps
Episode 4 returns 27.501966404214624 after 31 timesteps
Episode 5 returns 20.63857163563444 after 22 timesteps
Episode 6 returns 14.854222890512437 after 15 timesteps
Episode 7 returns 15.705680661607312 after 16 timesteps
Episode 8 returns 10.466174574128356 after 10 timesteps
Episode 9 returns 23.765728565289617 after 26 timesteps
Episode 10 returns 18.20930624027691 after 19 timesteps
Episode 11 returns 13.12541872310217 after 13 timesteps
Episode 12 returns 19.02721317787414 after 20 timesteps
Episode 13 returns 14.854222890512437 after 15 timesteps
Episode 14 returns 12.247897700103202 after 12 timesteps
Episode 15 returns 13.12541872310217 after 13 timesteps
Episode 16 returns 23.765728565289617 after 26 timesteps
Episode 17 returns 22.99568

Episode 144 returns 12.247897700103202 after 12 timesteps
Episode 145 returns 28.226946740172476 after 32 timesteps
Episode 146 returns 19.836941046095397 after 21 timesteps
Episode 147 returns 22.995685419484463 after 25 timesteps
Episode 148 returns 28.226946740172476 after 32 timesteps
Episode 149 returns 12.247897700103202 after 12 timesteps
Episode 150 returns 19.836941046095397 after 21 timesteps
Episode 151 returns 18.20930624027691 after 19 timesteps
Episode 152 returns 13.994164535871148 after 14 timesteps
Episode 153 returns 12.247897700103202 after 12 timesteps
Episode 154 returns 9.561792499119552 after 9 timesteps
Episode 155 returns 16.548623854991238 after 17 timesteps
Episode 156 returns 18.20930624027691 after 19 timesteps
Episode 157 returns 21.432185919278098 after 23 timesteps
Episode 158 returns 49.51141112129299 after 67 timesteps
Episode 159 returns 31.055091413092185 after 36 timesteps
Episode 160 returns 21.432185919278098 after 23 timesteps
Episode 161 returns

Episode 287 returns 27.501966404214624 after 31 timesteps
Episode 288 returns 28.226946740172476 after 32 timesteps
Episode 289 returns 25.28279056684035 after 28 timesteps
Episode 290 returns 22.217864060085315 after 24 timesteps
Episode 291 returns 18.20930624027691 after 19 timesteps
Episode 292 returns 23.765728565289617 after 26 timesteps
Episode 293 returns 22.217864060085315 after 24 timesteps
Episode 294 returns 21.432185919278098 after 23 timesteps
Episode 295 returns 33.10282414303193 after 39 timesteps
Episode 296 returns 32.42709509397165 after 38 timesteps
Episode 297 returns 22.995685419484463 after 25 timesteps
Episode 298 returns 12.247897700103202 after 12 timesteps
Episode 299 returns 16.548623854991238 after 17 timesteps
Episode 300 returns 11.361512828387072 after 11 timesteps
Saving at episode 300
Episode 301 returns 26.029962661171947 after 29 timesteps
Episode 302 returns 13.994164535871148 after 14 timesteps
Episode 303 returns 18.20930624027691 after 19 timeste

Episode 429 returns 13.12541872310217 after 13 timesteps
Episode 430 returns 12.247897700103202 after 12 timesteps
Episode 431 returns 19.02721317787414 after 20 timesteps
Episode 432 returns 14.854222890512437 after 15 timesteps
Episode 433 returns 9.561792499119552 after 9 timesteps
Episode 434 returns 15.705680661607312 after 16 timesteps
Episode 435 returns 16.548623854991238 after 17 timesteps
Episode 436 returns 13.994164535871148 after 14 timesteps
Episode 437 returns 19.02721317787414 after 20 timesteps
Episode 438 returns 13.12541872310217 after 13 timesteps
Episode 439 returns 11.361512828387072 after 11 timesteps
Episode 440 returns 12.247897700103202 after 12 timesteps
Episode 441 returns 11.361512828387072 after 11 timesteps
Episode 442 returns 11.361512828387072 after 11 timesteps
Episode 443 returns 9.561792499119552 after 9 timesteps
Episode 444 returns 12.247897700103202 after 12 timesteps
Episode 445 returns 11.361512828387072 after 11 timesteps
Episode 446 returns 10

Episode 572 returns 9.561792499119552 after 9 timesteps
Episode 573 returns 12.247897700103202 after 12 timesteps
Episode 574 returns 27.501966404214624 after 31 timesteps
Episode 575 returns 13.12541872310217 after 13 timesteps
Episode 576 returns 9.561792499119552 after 9 timesteps
Episode 577 returns 14.854222890512437 after 15 timesteps
Episode 578 returns 15.705680661607312 after 16 timesteps
Episode 579 returns 10.466174574128356 after 10 timesteps
Episode 580 returns 15.705680661607312 after 16 timesteps
Episode 581 returns 33.10282414303193 after 39 timesteps
Episode 582 returns 8.64827525163591 after 8 timesteps
Episode 583 returns 15.705680661607312 after 16 timesteps
Episode 584 returns 22.217864060085315 after 24 timesteps
Episode 585 returns 28.226946740172476 after 32 timesteps
Episode 586 returns 9.561792499119552 after 9 timesteps
Episode 587 returns 17.383137616441324 after 18 timesteps
Episode 588 returns 8.64827525163591 after 8 timesteps
Episode 589 returns 9.561792

Episode 714 returns 12.247897700103202 after 12 timesteps
Episode 715 returns 10.466174574128356 after 10 timesteps
Episode 716 returns 9.561792499119552 after 9 timesteps
Episode 717 returns 9.561792499119552 after 9 timesteps
Episode 718 returns 10.466174574128356 after 10 timesteps
Episode 719 returns 10.466174574128356 after 10 timesteps
Episode 720 returns 16.548623854991238 after 17 timesteps
Episode 721 returns 10.466174574128356 after 10 timesteps
Episode 722 returns 10.466174574128356 after 10 timesteps
Episode 723 returns 8.64827525163591 after 8 timesteps
Episode 724 returns 9.561792499119552 after 9 timesteps
Episode 725 returns 8.64827525163591 after 8 timesteps
Episode 726 returns 17.383137616441324 after 18 timesteps
Episode 727 returns 8.64827525163591 after 8 timesteps
Episode 728 returns 8.64827525163591 after 8 timesteps
Episode 729 returns 12.247897700103202 after 12 timesteps
Episode 730 returns 11.361512828387072 after 11 timesteps
Episode 731 returns 14.854222890

Episode 858 returns 9.561792499119552 after 9 timesteps
Episode 859 returns 14.854222890512437 after 15 timesteps
Episode 860 returns 15.705680661607312 after 16 timesteps
Episode 861 returns 10.466174574128356 after 10 timesteps
Episode 862 returns 13.12541872310217 after 13 timesteps
Episode 863 returns 8.64827525163591 after 8 timesteps
Episode 864 returns 9.561792499119552 after 9 timesteps
Episode 865 returns 13.994164535871148 after 14 timesteps
Episode 866 returns 11.361512828387072 after 11 timesteps
Episode 867 returns 10.466174574128356 after 10 timesteps
Episode 868 returns 20.63857163563444 after 22 timesteps
Episode 869 returns 23.765728565289617 after 26 timesteps
Episode 870 returns 28.226946740172476 after 32 timesteps
Episode 871 returns 13.12541872310217 after 13 timesteps
Episode 872 returns 12.247897700103202 after 12 timesteps
Episode 873 returns 13.994164535871148 after 14 timesteps
Episode 874 returns 13.994164535871148 after 14 timesteps
Episode 875 returns 18.2

Episode 0 returns 17.383137616441324 after 18 timesteps
Saving at episode 0
Episode 1 returns 22.995685419484463 after 25 timesteps
Episode 2 returns 9.561792499119552 after 9 timesteps
Episode 3 returns 14.854222890512437 after 15 timesteps
Episode 4 returns 8.64827525163591 after 8 timesteps
Episode 5 returns 18.20930624027691 after 19 timesteps
Episode 6 returns 8.64827525163591 after 8 timesteps
Episode 7 returns 20.63857163563444 after 22 timesteps
Episode 8 returns 15.705680661607312 after 16 timesteps
Episode 9 returns 16.548623854991238 after 17 timesteps
Episode 10 returns 31.055091413092185 after 36 timesteps
Episode 11 returns 13.994164535871148 after 14 timesteps
Episode 12 returns 19.836941046095397 after 21 timesteps
Episode 13 returns 13.994164535871148 after 14 timesteps
Episode 14 returns 31.744540498961264 after 37 timesteps
Episode 15 returns 17.383137616441324 after 18 timesteps
Episode 16 returns 20.63857163563444 after 22 timesteps
Episode 17 returns 24.5280712796

Episode 144 returns 13.994164535871148 after 14 timesteps
Episode 145 returns 28.94467727277075 after 33 timesteps
Episode 146 returns 24.52807127963672 after 27 timesteps
Episode 147 returns 20.63857163563444 after 22 timesteps
Episode 148 returns 19.836941046095397 after 21 timesteps
Episode 149 returns 15.705680661607312 after 16 timesteps
Episode 150 returns 39.499393286246324 after 49 timesteps
Episode 151 returns 28.226946740172476 after 32 timesteps
Episode 152 returns 12.247897700103202 after 12 timesteps
Episode 153 returns 9.561792499119552 after 9 timesteps
Episode 154 returns 34.434077942585596 after 41 timesteps
Episode 155 returns 27.501966404214624 after 31 timesteps
Episode 156 returns 19.02721317787414 after 20 timesteps
Episode 157 returns 13.994164535871148 after 14 timesteps
Episode 158 returns 21.432185919278098 after 23 timesteps
Episode 159 returns 13.994164535871148 after 14 timesteps
Episode 160 returns 33.10282414303193 after 39 timesteps
Episode 161 returns 1

Episode 286 returns 25.28279056684035 after 28 timesteps
Episode 287 returns 9.561792499119552 after 9 timesteps
Episode 288 returns 19.02721317787414 after 20 timesteps
Episode 289 returns 9.561792499119552 after 9 timesteps
Episode 290 returns 10.466174574128356 after 10 timesteps
Episode 291 returns 14.854222890512437 after 15 timesteps
Episode 292 returns 15.705680661607312 after 16 timesteps
Episode 293 returns 12.247897700103202 after 12 timesteps
Episode 294 returns 16.548623854991238 after 17 timesteps
Episode 295 returns 8.64827525163591 after 8 timesteps
Episode 296 returns 15.705680661607312 after 16 timesteps
Episode 297 returns 10.466174574128356 after 10 timesteps
Episode 298 returns 17.383137616441324 after 18 timesteps
Episode 299 returns 15.705680661607312 after 16 timesteps
Episode 300 returns 19.836941046095397 after 21 timesteps
Saving at episode 300
Episode 301 returns 45.28433576092384 after 59 timesteps
Episode 302 returns 13.12541872310217 after 13 timesteps
Epi

Episode 428 returns 13.994164535871148 after 14 timesteps
Episode 429 returns 7.72553055720799 after 7 timesteps
Episode 430 returns 9.561792499119552 after 9 timesteps
Episode 431 returns 36.38145139361286 after 44 timesteps
Episode 432 returns 27.501966404214624 after 31 timesteps
Episode 433 returns 24.52807127963672 after 27 timesteps
Episode 434 returns 16.548623854991238 after 17 timesteps
Episode 435 returns 22.217864060085315 after 24 timesteps
Episode 436 returns 34.434077942585596 after 41 timesteps
Episode 437 returns 14.854222890512437 after 15 timesteps
Episode 438 returns 11.361512828387072 after 11 timesteps
Episode 439 returns 13.994164535871148 after 14 timesteps
Episode 440 returns 10.466174574128356 after 10 timesteps
Episode 441 returns 13.12541872310217 after 13 timesteps
Episode 442 returns 28.226946740172476 after 32 timesteps
Episode 443 returns 13.12541872310217 after 13 timesteps
Episode 444 returns 16.548623854991238 after 17 timesteps
Episode 445 returns 18.

Episode 571 returns 14.854222890512437 after 15 timesteps
Episode 572 returns 15.705680661607312 after 16 timesteps
Episode 573 returns 13.12541872310217 after 13 timesteps
Episode 574 returns 16.548623854991238 after 17 timesteps
Episode 575 returns 15.705680661607312 after 16 timesteps
Episode 576 returns 18.20930624027691 after 19 timesteps
Episode 577 returns 14.854222890512437 after 15 timesteps
Episode 578 returns 10.466174574128356 after 10 timesteps
Episode 579 returns 12.247897700103202 after 12 timesteps
Episode 580 returns 10.466174574128356 after 10 timesteps
Episode 581 returns 8.64827525163591 after 8 timesteps
Episode 582 returns 10.466174574128356 after 10 timesteps
Episode 583 returns 10.466174574128356 after 10 timesteps
Episode 584 returns 13.994164535871148 after 14 timesteps
Episode 585 returns 9.561792499119552 after 9 timesteps
Episode 586 returns 9.561792499119552 after 9 timesteps
Episode 587 returns 11.361512828387072 after 11 timesteps
Episode 588 returns 9.5

Episode 713 returns 12.247897700103202 after 12 timesteps
Episode 714 returns 14.854222890512437 after 15 timesteps
Episode 715 returns 8.64827525163591 after 8 timesteps
Episode 716 returns 9.561792499119552 after 9 timesteps
Episode 717 returns 14.854222890512437 after 15 timesteps
Episode 718 returns 20.63857163563444 after 22 timesteps
Episode 719 returns 11.361512828387072 after 11 timesteps
Episode 720 returns 9.561792499119552 after 9 timesteps
Episode 721 returns 9.561792499119552 after 9 timesteps
Episode 722 returns 15.705680661607312 after 16 timesteps
Episode 723 returns 13.994164535871148 after 14 timesteps
Episode 724 returns 12.247897700103202 after 12 timesteps
Episode 725 returns 10.466174574128356 after 10 timesteps
Episode 726 returns 12.247897700103202 after 12 timesteps
Episode 727 returns 8.64827525163591 after 8 timesteps
Episode 728 returns 10.466174574128356 after 10 timesteps
Episode 729 returns 13.12541872310217 after 13 timesteps
Episode 730 returns 10.46617

Episode 856 returns 19.02721317787414 after 20 timesteps
Episode 857 returns 16.548623854991238 after 17 timesteps
Episode 858 returns 14.854222890512437 after 15 timesteps
Episode 859 returns 16.548623854991238 after 17 timesteps
Episode 860 returns 11.361512828387072 after 11 timesteps
Episode 861 returns 12.247897700103202 after 12 timesteps
Episode 862 returns 11.361512828387072 after 11 timesteps
Episode 863 returns 20.63857163563444 after 22 timesteps
Episode 864 returns 10.466174574128356 after 10 timesteps
Episode 865 returns 8.64827525163591 after 8 timesteps
Episode 866 returns 14.854222890512437 after 15 timesteps
Episode 867 returns 7.72553055720799 after 7 timesteps
Episode 868 returns 10.466174574128356 after 10 timesteps
Episode 869 returns 13.994164535871148 after 14 timesteps
Episode 870 returns 10.466174574128356 after 10 timesteps
Episode 871 returns 11.361512828387072 after 11 timesteps
Episode 872 returns 8.64827525163591 after 8 timesteps
Episode 873 returns 12.24

[17.383137616441324, 22.995685419484463, 9.561792499119552, 14.854222890512437, 8.64827525163591, 18.20930624027691, 8.64827525163591, 20.63857163563444, 15.705680661607312, 16.548623854991238, 31.055091413092185, 13.994164535871148, 19.836941046095397, 13.994164535871148, 31.744540498961264, 17.383137616441324, 20.63857163563444, 24.52807127963672, 13.994164535871148, 15.705680661607312, 15.705680661607312, 20.63857163563444, 13.994164535871148, 15.705680661607312, 19.836941046095397, 11.361512828387072, 22.217864060085315, 11.361512828387072, 30.35867819504261, 29.655230500043043, 22.217864060085315, 22.995685419484463, 12.247897700103202, 23.765728565289617, 11.361512828387072, 13.12541872310217, 28.226946740172476, 9.561792499119552, 26.769663034560228, 36.38145139361286, 26.029962661171947, 34.434077942585596, 11.361512828387072, 30.35867819504261, 22.217864060085315, 26.769663034560228, 15.705680661607312, 23.765728565289617, 15.705680661607312, 10.466174574128356, 14.85422289051

Episode 0 returns 32.42709509397165 after 38 timesteps
Saving at episode 0
Episode 1 returns 19.836941046095397 after 21 timesteps
Episode 2 returns 16.548623854991238 after 17 timesteps
Episode 3 returns 15.705680661607312 after 16 timesteps
Episode 4 returns 10.466174574128356 after 10 timesteps
Episode 5 returns 12.247897700103202 after 12 timesteps
Episode 6 returns 8.64827525163591 after 8 timesteps
Episode 7 returns 16.548623854991238 after 17 timesteps
Episode 8 returns 25.28279056684035 after 28 timesteps
Episode 9 returns 30.35867819504261 after 35 timesteps
Episode 10 returns 9.561792499119552 after 9 timesteps
Episode 11 returns 11.361512828387072 after 11 timesteps
Episode 12 returns 35.73883979152814 after 43 timesteps
Episode 13 returns 13.12541872310217 after 13 timesteps
Episode 14 returns 11.361512828387072 after 11 timesteps
Episode 15 returns 31.055091413092185 after 36 timesteps
Episode 16 returns 11.361512828387072 after 11 timesteps
Episode 17 returns 21.432185919

Episode 144 returns 40.70335535985002 after 51 timesteps
Episode 145 returns 22.995685419484463 after 25 timesteps
Episode 146 returns 15.705680661607312 after 16 timesteps
Episode 147 returns 17.383137616441324 after 18 timesteps
Episode 148 returns 14.854222890512437 after 15 timesteps
Episode 149 returns 17.383137616441324 after 18 timesteps
Episode 150 returns 19.02721317787414 after 20 timesteps
Episode 151 returns 19.836941046095397 after 21 timesteps
Episode 152 returns 19.02721317787414 after 20 timesteps
Episode 153 returns 10.466174574128356 after 10 timesteps
Episode 154 returns 21.432185919278098 after 23 timesteps
Episode 155 returns 31.055091413092185 after 36 timesteps
Episode 156 returns 16.548623854991238 after 17 timesteps
Episode 157 returns 22.217864060085315 after 24 timesteps
Episode 158 returns 11.361512828387072 after 11 timesteps
Episode 159 returns 11.361512828387072 after 11 timesteps
Episode 160 returns 13.12541872310217 after 13 timesteps
Episode 161 return

Episode 286 returns 13.994164535871148 after 14 timesteps
Episode 287 returns 12.247897700103202 after 12 timesteps
Episode 288 returns 11.361512828387072 after 11 timesteps
Episode 289 returns 14.854222890512437 after 15 timesteps
Episode 290 returns 30.35867819504261 after 35 timesteps
Episode 291 returns 38.27098590577117 after 47 timesteps
Episode 292 returns 12.247897700103202 after 12 timesteps
Episode 293 returns 23.765728565289617 after 26 timesteps
Episode 294 returns 20.63857163563444 after 22 timesteps
Episode 295 returns 15.705680661607312 after 16 timesteps
Episode 296 returns 15.705680661607312 after 16 timesteps
Episode 297 returns 15.705680661607312 after 16 timesteps
Episode 298 returns 23.765728565289617 after 26 timesteps
Episode 299 returns 17.383137616441324 after 18 timesteps
Episode 300 returns 20.63857163563444 after 22 timesteps
Saving at episode 300
Episode 301 returns 13.994164535871148 after 14 timesteps
Episode 302 returns 18.20930624027691 after 19 timeste

Episode 428 returns 15.705680661607312 after 16 timesteps
Episode 429 returns 13.994164535871148 after 14 timesteps
Episode 430 returns 9.561792499119552 after 9 timesteps
Episode 431 returns 22.995685419484463 after 25 timesteps
Episode 432 returns 17.383137616441324 after 18 timesteps
Episode 433 returns 8.64827525163591 after 8 timesteps
Episode 434 returns 13.994164535871148 after 14 timesteps
Episode 435 returns 19.836941046095397 after 21 timesteps
Episode 436 returns 32.42709509397165 after 38 timesteps
Episode 437 returns 12.247897700103202 after 12 timesteps
Episode 438 returns 23.765728565289617 after 26 timesteps
Episode 439 returns 9.561792499119552 after 9 timesteps
Episode 440 returns 13.994164535871148 after 14 timesteps
Episode 441 returns 16.548623854991238 after 17 timesteps
Episode 442 returns 17.383137616441324 after 18 timesteps
Episode 443 returns 13.12541872310217 after 13 timesteps
Episode 444 returns 24.52807127963672 after 27 timesteps
Episode 445 returns 15.7

Episode 571 returns 32.42709509397165 after 38 timesteps
Episode 572 returns 19.02721317787414 after 20 timesteps
Episode 573 returns 13.12541872310217 after 13 timesteps
Episode 574 returns 11.361512828387072 after 11 timesteps
Episode 575 returns 19.836941046095397 after 21 timesteps
Episode 576 returns 35.73883979152814 after 43 timesteps
Episode 577 returns 20.63857163563444 after 22 timesteps
Episode 578 returns 20.63857163563444 after 22 timesteps
Episode 579 returns 13.994164535871148 after 14 timesteps
Episode 580 returns 13.994164535871148 after 14 timesteps
Episode 581 returns 19.836941046095397 after 21 timesteps
Episode 582 returns 9.561792499119552 after 9 timesteps
Episode 583 returns 10.466174574128356 after 10 timesteps
Episode 584 returns 13.12541872310217 after 13 timesteps
Episode 585 returns 15.705680661607312 after 16 timesteps
Episode 586 returns 12.247897700103202 after 12 timesteps
Episode 587 returns 35.73883979152814 after 43 timesteps
Episode 588 returns 39.4

Episode 714 returns 11.361512828387072 after 11 timesteps
Episode 715 returns 11.361512828387072 after 11 timesteps
Episode 716 returns 12.247897700103202 after 12 timesteps
Episode 717 returns 8.64827525163591 after 8 timesteps
Episode 718 returns 9.561792499119552 after 9 timesteps
Episode 719 returns 9.561792499119552 after 9 timesteps
Episode 720 returns 12.247897700103202 after 12 timesteps
Episode 721 returns 8.64827525163591 after 8 timesteps
Episode 722 returns 12.247897700103202 after 12 timesteps
Episode 723 returns 19.02721317787414 after 20 timesteps
Episode 724 returns 9.561792499119552 after 9 timesteps
Episode 725 returns 13.994164535871148 after 14 timesteps
Episode 726 returns 8.64827525163591 after 8 timesteps
Episode 727 returns 13.994164535871148 after 14 timesteps
Episode 728 returns 10.466174574128356 after 10 timesteps
Episode 729 returns 15.705680661607312 after 16 timesteps
Episode 730 returns 9.561792499119552 after 9 timesteps
Episode 731 returns 8.6482752516

Episode 857 returns 9.561792499119552 after 9 timesteps
Episode 858 returns 9.561792499119552 after 9 timesteps
Episode 859 returns 15.705680661607312 after 16 timesteps
Episode 860 returns 12.247897700103202 after 12 timesteps
Episode 861 returns 11.361512828387072 after 11 timesteps
Episode 862 returns 23.765728565289617 after 26 timesteps
Episode 863 returns 13.994164535871148 after 14 timesteps
Episode 864 returns 22.217864060085315 after 24 timesteps
Episode 865 returns 14.854222890512437 after 15 timesteps
Episode 866 returns 13.12541872310217 after 13 timesteps
Episode 867 returns 14.854222890512437 after 15 timesteps
Episode 868 returns 9.561792499119552 after 9 timesteps
Episode 869 returns 10.466174574128356 after 10 timesteps
Episode 870 returns 10.466174574128356 after 10 timesteps
Episode 871 returns 24.52807127963672 after 27 timesteps
Episode 872 returns 11.361512828387072 after 11 timesteps
Episode 873 returns 12.247897700103202 after 12 timesteps
Episode 874 returns 12

Episode 0 returns 18.20930624027691 after 19 timesteps
Saving at episode 0
Episode 1 returns 10.466174574128356 after 10 timesteps
Episode 2 returns 22.995685419484463 after 25 timesteps
Episode 3 returns 17.383137616441324 after 18 timesteps
Episode 4 returns 9.561792499119552 after 9 timesteps
Episode 5 returns 12.247897700103202 after 12 timesteps
Episode 6 returns 29.655230500043043 after 34 timesteps
Episode 7 returns 17.383137616441324 after 18 timesteps
Episode 8 returns 12.247897700103202 after 12 timesteps
Episode 9 returns 10.466174574128356 after 10 timesteps
Episode 10 returns 31.055091413092185 after 36 timesteps
Episode 11 returns 9.561792499119552 after 9 timesteps
Episode 12 returns 13.12541872310217 after 13 timesteps
Episode 13 returns 26.029962661171947 after 29 timesteps
Episode 14 returns 26.029962661171947 after 29 timesteps
Episode 15 returns 14.854222890512437 after 15 timesteps
Episode 16 returns 29.655230500043043 after 34 timesteps
Episode 17 returns 22.99568

Episode 144 returns 22.217864060085315 after 24 timesteps
Episode 145 returns 32.42709509397165 after 38 timesteps
Episode 146 returns 17.383137616441324 after 18 timesteps
Episode 147 returns 22.217864060085315 after 24 timesteps
Episode 148 returns 10.466174574128356 after 10 timesteps
Episode 149 returns 13.994164535871148 after 14 timesteps
Episode 150 returns 8.64827525163591 after 8 timesteps
Episode 151 returns 21.432185919278098 after 23 timesteps
Episode 152 returns 9.561792499119552 after 9 timesteps
Episode 153 returns 8.64827525163591 after 8 timesteps
Episode 154 returns 49.001425375043425 after 66 timesteps
Episode 155 returns 11.361512828387072 after 11 timesteps
Episode 156 returns 22.217864060085315 after 24 timesteps
Episode 157 returns 32.42709509397165 after 38 timesteps
Episode 158 returns 22.995685419484463 after 25 timesteps
Episode 159 returns 31.055091413092185 after 36 timesteps
Episode 160 returns 19.836941046095397 after 21 timesteps
Episode 161 returns 27.5

Episode 286 returns 11.361512828387072 after 11 timesteps
Episode 287 returns 24.52807127963672 after 27 timesteps
Episode 288 returns 23.765728565289617 after 26 timesteps
Episode 289 returns 38.88827604671346 after 48 timesteps
Episode 290 returns 24.52807127963672 after 27 timesteps
Episode 291 returns 22.995685419484463 after 25 timesteps
Episode 292 returns 14.854222890512437 after 15 timesteps
Episode 293 returns 27.501966404214624 after 31 timesteps
Episode 294 returns 13.12541872310217 after 13 timesteps
Episode 295 returns 44.731652283761456 after 58 timesteps
Episode 296 returns 26.029962661171947 after 29 timesteps
Episode 297 returns 26.769663034560228 after 30 timesteps
Episode 298 returns 35.73883979152814 after 43 timesteps
Episode 299 returns 15.705680661607312 after 16 timesteps
Episode 300 returns 49.001425375043425 after 66 timesteps
Saving at episode 300
Episode 301 returns 13.12541872310217 after 13 timesteps
Episode 302 returns 23.765728565289617 after 26 timestep

Episode 428 returns 12.247897700103202 after 12 timesteps
Episode 429 returns 12.247897700103202 after 12 timesteps
Episode 430 returns 12.247897700103202 after 12 timesteps
Episode 431 returns 9.561792499119552 after 9 timesteps
Episode 432 returns 13.12541872310217 after 13 timesteps
Episode 433 returns 26.029962661171947 after 29 timesteps
Episode 434 returns 12.247897700103202 after 12 timesteps
Episode 435 returns 8.64827525163591 after 8 timesteps
Episode 436 returns 20.63857163563444 after 22 timesteps
Episode 437 returns 19.836941046095397 after 21 timesteps
Episode 438 returns 11.361512828387072 after 11 timesteps
Episode 439 returns 18.20930624027691 after 19 timesteps
Episode 440 returns 14.854222890512437 after 15 timesteps
Episode 441 returns 16.548623854991238 after 17 timesteps
Episode 442 returns 18.20930624027691 after 19 timesteps
Episode 443 returns 28.94467727277075 after 33 timesteps
Episode 444 returns 10.466174574128356 after 10 timesteps
Episode 445 returns 13.9

Episode 571 returns 7.72553055720799 after 7 timesteps
Episode 572 returns 12.247897700103202 after 12 timesteps
Episode 573 returns 15.705680661607312 after 16 timesteps
Episode 574 returns 13.994164535871148 after 14 timesteps
Episode 575 returns 31.744540498961264 after 37 timesteps
Episode 576 returns 11.361512828387072 after 11 timesteps
Episode 577 returns 9.561792499119552 after 9 timesteps
Episode 578 returns 11.361512828387072 after 11 timesteps
Episode 579 returns 12.247897700103202 after 12 timesteps
Episode 580 returns 12.247897700103202 after 12 timesteps
Episode 581 returns 11.361512828387072 after 11 timesteps
Episode 582 returns 25.28279056684035 after 28 timesteps
Episode 583 returns 13.994164535871148 after 14 timesteps
Episode 584 returns 11.361512828387072 after 11 timesteps
Episode 585 returns 27.501966404214624 after 31 timesteps
Episode 586 returns 22.217864060085315 after 24 timesteps
Episode 587 returns 28.94467727277075 after 33 timesteps
Episode 588 returns 2

Episode 713 returns 12.247897700103202 after 12 timesteps
Episode 714 returns 8.64827525163591 after 8 timesteps
Episode 715 returns 13.12541872310217 after 13 timesteps
Episode 716 returns 8.64827525163591 after 8 timesteps
Episode 717 returns 7.72553055720799 after 7 timesteps
Episode 718 returns 12.247897700103202 after 12 timesteps
Episode 719 returns 15.705680661607312 after 16 timesteps
Episode 720 returns 11.361512828387072 after 11 timesteps
Episode 721 returns 9.561792499119552 after 9 timesteps
Episode 722 returns 13.12541872310217 after 13 timesteps
Episode 723 returns 11.361512828387072 after 11 timesteps
Episode 724 returns 9.561792499119552 after 9 timesteps
Episode 725 returns 8.64827525163591 after 8 timesteps
Episode 726 returns 15.705680661607312 after 16 timesteps
Episode 727 returns 11.361512828387072 after 11 timesteps
Episode 728 returns 9.561792499119552 after 9 timesteps
Episode 729 returns 31.055091413092185 after 36 timesteps
Episode 730 returns 13.99416453587

Episode 856 returns 24.52807127963672 after 27 timesteps
Episode 857 returns 21.432185919278098 after 23 timesteps
Episode 858 returns 11.361512828387072 after 11 timesteps
Episode 859 returns 8.64827525163591 after 8 timesteps
Episode 860 returns 13.12541872310217 after 13 timesteps
Episode 861 returns 18.20930624027691 after 19 timesteps
Episode 862 returns 11.361512828387072 after 11 timesteps
Episode 863 returns 9.561792499119552 after 9 timesteps
Episode 864 returns 11.361512828387072 after 11 timesteps
Episode 865 returns 10.466174574128356 after 10 timesteps
Episode 866 returns 8.64827525163591 after 8 timesteps
Episode 867 returns 13.994164535871148 after 14 timesteps
Episode 868 returns 16.548623854991238 after 17 timesteps
Episode 869 returns 14.854222890512437 after 15 timesteps
Episode 870 returns 22.995685419484463 after 25 timesteps
Episode 871 returns 8.64827525163591 after 8 timesteps
Episode 872 returns 8.64827525163591 after 8 timesteps
Episode 873 returns 8.648275251

Episode 999 returns 28.94467727277075 after 33 timesteps
[18.20930624027691, 10.466174574128356, 22.995685419484463, 17.383137616441324, 9.561792499119552, 12.247897700103202, 29.655230500043043, 17.383137616441324, 12.247897700103202, 10.466174574128356, 31.055091413092185, 9.561792499119552, 13.12541872310217, 26.029962661171947, 26.029962661171947, 14.854222890512437, 29.655230500043043, 22.995685419484463, 13.12541872310217, 12.247897700103202, 19.02721317787414, 18.20930624027691, 24.52807127963672, 28.226946740172476, 8.64827525163591, 19.836941046095397, 20.63857163563444, 9.561792499119552, 35.08973716315974, 17.383137616441324, 31.744540498961264, 9.561792499119552, 15.705680661607312, 13.12541872310217, 28.226946740172476, 13.12541872310217, 26.769663034560228, 17.383137616441324, 12.247897700103202, 10.466174574128356, 16.548623854991238, 38.88827604671346, 17.383137616441324, 28.94467727277075, 13.12541872310217, 16.548623854991238, 27.501966404214624, 22.217864060085315, 9

Episode 0 returns 16.548623854991238 after 17 timesteps
Saving at episode 0
Episode 1 returns 21.432185919278098 after 23 timesteps
Episode 2 returns 14.854222890512437 after 15 timesteps
Episode 3 returns 15.705680661607312 after 16 timesteps
Episode 4 returns 28.94467727277075 after 33 timesteps
Episode 5 returns 21.432185919278098 after 23 timesteps
Episode 6 returns 10.466174574128356 after 10 timesteps
Episode 7 returns 26.769663034560228 after 30 timesteps
Episode 8 returns 17.383137616441324 after 18 timesteps
Episode 9 returns 13.994164535871148 after 14 timesteps
Episode 10 returns 16.548623854991238 after 17 timesteps
Episode 11 returns 22.995685419484463 after 25 timesteps
Episode 12 returns 10.466174574128356 after 10 timesteps
Episode 13 returns 21.432185919278098 after 23 timesteps
Episode 14 returns 32.42709509397165 after 38 timesteps
Episode 15 returns 13.12541872310217 after 13 timesteps
Episode 16 returns 31.744540498961264 after 37 timesteps
Episode 17 returns 35.08

Episode 144 returns 20.63857163563444 after 22 timesteps
Episode 145 returns 13.12541872310217 after 13 timesteps
Episode 146 returns 9.561792499119552 after 9 timesteps
Episode 147 returns 14.854222890512437 after 15 timesteps
Episode 148 returns 15.705680661607312 after 16 timesteps
Episode 149 returns 13.994164535871148 after 14 timesteps
Episode 150 returns 10.466174574128356 after 10 timesteps
Episode 151 returns 11.361512828387072 after 11 timesteps
Episode 152 returns 22.995685419484463 after 25 timesteps
Episode 153 returns 20.63857163563444 after 22 timesteps
Episode 154 returns 18.20930624027691 after 19 timesteps
Episode 155 returns 41.29632180625152 after 52 timesteps
Episode 156 returns 22.995685419484463 after 25 timesteps
Episode 157 returns 15.705680661607312 after 16 timesteps
Episode 158 returns 14.854222890512437 after 15 timesteps
Episode 159 returns 35.73883979152814 after 43 timesteps
Episode 160 returns 26.029962661171947 after 29 timesteps
Episode 161 returns 20

Episode 287 returns 16.548623854991238 after 17 timesteps
Episode 288 returns 17.383137616441324 after 18 timesteps
Episode 289 returns 13.994164535871148 after 14 timesteps
Episode 290 returns 22.217864060085315 after 24 timesteps
Episode 291 returns 38.88827604671346 after 48 timesteps
Episode 292 returns 14.854222890512437 after 15 timesteps
Episode 293 returns 22.217864060085315 after 24 timesteps
Episode 294 returns 17.383137616441324 after 18 timesteps
Episode 295 returns 15.705680661607312 after 16 timesteps
Episode 296 returns 18.20930624027691 after 19 timesteps
Episode 297 returns 39.499393286246324 after 49 timesteps
Episode 298 returns 19.836941046095397 after 21 timesteps
Episode 299 returns 12.247897700103202 after 12 timesteps
Episode 300 returns 11.361512828387072 after 11 timesteps
Saving at episode 300
Episode 301 returns 13.994164535871148 after 14 timesteps
Episode 302 returns 26.769663034560228 after 30 timesteps
Episode 303 returns 14.854222890512437 after 15 time

Episode 429 returns 42.46452500230711 after 54 timesteps
Episode 430 returns 27.501966404214624 after 31 timesteps
Episode 431 returns 18.20930624027691 after 19 timesteps
Episode 432 returns 19.02721317787414 after 20 timesteps
Episode 433 returns 15.705680661607312 after 16 timesteps
Episode 434 returns 22.217864060085315 after 24 timesteps
Episode 435 returns 14.854222890512437 after 15 timesteps
Episode 436 returns 14.854222890512437 after 15 timesteps
Episode 437 returns 13.994164535871148 after 14 timesteps
Episode 438 returns 13.12541872310217 after 13 timesteps
Episode 439 returns 33.771795901601614 after 40 timesteps
Episode 440 returns 8.64827525163591 after 8 timesteps
Episode 441 returns 26.029962661171947 after 29 timesteps
Episode 442 returns 13.994164535871148 after 14 timesteps
Episode 443 returns 13.994164535871148 after 14 timesteps
Episode 444 returns 11.361512828387072 after 11 timesteps
Episode 445 returns 13.994164535871148 after 14 timesteps
Episode 446 returns 1

Episode 572 returns 13.12541872310217 after 13 timesteps
Episode 573 returns 12.247897700103202 after 12 timesteps
Episode 574 returns 22.995685419484463 after 25 timesteps
Episode 575 returns 14.854222890512437 after 15 timesteps
Episode 576 returns 13.12541872310217 after 13 timesteps
Episode 577 returns 13.994164535871148 after 14 timesteps
Episode 578 returns 14.854222890512437 after 15 timesteps
Episode 579 returns 10.466174574128356 after 10 timesteps
Episode 580 returns 9.561792499119552 after 9 timesteps
Episode 581 returns 10.466174574128356 after 10 timesteps
Episode 582 returns 14.854222890512437 after 15 timesteps
Episode 583 returns 16.548623854991238 after 17 timesteps
Episode 584 returns 25.28279056684035 after 28 timesteps
Episode 585 returns 13.12541872310217 after 13 timesteps
Episode 586 returns 16.548623854991238 after 17 timesteps
Episode 587 returns 13.994164535871148 after 14 timesteps
Episode 588 returns 10.466174574128356 after 10 timesteps
Episode 589 returns 

Episode 715 returns 16.548623854991238 after 17 timesteps
Episode 716 returns 15.705680661607312 after 16 timesteps
Episode 717 returns 18.20930624027691 after 19 timesteps
Episode 718 returns 14.854222890512437 after 15 timesteps
Episode 719 returns 13.12541872310217 after 13 timesteps
Episode 720 returns 12.247897700103202 after 12 timesteps
Episode 721 returns 18.20930624027691 after 19 timesteps
Episode 722 returns 23.765728565289617 after 26 timesteps
Episode 723 returns 9.561792499119552 after 9 timesteps
Episode 724 returns 14.854222890512437 after 15 timesteps
Episode 725 returns 13.12541872310217 after 13 timesteps
Episode 726 returns 15.705680661607312 after 16 timesteps
Episode 727 returns 18.20930624027691 after 19 timesteps
Episode 728 returns 10.466174574128356 after 10 timesteps
Episode 729 returns 9.561792499119552 after 9 timesteps
Episode 730 returns 12.247897700103202 after 12 timesteps
Episode 731 returns 9.561792499119552 after 9 timesteps
Episode 732 returns 9.561

Episode 858 returns 10.466174574128356 after 10 timesteps
Episode 859 returns 16.548623854991238 after 17 timesteps
Episode 860 returns 12.247897700103202 after 12 timesteps
Episode 861 returns 17.383137616441324 after 18 timesteps
Episode 862 returns 13.994164535871148 after 14 timesteps
Episode 863 returns 11.361512828387072 after 11 timesteps
Episode 864 returns 21.432185919278098 after 23 timesteps
Episode 865 returns 13.12541872310217 after 13 timesteps
Episode 866 returns 10.466174574128356 after 10 timesteps
Episode 867 returns 22.995685419484463 after 25 timesteps
Episode 868 returns 38.88827604671346 after 48 timesteps
Episode 869 returns 10.466174574128356 after 10 timesteps
Episode 870 returns 18.20930624027691 after 19 timesteps
Episode 871 returns 15.705680661607312 after 16 timesteps
Episode 872 returns 13.994164535871148 after 14 timesteps
Episode 873 returns 12.247897700103202 after 12 timesteps
Episode 874 returns 12.247897700103202 after 12 timesteps
Episode 875 retur

Episode 0 returns 17.383137616441324 after 18 timesteps
Saving at episode 0
Episode 1 returns 18.20930624027691 after 19 timesteps
Episode 2 returns 9.561792499119552 after 9 timesteps
Episode 3 returns 13.12541872310217 after 13 timesteps
Episode 4 returns 13.994164535871148 after 14 timesteps
Episode 5 returns 24.52807127963672 after 27 timesteps
Episode 6 returns 17.383137616441324 after 18 timesteps
Episode 7 returns 15.705680661607312 after 16 timesteps
Episode 8 returns 15.705680661607312 after 16 timesteps
Episode 9 returns 32.42709509397165 after 38 timesteps
Episode 10 returns 19.836941046095397 after 21 timesteps
Episode 11 returns 14.854222890512437 after 15 timesteps
Episode 12 returns 25.28279056684035 after 28 timesteps
Episode 13 returns 29.655230500043043 after 34 timesteps
Episode 14 returns 17.383137616441324 after 18 timesteps
Episode 15 returns 28.226946740172476 after 32 timesteps
Episode 16 returns 17.383137616441324 after 18 timesteps
Episode 17 returns 32.427095

Episode 144 returns 13.994164535871148 after 14 timesteps
Episode 145 returns 11.361512828387072 after 11 timesteps
Episode 146 returns 39.499393286246324 after 49 timesteps
Episode 147 returns 13.12541872310217 after 13 timesteps
Episode 148 returns 37.017636879676736 after 45 timesteps
Episode 149 returns 16.548623854991238 after 17 timesteps
Episode 150 returns 31.055091413092185 after 36 timesteps
Episode 151 returns 14.854222890512437 after 15 timesteps
Episode 152 returns 16.548623854991238 after 17 timesteps
Episode 153 returns 22.217864060085315 after 24 timesteps
Episode 154 returns 12.247897700103202 after 12 timesteps
Episode 155 returns 17.383137616441324 after 18 timesteps
Episode 156 returns 22.217864060085315 after 24 timesteps
Episode 157 returns 28.226946740172476 after 32 timesteps
Episode 158 returns 11.361512828387072 after 11 timesteps
Episode 159 returns 13.994164535871148 after 14 timesteps
Episode 160 returns 18.20930624027691 after 19 timesteps
Episode 161 retu

Episode 286 returns 18.20930624027691 after 19 timesteps
Episode 287 returns 28.226946740172476 after 32 timesteps
Episode 288 returns 9.561792499119552 after 9 timesteps
Episode 289 returns 15.705680661607312 after 16 timesteps
Episode 290 returns 23.765728565289617 after 26 timesteps
Episode 291 returns 12.247897700103202 after 12 timesteps
Episode 292 returns 16.548623854991238 after 17 timesteps
Episode 293 returns 20.63857163563444 after 22 timesteps
Episode 294 returns 15.705680661607312 after 16 timesteps
Episode 295 returns 14.854222890512437 after 15 timesteps
Episode 296 returns 17.383137616441324 after 18 timesteps
Episode 297 returns 18.20930624027691 after 19 timesteps
Episode 298 returns 14.854222890512437 after 15 timesteps
Episode 299 returns 13.994164535871148 after 14 timesteps
Episode 300 returns 13.12541872310217 after 13 timesteps
Saving at episode 300
Episode 301 returns 31.055091413092185 after 36 timesteps
Episode 302 returns 15.705680661607312 after 16 timestep

Episode 428 returns 13.994164535871148 after 14 timesteps
Episode 429 returns 19.02721317787414 after 20 timesteps
Episode 430 returns 16.548623854991238 after 17 timesteps
Episode 431 returns 8.64827525163591 after 8 timesteps
Episode 432 returns 10.466174574128356 after 10 timesteps
Episode 433 returns 13.994164535871148 after 14 timesteps
Episode 434 returns 9.561792499119552 after 9 timesteps
Episode 435 returns 10.466174574128356 after 10 timesteps
Episode 436 returns 20.63857163563444 after 22 timesteps
Episode 437 returns 16.548623854991238 after 17 timesteps
Episode 438 returns 13.12541872310217 after 13 timesteps
Episode 439 returns 13.994164535871148 after 14 timesteps
Episode 440 returns 24.52807127963672 after 27 timesteps
Episode 441 returns 22.995685419484463 after 25 timesteps
Episode 442 returns 19.02721317787414 after 20 timesteps
Episode 443 returns 13.12541872310217 after 13 timesteps
Episode 444 returns 13.12541872310217 after 13 timesteps
Episode 445 returns 13.994

Episode 571 returns 10.466174574128356 after 10 timesteps
Episode 572 returns 7.72553055720799 after 7 timesteps
Episode 573 returns 9.561792499119552 after 9 timesteps
Episode 574 returns 25.28279056684035 after 28 timesteps
Episode 575 returns 8.64827525163591 after 8 timesteps
Episode 576 returns 8.64827525163591 after 8 timesteps
Episode 577 returns 16.548623854991238 after 17 timesteps
Episode 578 returns 13.12541872310217 after 13 timesteps
Episode 579 returns 14.854222890512437 after 15 timesteps
Episode 580 returns 38.27098590577117 after 47 timesteps
Episode 581 returns 10.466174574128356 after 10 timesteps
Episode 582 returns 13.994164535871148 after 14 timesteps
Episode 583 returns 11.361512828387072 after 11 timesteps
Episode 584 returns 9.561792499119552 after 9 timesteps
Episode 585 returns 11.361512828387072 after 11 timesteps
Episode 586 returns 20.63857163563444 after 22 timesteps
Episode 587 returns 13.12541872310217 after 13 timesteps
Episode 588 returns 19.836941046

Episode 714 returns 9.561792499119552 after 9 timesteps
Episode 715 returns 8.64827525163591 after 8 timesteps
Episode 716 returns 11.361512828387072 after 11 timesteps
Episode 717 returns 10.466174574128356 after 10 timesteps
Episode 718 returns 11.361512828387072 after 11 timesteps
Episode 719 returns 13.12541872310217 after 13 timesteps
Episode 720 returns 13.12541872310217 after 13 timesteps
Episode 721 returns 10.466174574128356 after 10 timesteps
Episode 722 returns 12.247897700103202 after 12 timesteps
Episode 723 returns 21.432185919278098 after 23 timesteps
Episode 724 returns 9.561792499119552 after 9 timesteps
Episode 725 returns 9.561792499119552 after 9 timesteps
Episode 726 returns 8.64827525163591 after 8 timesteps
Episode 727 returns 13.12541872310217 after 13 timesteps
Episode 728 returns 7.72553055720799 after 7 timesteps
Episode 729 returns 10.466174574128356 after 10 timesteps
Episode 730 returns 21.432185919278098 after 23 timesteps
Episode 731 returns 14.854222890

Episode 857 returns 10.466174574128356 after 10 timesteps
Episode 858 returns 15.705680661607312 after 16 timesteps
Episode 859 returns 12.247897700103202 after 12 timesteps
Episode 860 returns 18.20930624027691 after 19 timesteps
Episode 861 returns 15.705680661607312 after 16 timesteps
Episode 862 returns 10.466174574128356 after 10 timesteps
Episode 863 returns 9.561792499119552 after 9 timesteps
Episode 864 returns 10.466174574128356 after 10 timesteps
Episode 865 returns 7.72553055720799 after 7 timesteps
Episode 866 returns 13.994164535871148 after 14 timesteps
Episode 867 returns 13.12541872310217 after 13 timesteps
Episode 868 returns 15.705680661607312 after 16 timesteps
Episode 869 returns 16.548623854991238 after 17 timesteps
Episode 870 returns 12.247897700103202 after 12 timesteps
Episode 871 returns 10.466174574128356 after 10 timesteps
Episode 872 returns 13.12541872310217 after 13 timesteps
Episode 873 returns 13.12541872310217 after 13 timesteps
Episode 874 returns 9.5

[17.383137616441324, 18.20930624027691, 9.561792499119552, 13.12541872310217, 13.994164535871148, 24.52807127963672, 17.383137616441324, 15.705680661607312, 15.705680661607312, 32.42709509397165, 19.836941046095397, 14.854222890512437, 25.28279056684035, 29.655230500043043, 17.383137616441324, 28.226946740172476, 17.383137616441324, 32.42709509397165, 13.994164535871148, 22.217864060085315, 16.548623854991238, 15.705680661607312, 10.466174574128356, 22.217864060085315, 19.02721317787414, 15.705680661607312, 16.548623854991238, 13.12541872310217, 13.12541872310217, 12.247897700103202, 18.20930624027691, 14.854222890512437, 13.12541872310217, 33.10282414303193, 10.466174574128356, 19.02721317787414, 50.01629701008006, 9.561792499119552, 12.247897700103202, 13.12541872310217, 16.548623854991238, 13.12541872310217, 28.226946740172476, 12.247897700103202, 20.63857163563444, 13.994164535871148, 26.769663034560228, 15.705680661607312, 16.548623854991238, 20.63857163563444, 9.561792499119552, 

Episode 0 returns 15.705680661607312 after 16 timesteps
Saving at episode 0
Episode 1 returns 25.28279056684035 after 28 timesteps
Episode 2 returns 13.994164535871148 after 14 timesteps
Episode 3 returns 11.361512828387072 after 11 timesteps
Episode 4 returns 13.12541872310217 after 13 timesteps
Episode 5 returns 28.226946740172476 after 32 timesteps
Episode 6 returns 26.769663034560228 after 30 timesteps
Episode 7 returns 28.94467727277075 after 33 timesteps
Episode 8 returns 19.02721317787414 after 20 timesteps
Episode 9 returns 23.765728565289617 after 26 timesteps
Episode 10 returns 11.361512828387072 after 11 timesteps
Episode 11 returns 13.12541872310217 after 13 timesteps
Episode 12 returns 8.64827525163591 after 8 timesteps
Episode 13 returns 12.247897700103202 after 12 timesteps
Episode 14 returns 9.561792499119552 after 9 timesteps
Episode 15 returns 14.854222890512437 after 15 timesteps
Episode 16 returns 44.731652283761456 after 58 timesteps
Episode 17 returns 37.017636879

Episode 144 returns 11.361512828387072 after 11 timesteps
Episode 145 returns 11.361512828387072 after 11 timesteps
Episode 146 returns 13.994164535871148 after 14 timesteps
Episode 147 returns 17.383137616441324 after 18 timesteps
Episode 148 returns 19.02721317787414 after 20 timesteps
Episode 149 returns 31.744540498961264 after 37 timesteps
Episode 150 returns 30.35867819504261 after 35 timesteps
Episode 151 returns 14.854222890512437 after 15 timesteps
Episode 152 returns 26.769663034560228 after 30 timesteps
Episode 153 returns 22.995685419484463 after 25 timesteps
Episode 154 returns 12.247897700103202 after 12 timesteps
Episode 155 returns 19.02721317787414 after 20 timesteps
Episode 156 returns 16.548623854991238 after 17 timesteps
Episode 157 returns 16.548623854991238 after 17 timesteps
Episode 158 returns 34.434077942585596 after 41 timesteps
Episode 159 returns 17.383137616441324 after 18 timesteps
Episode 160 returns 19.836941046095397 after 21 timesteps
Episode 161 retur

Episode 286 returns 34.434077942585596 after 41 timesteps
Episode 287 returns 30.35867819504261 after 35 timesteps
Episode 288 returns 16.548623854991238 after 17 timesteps
Episode 289 returns 19.02721317787414 after 20 timesteps
Episode 290 returns 17.383137616441324 after 18 timesteps
Episode 291 returns 12.247897700103202 after 12 timesteps
Episode 292 returns 21.432185919278098 after 23 timesteps
Episode 293 returns 23.765728565289617 after 26 timesteps
Episode 294 returns 15.705680661607312 after 16 timesteps
Episode 295 returns 11.361512828387072 after 11 timesteps
Episode 296 returns 20.63857163563444 after 22 timesteps
Episode 297 returns 19.02721317787414 after 20 timesteps
Episode 298 returns 37.017636879676736 after 45 timesteps
Episode 299 returns 21.432185919278098 after 23 timesteps
Episode 300 returns 42.46452500230711 after 54 timesteps
Saving at episode 300
Episode 301 returns 38.27098590577117 after 47 timesteps
Episode 302 returns 17.383137616441324 after 18 timestep

Episode 428 returns 18.20930624027691 after 19 timesteps
Episode 429 returns 17.383137616441324 after 18 timesteps
Episode 430 returns 13.12541872310217 after 13 timesteps
Episode 431 returns 11.361512828387072 after 11 timesteps
Episode 432 returns 18.20930624027691 after 19 timesteps
Episode 433 returns 26.029962661171947 after 29 timesteps
Episode 434 returns 12.247897700103202 after 12 timesteps
Episode 435 returns 18.20930624027691 after 19 timesteps
Episode 436 returns 19.836941046095397 after 21 timesteps
Episode 437 returns 13.12541872310217 after 13 timesteps
Episode 438 returns 12.247897700103202 after 12 timesteps
Episode 439 returns 12.247897700103202 after 12 timesteps
Episode 440 returns 12.247897700103202 after 12 timesteps
Episode 441 returns 16.548623854991238 after 17 timesteps
Episode 442 returns 13.12541872310217 after 13 timesteps
Episode 443 returns 12.247897700103202 after 12 timesteps
Episode 444 returns 14.854222890512437 after 15 timesteps
Episode 445 returns 

Episode 570 returns 10.466174574128356 after 10 timesteps
Episode 571 returns 17.383137616441324 after 18 timesteps
Episode 572 returns 9.561792499119552 after 9 timesteps
Episode 573 returns 14.854222890512437 after 15 timesteps
Episode 574 returns 10.466174574128356 after 10 timesteps
Episode 575 returns 15.705680661607312 after 16 timesteps
Episode 576 returns 13.12541872310217 after 13 timesteps
Episode 577 returns 22.995685419484463 after 25 timesteps
Episode 578 returns 9.561792499119552 after 9 timesteps
Episode 579 returns 9.561792499119552 after 9 timesteps
Episode 580 returns 13.12541872310217 after 13 timesteps
Episode 581 returns 13.994164535871148 after 14 timesteps
Episode 582 returns 15.705680661607312 after 16 timesteps
Episode 583 returns 14.854222890512437 after 15 timesteps
Episode 584 returns 10.466174574128356 after 10 timesteps
Episode 585 returns 11.361512828387072 after 11 timesteps
Episode 586 returns 21.432185919278098 after 23 timesteps
Episode 587 returns 9.

Episode 713 returns 9.561792499119552 after 9 timesteps
Episode 714 returns 13.994164535871148 after 14 timesteps
Episode 715 returns 12.247897700103202 after 12 timesteps
Episode 716 returns 11.361512828387072 after 11 timesteps
Episode 717 returns 7.72553055720799 after 7 timesteps
Episode 718 returns 16.548623854991238 after 17 timesteps
Episode 719 returns 15.705680661607312 after 16 timesteps
Episode 720 returns 9.561792499119552 after 9 timesteps
Episode 721 returns 17.383137616441324 after 18 timesteps
Episode 722 returns 7.72553055720799 after 7 timesteps
Episode 723 returns 8.64827525163591 after 8 timesteps
Episode 724 returns 15.705680661607312 after 16 timesteps
Episode 725 returns 13.12541872310217 after 13 timesteps
Episode 726 returns 9.561792499119552 after 9 timesteps
Episode 727 returns 9.561792499119552 after 9 timesteps
Episode 728 returns 11.361512828387072 after 11 timesteps
Episode 729 returns 10.466174574128356 after 10 timesteps
Episode 730 returns 12.247897700

Episode 856 returns 11.361512828387072 after 11 timesteps
Episode 857 returns 9.561792499119552 after 9 timesteps
Episode 858 returns 13.12541872310217 after 13 timesteps
Episode 859 returns 7.72553055720799 after 7 timesteps
Episode 860 returns 16.548623854991238 after 17 timesteps
Episode 861 returns 13.12541872310217 after 13 timesteps
Episode 862 returns 12.247897700103202 after 12 timesteps
Episode 863 returns 10.466174574128356 after 10 timesteps
Episode 864 returns 20.63857163563444 after 22 timesteps
Episode 865 returns 17.383137616441324 after 18 timesteps
Episode 866 returns 10.466174574128356 after 10 timesteps
Episode 867 returns 10.466174574128356 after 10 timesteps
Episode 868 returns 9.561792499119552 after 9 timesteps
Episode 869 returns 12.247897700103202 after 12 timesteps
Episode 870 returns 9.561792499119552 after 9 timesteps
Episode 871 returns 10.466174574128356 after 10 timesteps
Episode 872 returns 13.12541872310217 after 13 timesteps
Episode 873 returns 10.4661

Episode 999 returns 13.12541872310217 after 13 timesteps
[15.705680661607312, 25.28279056684035, 13.994164535871148, 11.361512828387072, 13.12541872310217, 28.226946740172476, 26.769663034560228, 28.94467727277075, 19.02721317787414, 23.765728565289617, 11.361512828387072, 13.12541872310217, 8.64827525163591, 12.247897700103202, 9.561792499119552, 14.854222890512437, 44.731652283761456, 37.017636879676736, 13.12541872310217, 26.029962661171947, 19.02721317787414, 26.769663034560228, 14.854222890512437, 18.20930624027691, 13.12541872310217, 12.247897700103202, 17.383137616441324, 17.383137616441324, 22.217864060085315, 15.705680661607312, 21.432185919278098, 13.994164535871148, 14.854222890512437, 10.466174574128356, 13.994164535871148, 22.217864060085315, 19.02721317787414, 35.08973716315974, 9.561792499119552, 11.361512828387072, 30.35867819504261, 13.994164535871148, 34.434077942585596, 23.765728565289617, 23.765728565289617, 32.42709509397165, 12.247897700103202, 25.28279056684035, 

Episode 0 returns 15.705680661607312 after 16 timesteps
Saving at episode 0
Episode 1 returns 16.548623854991238 after 17 timesteps
Episode 2 returns 9.561792499119552 after 9 timesteps
Episode 3 returns 12.247897700103202 after 12 timesteps
Episode 4 returns 19.02721317787414 after 20 timesteps
Episode 5 returns 22.995685419484463 after 25 timesteps
Episode 6 returns 35.73883979152814 after 43 timesteps
Episode 7 returns 23.765728565289617 after 26 timesteps
Episode 8 returns 13.12541872310217 after 13 timesteps
Episode 9 returns 12.247897700103202 after 12 timesteps
Episode 10 returns 12.247897700103202 after 12 timesteps
Episode 11 returns 13.12541872310217 after 13 timesteps
Episode 12 returns 9.561792499119552 after 9 timesteps
Episode 13 returns 41.29632180625152 after 52 timesteps
Episode 14 returns 13.994164535871148 after 14 timesteps
Episode 15 returns 22.217864060085315 after 24 timesteps
Episode 16 returns 19.02721317787414 after 20 timesteps
Episode 17 returns 14.854222890

Episode 144 returns 17.383137616441324 after 18 timesteps
Episode 145 returns 10.466174574128356 after 10 timesteps
Episode 146 returns 9.561792499119552 after 9 timesteps
Episode 147 returns 11.361512828387072 after 11 timesteps
Episode 148 returns 20.63857163563444 after 22 timesteps
Episode 149 returns 14.854222890512437 after 15 timesteps
Episode 150 returns 9.561792499119552 after 9 timesteps
Episode 151 returns 12.247897700103202 after 12 timesteps
Episode 152 returns 14.854222890512437 after 15 timesteps
Episode 153 returns 15.705680661607312 after 16 timesteps
Episode 154 returns 31.744540498961264 after 37 timesteps
Episode 155 returns 16.548623854991238 after 17 timesteps
Episode 156 returns 13.12541872310217 after 13 timesteps
Episode 157 returns 12.247897700103202 after 12 timesteps
Episode 158 returns 15.705680661607312 after 16 timesteps
Episode 159 returns 19.836941046095397 after 21 timesteps
Episode 160 returns 16.548623854991238 after 17 timesteps
Episode 161 returns 

Episode 286 returns 14.854222890512437 after 15 timesteps
Episode 287 returns 10.466174574128356 after 10 timesteps
Episode 288 returns 33.10282414303193 after 39 timesteps
Episode 289 returns 17.383137616441324 after 18 timesteps
Episode 290 returns 16.548623854991238 after 17 timesteps
Episode 291 returns 9.561792499119552 after 9 timesteps
Episode 292 returns 12.247897700103202 after 12 timesteps
Episode 293 returns 26.029962661171947 after 29 timesteps
Episode 294 returns 13.994164535871148 after 14 timesteps
Episode 295 returns 31.744540498961264 after 37 timesteps
Episode 296 returns 12.247897700103202 after 12 timesteps
Episode 297 returns 13.12541872310217 after 13 timesteps
Episode 298 returns 9.561792499119552 after 9 timesteps
Episode 299 returns 18.20930624027691 after 19 timesteps
Episode 300 returns 11.361512828387072 after 11 timesteps
Saving at episode 300
Episode 301 returns 20.63857163563444 after 22 timesteps
Episode 302 returns 14.854222890512437 after 15 timesteps


Episode 428 returns 9.561792499119552 after 9 timesteps
Episode 429 returns 22.217864060085315 after 24 timesteps
Episode 430 returns 13.12541872310217 after 13 timesteps
Episode 431 returns 13.994164535871148 after 14 timesteps
Episode 432 returns 13.12541872310217 after 13 timesteps
Episode 433 returns 12.247897700103202 after 12 timesteps
Episode 434 returns 9.561792499119552 after 9 timesteps
Episode 435 returns 16.548623854991238 after 17 timesteps
Episode 436 returns 9.561792499119552 after 9 timesteps
Episode 437 returns 11.361512828387072 after 11 timesteps
Episode 438 returns 10.466174574128356 after 10 timesteps
Episode 439 returns 12.247897700103202 after 12 timesteps
Episode 440 returns 12.247897700103202 after 12 timesteps
Episode 441 returns 13.12541872310217 after 13 timesteps
Episode 442 returns 13.12541872310217 after 13 timesteps
Episode 443 returns 19.836941046095397 after 21 timesteps
Episode 444 returns 13.12541872310217 after 13 timesteps
Episode 445 returns 11.36

Episode 571 returns 12.247897700103202 after 12 timesteps
Episode 572 returns 13.994164535871148 after 14 timesteps
Episode 573 returns 10.466174574128356 after 10 timesteps
Episode 574 returns 8.64827525163591 after 8 timesteps
Episode 575 returns 13.994164535871148 after 14 timesteps
Episode 576 returns 18.20930624027691 after 19 timesteps
Episode 577 returns 22.995685419484463 after 25 timesteps
Episode 578 returns 20.63857163563444 after 22 timesteps
Episode 579 returns 22.995685419484463 after 25 timesteps
Episode 580 returns 9.561792499119552 after 9 timesteps
Episode 581 returns 9.561792499119552 after 9 timesteps
Episode 582 returns 16.548623854991238 after 17 timesteps
Episode 583 returns 11.361512828387072 after 11 timesteps
Episode 584 returns 12.247897700103202 after 12 timesteps
Episode 585 returns 11.361512828387072 after 11 timesteps
Episode 586 returns 22.217864060085315 after 24 timesteps
Episode 587 returns 15.705680661607312 after 16 timesteps
Episode 588 returns 9.5

Episode 713 returns 13.994164535871148 after 14 timesteps
Episode 714 returns 8.64827525163591 after 8 timesteps
Episode 715 returns 16.548623854991238 after 17 timesteps
Episode 716 returns 8.64827525163591 after 8 timesteps
Episode 717 returns 9.561792499119552 after 9 timesteps
Episode 718 returns 10.466174574128356 after 10 timesteps
Episode 719 returns 14.854222890512437 after 15 timesteps
Episode 720 returns 12.247897700103202 after 12 timesteps
Episode 721 returns 17.383137616441324 after 18 timesteps
Episode 722 returns 12.247897700103202 after 12 timesteps
Episode 723 returns 11.361512828387072 after 11 timesteps
Episode 724 returns 15.705680661607312 after 16 timesteps
Episode 725 returns 16.548623854991238 after 17 timesteps
Episode 726 returns 9.561792499119552 after 9 timesteps
Episode 727 returns 13.12541872310217 after 13 timesteps
Episode 728 returns 8.64827525163591 after 8 timesteps
Episode 729 returns 15.705680661607312 after 16 timesteps
Episode 730 returns 10.46617

Episode 856 returns 9.561792499119552 after 9 timesteps
Episode 857 returns 29.655230500043043 after 34 timesteps
Episode 858 returns 16.548623854991238 after 17 timesteps
Episode 859 returns 12.247897700103202 after 12 timesteps
Episode 860 returns 12.247897700103202 after 12 timesteps
Episode 861 returns 9.561792499119552 after 9 timesteps
Episode 862 returns 9.561792499119552 after 9 timesteps
Episode 863 returns 10.466174574128356 after 10 timesteps
Episode 864 returns 13.994164535871148 after 14 timesteps
Episode 865 returns 21.432185919278098 after 23 timesteps
Episode 866 returns 9.561792499119552 after 9 timesteps
Episode 867 returns 12.247897700103202 after 12 timesteps
Episode 868 returns 16.548623854991238 after 17 timesteps
Episode 869 returns 9.561792499119552 after 9 timesteps
Episode 870 returns 13.994164535871148 after 14 timesteps
Episode 871 returns 16.548623854991238 after 17 timesteps
Episode 872 returns 33.10282414303193 after 39 timesteps
Episode 873 returns 39.49

Episode 999 returns 28.94467727277075 after 33 timesteps
[15.705680661607312, 16.548623854991238, 9.561792499119552, 12.247897700103202, 19.02721317787414, 22.995685419484463, 35.73883979152814, 23.765728565289617, 13.12541872310217, 12.247897700103202, 12.247897700103202, 13.12541872310217, 9.561792499119552, 41.29632180625152, 13.994164535871148, 22.217864060085315, 19.02721317787414, 14.854222890512437, 15.705680661607312, 21.432185919278098, 24.52807127963672, 27.501966404214624, 16.548623854991238, 22.217864060085315, 17.383137616441324, 16.548623854991238, 12.247897700103202, 35.73883979152814, 17.383137616441324, 13.12541872310217, 13.12541872310217, 21.432185919278098, 13.994164535871148, 11.361512828387072, 22.995685419484463, 17.383137616441324, 19.02721317787414, 17.383137616441324, 19.836941046095397, 54.79563497335241, 18.20930624027691, 15.705680661607312, 13.994164535871148, 31.055091413092185, 19.02721317787414, 25.28279056684035, 13.12541872310217, 11.361512828387072, 

In [None]:
env = gym.make('CartPole-v1')
env.reset()
obs = env.render(mode='rgb_array')
env.close()

In [None]:
obs_shape = obs.shape
shift_by = 20
random_shift = get_random_shift(*obs_shape[:2], shift_by)
# imageio.imwrite("cartpole_unchanged.png", obs)
obs_tensor = process_obs(obs.copy(), divide=False, unsqueeze_not_4=False)
# imageio.imwrite("cartpole_random_shift.png", random_shift(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_color_jitter.png", color_jitter(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_elastic_transform.png", random_elastic_transform(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_fisheye.png", random_fisheye(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_color_equalize.png", random_color_equalize(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_gaussian_blur.png", random_gaussian_blur(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_gaussian_noise.png", random_gaussian_noise(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_horizontal_flip.png", random_horizontal_flip(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_color_invert.png", random_color_invert(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))
# imageio.imwrite("cartpole_random_perspective_shift.png", random_perspective_shift(obs_tensor).squeeze().numpy().transpose((1, 2, 0)))

In [None]:
def old_main():
    env = gym.make('MountainCars-v0')
    if len(env.observation_space.shape) >= 3:
        env = WrapAtariEnv(env=env, noop_max=30, frameskip=3, framestack=4, test=test)
    if not test:
        dele = input("Do you wanna recreate ckpt and log folders? (y/n)")
        if dele == 'y':
            if os.path.exists(save_dir):
                shutil.rmtree(save_dir)

    env = wrap_env(env, train=not test)
    print(env.observation_space.shape)
    if len(env.observation_space.shape) >= 3:
        q_net = DQNetworkConv
    else:
        assert(False)
    agent = DQNAgent(env=env, qnet=q_net)
    if args.test:
        agent.rollout(episodes=100, render=render)
    else:
        agent.train()
    agent.env.close()

Code references for DQN:

https://github.com/taochenshh/dqn-pytorch

https://github.com/transedward/pytorch-dqn (for sampling from replay buffer)

CURL code: https://github.com/MishaLaskin/curl