In [1]:
import os
import time
import gym
import random
import numpy as np
import pandas as pd
from tensorflow import keras
#import keras
#from keras import layers
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, auc, roc_curve
import tensorflow as tf
print(tf. __version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


1.13.1


In [2]:
from baselines.ppo2 import ppo2
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

from baselines import bench
from baselines import logger
from baselines import deepq
from baselines.common.tf_util import make_session
from baselines.common import set_global_seeds

In [3]:
SEED = 1
np.random.seed(SEED)
random.seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)
#tf.random.set_seed(SEED)
tf.set_random_seed(SEED)

In [4]:
def multiclass(actual_class, pred_class, average = 'macro'):

    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        other_class = [x for x in unique_class if x != per_class]
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]
        roc_auc = roc_auc_score(new_actual_class, new_pred_class, average = average)
        roc_auc_dict[per_class] = roc_auc
    avg = sum(roc_auc_dict.values()) / len(roc_auc_dict)
    return avg

In [5]:
def test(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

#### The data

In [6]:
num_classes = 3
input_shape = (3,)

In [7]:
#data with random zeros
X_train = np.loadtxt('data/zeros/X_train.txt', dtype=np.float32)
#X_val = np.loadtxt('data/zeros/X_val.txt', dtype=np.float32)
X_test = np.loadtxt('data/zeros/X_test.txt', dtype=np.float32)

y_train = np.loadtxt('data/zeros/y_train.txt', dtype=int)
#y_val = np.loadtxt('data/zeros/y_val.txt', dtype=int)
y_test = np.loadtxt('data/zeros/y_test.txt', dtype=int)

In [8]:
#Original synthetic dataset
# df = pd.read_csv('data/dataset_10000.csv')
# class_dict = {'A':0, 'B':1, 'C':2}
# df['label'] = df['label'].replace(class_dict)
# X = df.iloc[:, 0:-1]
# y = df.iloc[:, -1]

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# scaler = MinMaxScaler()
# X_train = scaler.fit_transform(X_train)
# #X_val = scaler.transform(X_val)
# X_test = scaler.transform(X_test)
# X_train, y_train = np.array(X_train), np.array(y_train)
# #X_val, y_val = np.array(X_val), np.array(y_val)
# X_test, y_test = np.array(X_test), np.array(y_test)
# X_train.head()

In [9]:
# Make sure images have shape (28, 28, 1)
#x_train = np.expand_dims(x_train, -1)
#x_train = np.reshape(-1,3)
#x_test = np.expand_dims(x_test, -1)
x_train, x_test = X_train, X_test
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

x_train shape: (7000, 3)
7000 train samples
3000 test samples


In [10]:
y_train

array([0, 0, 2, ..., 1, 0, 1])

In [11]:
class SyntheticEnv(gym.Env):
    def __init__(self, images_per_episode=1, dataset=(x_train, y_train), random=True):
        super().__init__()

        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.Box(low=0, high=1,
                                                shape=(3,),
                                                dtype=np.float32)

        self.images_per_episode = images_per_episode
        self.step_count = 0

        self.x, self.y = dataset
        self.random = random
        self.dataset_idx = 0
    
    def reset(self):
        self.step_count = 0

        obs = self._next_obs()
        return obs

    def step(self, action):
        done = False
        reward = int(action == self.expected_action)

        obs = self._next_obs()
        info = {}

        self.step_count += 1
        if self.step_count >= self.images_per_episode:
            done = True
            info = {'y_pred':action, 'y_actual':self.expected_action}
        if not self.random:
            self.dataset_idx+=1
        return obs, reward, done, info

    

    def _next_obs(self):
        if self.random:
            #print('choosing randomly')
            next_obs_idx = random.randint(0, len(self.x) - 1)
            self.expected_action = int(self.y[next_obs_idx])
            obs = self.x[next_obs_idx]

        else:
            #self.dataset_idx += 1
            if self.dataset_idx >= len(self.x):
                raise StopIteration()
            #print('choosing sequentially')
            obs = self.x[self.dataset_idx]
            self.expected_action = int(self.y[self.dataset_idx])


        return obs

In [12]:
def synthetic_dqn():
    logger.configure(dir='./logs/synthetic_dqn', format_strs=['stdout', 'tensorboard'])
    env = SyntheticEnv(images_per_episode=1)
    env = bench.Monitor(env, logger.get_dir())

    model = deepq.learn(
        env,
        'mlp',
        num_layers=1, #change number of layers
        num_hidden=64,
        activation=tf.nn.relu,
        hiddens=[32],
        dueling=False, #change to True after
        lr=1e-4,
        total_timesteps=int(1.2e5),
        #total_timesteps = 100,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
    )

    model.save('models/dqn_synth.pkl')
    env.close()

    return model

start_time = time.time()
dqn_model = synthetic_dqn()
print("DQN Training Time:", time.time() - start_time)

Logging to ./logs/synthetic_dqn
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.




Instructions for updating:
Use tf.cast instead.
--------------------------------------
| % time spent exploring  | 99       |
| episodes                | 100      |
| mean 100 episode reward | 0.3      |
| steps                   | 98       |
--------------------------------------
--------------------------------------
| % time spent exploring  | 98       |
| episodes                | 200      |
| mean 100 episode reward | 0.3      |
| steps                   | 198      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 97       |
| episodes                | 300      |
| mean 100 episode reward | 0.4      |
| steps                   | 298      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 96       |
| episodes                | 400      |
| mean 100 episode reward | 0.4      |
| steps                   | 398      |
--------------------------------------
----------------

--------------------------------------
| % time spent exploring  | 71       |
| episodes                | 3.5e+03  |
| mean 100 episode reward | 0.4      |
| steps                   | 3.5e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 70       |
| episodes                | 3.6e+03  |
| mean 100 episode reward | 0.3      |
| steps                   | 3.6e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 69       |
| episodes                | 3.7e+03  |
| mean 100 episode reward | 0.3      |
| steps                   | 3.7e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 68       |
| episodes                | 3.8e+03  |
| mean 100 episode reward | 0.4      |
| steps                   | 3.8e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 41       |
| episodes                | 7.1e+03  |
| mean 100 episode reward | 0.4      |
| steps                   | 7.1e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 40       |
| episodes                | 7.2e+03  |
| mean 100 episode reward | 0.4      |
| steps                   | 7.2e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 39       |
| episodes                | 7.3e+03  |
| mean 100 episode reward | 0.4      |
| steps                   | 7.3e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 38       |
| episodes                | 7.4e+03  |
| mean 100 episode reward | 0.4      |
| steps                   | 7.4e+03  |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 11       |
| episodes                | 1.07e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.07e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 10       |
| episodes                | 1.08e+04 |
| mean 100 episode reward | 0.3      |
| steps                   | 1.08e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 10       |
| episodes                | 1.09e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.09e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 9        |
| episodes                | 1.1e+04  |
| mean 100 episode reward | 0.4      |
| steps                   | 1.1e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.43e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.43e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.44e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.44e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.45e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.45e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.46e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.46e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.79e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.79e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.8e+04  |
| mean 100 episode reward | 0.4      |
| steps                   | 1.8e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.81e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.81e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.82e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.82e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.14e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 2.14e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.15e+04 |
| mean 100 episode reward | 0.3      |
| steps                   | 2.15e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.16e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 2.16e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.17e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 2.17e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.5e+04  |
| mean 100 episode reward | 0.5      |
| steps                   | 2.5e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.51e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 2.51e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.52e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 2.52e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.53e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 2.53e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.86e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 2.86e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.87e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 2.87e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.88e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 2.88e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 2.89e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 2.89e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.22e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 3.22e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.23e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 3.23e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.24e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 3.24e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.25e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 3.25e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.58e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 3.58e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.59e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 3.59e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.6e+04  |
| mean 100 episode reward | 0.5      |
| steps                   | 3.6e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.61e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 3.61e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.94e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 3.94e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.95e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 3.95e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.96e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 3.96e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 3.97e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 3.97e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.3e+04  |
| mean 100 episode reward | 0.6      |
| steps                   | 4.3e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.31e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 4.31e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.32e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 4.32e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.33e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 4.33e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.66e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 4.66e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.67e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 4.67e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.68e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 4.68e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 4.69e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 4.69e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.01e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 5.01e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.02e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 5.02e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.03e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.03e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.04e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.04e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.37e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.37e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.38e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 5.38e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.39e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.39e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.4e+04  |
| mean 100 episode reward | 0.4      |
| steps                   | 5.4e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.73e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.73e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.74e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.74e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.75e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.75e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 5.76e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 5.76e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.09e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.09e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.1e+04  |
| mean 100 episode reward | 0.4      |
| steps                   | 6.1e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.11e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 6.11e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.12e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 6.12e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.45e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.45e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.46e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.46e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.47e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.47e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.48e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.48e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.81e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.81e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.82e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 6.82e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.83e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.83e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 6.84e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 6.84e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.17e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 7.17e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.18e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 7.18e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.19e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 7.19e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.2e+04  |
| mean 100 episode reward | 0.4      |
| steps                   | 7.2e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.53e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 7.53e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.54e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 7.54e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.55e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 7.55e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.56e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 7.56e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.89e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 7.89e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.9e+04  |
| mean 100 episode reward | 0.4      |
| steps                   | 7.9e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.91e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 7.91e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 7.92e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 7.92e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.25e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 8.25e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.26e+04 |
| mean 100 episode reward | 0.6      |
| steps                   | 8.26e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.27e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 8.27e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.28e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 8.28e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.61e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 8.61e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.62e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 8.62e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.63e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 8.63e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.64e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 8.64e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.97e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 8.97e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.98e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 8.98e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 8.99e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 8.99e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9e+04    |
| mean 100 episode reward | 0.5      |
| steps                   | 9e+04    |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.33e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 9.33e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.34e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 9.34e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.35e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 9.35e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.36e+04 |
| mean 100 episode reward | 0.5      |
| steps                   | 9.36e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.69e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 9.69e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.7e+04  |
| mean 100 episode reward | 0.6      |
| steps                   | 9.7e+04  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.71e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 9.71e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 9.72e+04 |
| mean 100 episode reward | 0.4      |
| steps                   | 9.72e+04 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.00e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1e+05    |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.01e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.01e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.01e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.01e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.01e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.01e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.04e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.04e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.04e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.04e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.04e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.04e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.04e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.04e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.08e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.08e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.08e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.08e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.08e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.08e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.08e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.08e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.11e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.11e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.11e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.11e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.12e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.11e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.12e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.12e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.15e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.15e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.15e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.15e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.15e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.15e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.15e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.15e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.18e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.18e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.19e+05 |
| mean 100 episode reward | 0.4      |
| steps                   | 1.19e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.19e+05 |
| mean 100 episode reward | 0.5      |
| steps                   | 1.19e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring  | 1        |
| episodes                | 1.19e+05 |
| mean 100 episode reward | 0.6      |
| steps                   | 1.19e+05 |
--------------------------------------
--------------------------------------
| % time spent exploring 

In [13]:
def dqn_eval(dqn_model):
    attempts, correct = 0,0
    #a =0

    env = SyntheticEnv(images_per_episode=1, dataset=(x_test, y_test), random=False)
    test_df = pd.DataFrame()

    try:
        while True:
        #while a <5:
            #a+=1
            #print(f'Attempt {attempts}')
            obs, done = env.reset(), False
            #print(f'Dataset index: {env.dataset_idx}')
            while not done:
                obs, rew, done, info = env.step(dqn_model(obs[None])[0])
                #print(f'obs: {obs}')
                #print(f'reward: {rew}')
                #print(f'done: {done}')
                #print(f'info: {info}')
                if done==True:
                    test_df = test_df.append(info, ignore_index=True)

                attempts += 1
                if rew > 0:
                    correct += 1

    except StopIteration:
        print()
        print('validation done...')
        print('Accuracy: {0}%'.format((float(correct) / attempts) * 100))
    return test_df

test_df = dqn_eval(dqn_model)


validation done...
Accuracy: 46.5%


In [14]:
acc, f1, roc_auc = test(test_df['y_actual'], test_df['y_pred'])
acc, f1, roc_auc

(0.465, 0.2141974499158582, 0.498906265156988)

In [15]:
len(X_test), len(test_df)

(3000, 3000)

In [16]:
len(X_train)

7000

In [17]:
def synthetic_ppo():
    logger.configure(dir='./logs/synthetic_ppo', format_strs=['stdout', 'tensorboard'])
    env = DummyVecEnv([lambda: bench.Monitor(SyntheticEnv(images_per_episode=1), logger.get_dir())])

    model = ppo2.learn(
        env=env,
        network='mlp',
        num_layers=2,
        num_hidden=64,
        nsteps=32,
        total_timesteps=int(1.2e5),
        seed=int(time.time()))

    return model

start_time = time.time()
ppo_model = synthetic_ppo()
print('PPO Training Time:', time.time() - start_time)

Logging to ./logs/synthetic_ppo




Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.156     |
| fps                     | 71        |
| loss/approxkl           | 2.58e-06  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 1.1       |
| loss/policy_loss        | -0.000253 |
| loss/value_loss         | 0.0745    |
| misc/explained_variance | 0.0113    |
| misc/nupdates           | 1         |
| misc/serial_timesteps   | 32        |
| misc/time_elapsed       | 0.45      |
| misc/total_timesteps    | 32        |
---------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.39     |
| fps                     | 210      |
| loss/approxkl           | 0.000112 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 1.09     |
| loss/policy_loss 

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.5       |
| fps                     | 179       |
| loss/approxkl           | 1.69e-09  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0226    |
| loss/policy_loss        | -2.79e-09 |
| loss/value_loss         | 0.125     |
| misc/explained_variance | 0         |
| misc/nupdates           | 130       |
| misc/serial_timesteps   | 4.16e+03  |
| misc/time_elapsed       | 19.3      |
| misc/total_timesteps    | 4.16e+03  |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.4       |
| fps                     | 197       |
| loss/approxkl           | 2.58e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0232    |
| loss/policy_loss        | -8.38e-08 |
| loss/value_loss   

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.47      |
| fps                     | 174       |
| loss/approxkl           | 4.42e-10  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00953   |
| loss/policy_loss        | -9.36e-08 |
| loss/value_loss         | 0.14      |
| misc/explained_variance | 0.0039    |
| misc/nupdates           | 270       |
| misc/serial_timesteps   | 8.64e+03  |
| misc/time_elapsed       | 39.6      |
| misc/total_timesteps    | 8.64e+03  |
---------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.44     |
| fps                     | 202      |
| loss/approxkl           | 0.000862 |
| loss/clipfrac           | 0.0156   |
| loss/policy_entropy     | 0.00846  |
| loss/policy_loss        | -0.00317 |
| loss/value_loss         | 

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.43      |
| fps                     | 206       |
| loss/approxkl           | 7.15e-13  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00295   |
| loss/policy_loss        | -1.86e-08 |
| loss/value_loss         | 0.124     |
| misc/explained_variance | -0.00234  |
| misc/nupdates           | 410       |
| misc/serial_timesteps   | 1.31e+04  |
| misc/time_elapsed       | 59.4      |
| misc/total_timesteps    | 1.31e+04  |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.47      |
| fps                     | 214       |
| loss/approxkl           | 6.61e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00297   |
| loss/policy_loss        | -7.68e-08 |
| loss/value_loss   

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.49     |
| fps                     | 199      |
| loss/approxkl           | 3.45e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00235  |
| loss/policy_loss        | 2.79e-09 |
| loss/value_loss         | 0.123    |
| misc/explained_variance | 0        |
| misc/nupdates           | 550      |
| misc/serial_timesteps   | 1.76e+04 |
| misc/time_elapsed       | 79.2     |
| misc/total_timesteps    | 1.76e+04 |
--------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.47     |
| fps                     | 211      |
| loss/approxkl           | 7.76e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00236  |
| loss/policy_loss        | 2.64e-07 |
| loss/value_loss         | 0.122    |
| mi

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.5       |
| fps                     | 214       |
| loss/approxkl           | 2.89e-10  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00351   |
| loss/policy_loss        | -7.45e-09 |
| loss/value_loss         | 0.125     |
| misc/explained_variance | 0         |
| misc/nupdates           | 690       |
| misc/serial_timesteps   | 2.21e+04  |
| misc/time_elapsed       | 99.9      |
| misc/total_timesteps    | 2.21e+04  |
---------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.43     |
| fps                     | 324      |
| loss/approxkl           | 7.17e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00396  |
| loss/policy_loss        | 1.86e-09 |
| loss/value_loss         | 

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.41      |
| fps                     | 232       |
| loss/approxkl           | 2.34e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00212   |
| loss/policy_loss        | -1.21e-08 |
| loss/value_loss         | 0.116     |
| misc/explained_variance | 0         |
| misc/nupdates           | 830       |
| misc/serial_timesteps   | 2.66e+04  |
| misc/time_elapsed       | 120       |
| misc/total_timesteps    | 2.66e+04  |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.47      |
| fps                     | 213       |
| loss/approxkl           | 1.07e-10  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00207   |
| loss/policy_loss        | -6.52e-09 |
| loss/value_loss   

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.33     |
| fps                     | 211      |
| loss/approxkl           | 3.07e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00285  |
| loss/policy_loss        | 3.21e-07 |
| loss/value_loss         | 0.105    |
| misc/explained_variance | -0.00797 |
| misc/nupdates           | 970      |
| misc/serial_timesteps   | 3.1e+04  |
| misc/time_elapsed       | 141      |
| misc/total_timesteps    | 3.1e+04  |
--------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.54     |
| fps                     | 199      |
| loss/approxkl           | 1.19e-10 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00304  |
| loss/policy_loss        | 6.01e-07 |
| loss/value_loss         | 0.128    |
| mi

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.47     |
| fps                     | 209      |
| loss/approxkl           | 1.69e-09 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.0311   |
| loss/policy_loss        | 1.6e-06  |
| loss/value_loss         | 0.126    |
| misc/explained_variance | -0.0034  |
| misc/nupdates           | 1.11e+03 |
| misc/serial_timesteps   | 3.55e+04 |
| misc/time_elapsed       | 162      |
| misc/total_timesteps    | 3.55e+04 |
--------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.52     |
| fps                     | 215      |
| loss/approxkl           | 0.00206  |
| loss/clipfrac           | 0.0156   |
| loss/policy_entropy     | 0.0485   |
| loss/policy_loss        | -0.00432 |
| loss/value_loss         | 0.128    |
| mi

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.42     |
| fps                     | 586      |
| loss/approxkl           | 3.04e-10 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00851  |
| loss/policy_loss        | 4.16e-07 |
| loss/value_loss         | 0.129    |
| misc/explained_variance | 0.00854  |
| misc/nupdates           | 1.25e+03 |
| misc/serial_timesteps   | 4e+04    |
| misc/time_elapsed       | 178      |
| misc/total_timesteps    | 4e+04    |
--------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.48     |
| fps                     | 601      |
| loss/approxkl           | 3.86e-09 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00803  |
| loss/policy_loss        | 1.04e-06 |
| loss/value_loss         | 0.129    |
| mi

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.45     |
| fps                     | 359      |
| loss/approxkl           | 0.000835 |
| loss/clipfrac           | 0.00781  |
| loss/policy_entropy     | 0.0599   |
| loss/policy_loss        | -0.00402 |
| loss/value_loss         | 0.13     |
| misc/explained_variance | -0.00339 |
| misc/nupdates           | 1.39e+03 |
| misc/serial_timesteps   | 4.45e+04 |
| misc/time_elapsed       | 190      |
| misc/total_timesteps    | 4.45e+04 |
--------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.44      |
| fps                     | 502       |
| loss/approxkl           | 2.93e-08  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0235    |
| loss/policy_loss        | -6.68e-06 |
| loss/value_loss         | 0.118  

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.42      |
| fps                     | 350       |
| loss/approxkl           | 3.06e-12  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00244   |
| loss/policy_loss        | -2.19e-08 |
| loss/value_loss         | 0.123     |
| misc/explained_variance | -0.000394 |
| misc/nupdates           | 1.53e+03  |
| misc/serial_timesteps   | 4.9e+04   |
| misc/time_elapsed       | 202       |
| misc/total_timesteps    | 4.9e+04   |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.42      |
| fps                     | 359       |
| loss/approxkl           | 1.85e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00269   |
| loss/policy_loss        | 2.05e-08  |
| loss/value_loss   

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.42      |
| fps                     | 345       |
| loss/approxkl           | 3.97e-07  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.044     |
| loss/policy_loss        | -3.29e-05 |
| loss/value_loss         | 0.124     |
| misc/explained_variance | 0.0167    |
| misc/nupdates           | 1.67e+03  |
| misc/serial_timesteps   | 5.34e+04  |
| misc/time_elapsed       | 214       |
| misc/total_timesteps    | 5.34e+04  |
---------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.4      |
| fps                     | 423      |
| loss/approxkl           | 5.9e-08  |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.141    |
| loss/policy_loss        | 9.4e-06  |
| loss/value_loss         | 

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.4       |
| fps                     | 378       |
| loss/approxkl           | 7.22e-09  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00793   |
| loss/policy_loss        | -1.29e-05 |
| loss/value_loss         | 0.109     |
| misc/explained_variance | 0.0579    |
| misc/nupdates           | 1.81e+03  |
| misc/serial_timesteps   | 5.79e+04  |
| misc/time_elapsed       | 226       |
| misc/total_timesteps    | 5.79e+04  |
---------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.4      |
| fps                     | 319      |
| loss/approxkl           | 1.64e-09 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.0113   |
| loss/policy_loss        | 1.4e-06  |
| loss/value_loss         | 

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.47      |
| fps                     | 344       |
| loss/approxkl           | 2.82e-08  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0417    |
| loss/policy_loss        | -6.11e-06 |
| loss/value_loss         | 0.124     |
| misc/explained_variance | -0.00279  |
| misc/nupdates           | 1.95e+03  |
| misc/serial_timesteps   | 6.24e+04  |
| misc/time_elapsed       | 237       |
| misc/total_timesteps    | 6.24e+04  |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.45      |
| fps                     | 358       |
| loss/approxkl           | 2.11e-08  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0456    |
| loss/policy_loss        | -4.66e-09 |
| loss/value_loss   

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.44      |
| fps                     | 344       |
| loss/approxkl           | 2.55e-10  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0161    |
| loss/policy_loss        | -9.38e-07 |
| loss/value_loss         | 0.12      |
| misc/explained_variance | 0.0162    |
| misc/nupdates           | 2.09e+03  |
| misc/serial_timesteps   | 6.69e+04  |
| misc/time_elapsed       | 249       |
| misc/total_timesteps    | 6.69e+04  |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.49      |
| fps                     | 340       |
| loss/approxkl           | 4.43e-09  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00838   |
| loss/policy_loss        | -7.45e-09 |
| loss/value_loss   

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.51     |
| fps                     | 498      |
| loss/approxkl           | 1.67e-08 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.0161   |
| loss/policy_loss        | 4.66e-09 |
| loss/value_loss         | 0.125    |
| misc/explained_variance | 0        |
| misc/nupdates           | 2.23e+03 |
| misc/serial_timesteps   | 7.14e+04 |
| misc/time_elapsed       | 261      |
| misc/total_timesteps    | 7.14e+04 |
--------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.5       |
| fps                     | 431       |
| loss/approxkl           | 4.36e-09  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00877   |
| loss/policy_loss        | -2.14e-06 |
| loss/value_loss         | 0.126  

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.5      |
| fps                     | 344      |
| loss/approxkl           | 1.48e-10 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00663  |
| loss/policy_loss        | -1.6e-06 |
| loss/value_loss         | 0.124    |
| misc/explained_variance | 0.00687  |
| misc/nupdates           | 2.37e+03 |
| misc/serial_timesteps   | 7.58e+04 |
| misc/time_elapsed       | 273      |
| misc/total_timesteps    | 7.58e+04 |
--------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.49      |
| fps                     | 337       |
| loss/approxkl           | 1.61e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00893   |
| loss/policy_loss        | -2.33e-08 |
| loss/value_loss         | 0.123  

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.5       |
| fps                     | 366       |
| loss/approxkl           | 4.75e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00321   |
| loss/policy_loss        | 8.57e-08  |
| loss/value_loss         | 0.125     |
| misc/explained_variance | -2.41e-05 |
| misc/nupdates           | 2.51e+03  |
| misc/serial_timesteps   | 8.03e+04  |
| misc/time_elapsed       | 285       |
| misc/total_timesteps    | 8.03e+04  |
---------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.49     |
| fps                     | 340      |
| loss/approxkl           | 2.54e-10 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00435  |
| loss/policy_loss        | 1.47e-07 |
| loss/value_loss         | 

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.44     |
| fps                     | 337      |
| loss/approxkl           | 9.95e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00475  |
| loss/policy_loss        | 1.44e-07 |
| loss/value_loss         | 0.122    |
| misc/explained_variance | 0.00146  |
| misc/nupdates           | 2.65e+03 |
| misc/serial_timesteps   | 8.48e+04 |
| misc/time_elapsed       | 297      |
| misc/total_timesteps    | 8.48e+04 |
--------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.51      |
| fps                     | 350       |
| loss/approxkl           | 5.69e-10  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00437   |
| loss/policy_loss        | -6.35e-06 |
| loss/value_loss         | 0.122  

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.37     |
| fps                     | 362      |
| loss/approxkl           | 0.000992 |
| loss/clipfrac           | 0.0156   |
| loss/policy_entropy     | 0.0143   |
| loss/policy_loss        | 0.000675 |
| loss/value_loss         | 0.117    |
| misc/explained_variance | -0.00127 |
| misc/nupdates           | 2.79e+03 |
| misc/serial_timesteps   | 8.93e+04 |
| misc/time_elapsed       | 309      |
| misc/total_timesteps    | 8.93e+04 |
--------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.42     |
| fps                     | 361      |
| loss/approxkl           | 8.96e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.0114   |
| loss/policy_loss        | 2.63e-07 |
| loss/value_loss         | 0.125    |
| mi

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.41     |
| fps                     | 628      |
| loss/approxkl           | 9.69e-10 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.0131   |
| loss/policy_loss        | -1.5e-06 |
| loss/value_loss         | 0.122    |
| misc/explained_variance | -0.01    |
| misc/nupdates           | 2.93e+03 |
| misc/serial_timesteps   | 9.38e+04 |
| misc/time_elapsed       | 321      |
| misc/total_timesteps    | 9.38e+04 |
--------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.47      |
| fps                     | 408       |
| loss/approxkl           | 1.51e-09  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0109    |
| loss/policy_loss        | -1.36e-06 |
| loss/value_loss         | 0.126  

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.38      |
| fps                     | 405       |
| loss/approxkl           | 1.32e-08  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.0154    |
| loss/policy_loss        | -6.52e-09 |
| loss/value_loss         | 0.114     |
| misc/explained_variance | 0         |
| misc/nupdates           | 3.07e+03  |
| misc/serial_timesteps   | 9.82e+04  |
| misc/time_elapsed       | 332       |
| misc/total_timesteps    | 9.82e+04  |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.47      |
| fps                     | 353       |
| loss/approxkl           | 2.84e-10  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.015     |
| loss/policy_loss        | 4.56e-07  |
| loss/value_loss   

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.37      |
| fps                     | 352       |
| loss/approxkl           | 3.86e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00208   |
| loss/policy_loss        | -7.19e-07 |
| loss/value_loss         | 0.11      |
| misc/explained_variance | -0.00555  |
| misc/nupdates           | 3.21e+03  |
| misc/serial_timesteps   | 1.03e+05  |
| misc/time_elapsed       | 344       |
| misc/total_timesteps    | 1.03e+05  |
---------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.5      |
| fps                     | 384      |
| loss/approxkl           | 5.67e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00205  |
| loss/policy_loss        | 7.03e-08 |
| loss/value_loss         | 

Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.51      |
| fps                     | 351       |
| loss/approxkl           | 1.74e-12  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00131   |
| loss/policy_loss        | -5.63e-08 |
| loss/value_loss         | 0.123     |
| misc/explained_variance | 0.0131    |
| misc/nupdates           | 3.35e+03  |
| misc/serial_timesteps   | 1.07e+05  |
| misc/time_elapsed       | 356       |
| misc/total_timesteps    | 1.07e+05  |
---------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.55      |
| fps                     | 345       |
| loss/approxkl           | 1.78e-11  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00128   |
| loss/policy_loss        | -1.86e-09 |
| loss/value_loss   

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.42     |
| fps                     | 344      |
| loss/approxkl           | 1.33e-10 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00167  |
| loss/policy_loss        | -3e-07   |
| loss/value_loss         | 0.119    |
| misc/explained_variance | 0.0104   |
| misc/nupdates           | 3.49e+03 |
| misc/serial_timesteps   | 1.12e+05 |
| misc/time_elapsed       | 368      |
| misc/total_timesteps    | 1.12e+05 |
--------------------------------------
Stepping environment...
Done.
---------------------------------------
| eplenmean               | 1         |
| eprewmean               | 0.59      |
| fps                     | 319       |
| loss/approxkl           | 2.27e-10  |
| loss/clipfrac           | 0         |
| loss/policy_entropy     | 0.00144   |
| loss/policy_loss        | -7.03e-08 |
| loss/value_loss         | 0.12   

Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.44     |
| fps                     | 344      |
| loss/approxkl           | 2.97e-11 |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00651  |
| loss/policy_loss        | 3.82e-07 |
| loss/value_loss         | 0.124    |
| misc/explained_variance | -0.0219  |
| misc/nupdates           | 3.63e+03 |
| misc/serial_timesteps   | 1.16e+05 |
| misc/time_elapsed       | 380      |
| misc/total_timesteps    | 1.16e+05 |
--------------------------------------
Stepping environment...
Done.
--------------------------------------
| eplenmean               | 1        |
| eprewmean               | 0.53     |
| fps                     | 321      |
| loss/approxkl           | 5.3e-10  |
| loss/clipfrac           | 0        |
| loss/policy_entropy     | 0.00478  |
| loss/policy_loss        | 4.89e-08 |
| loss/value_loss         | 0.123    |
| mi

In [18]:
def synthetic_ppo_eval(ppo_model):
    attempts, correct = 0,0

    env = DummyVecEnv([lambda: SyntheticEnv(images_per_episode=1, dataset=(x_test, y_test), random=False)])
    test_df = pd.DataFrame()

    try:
        while True:
            obs, done = env.reset(), [False]
            while not done[0]:
                obs, rew, done, info = env.step(ppo_model.step(obs[None])[0])
                if done==True:
                    test_df = test_df.append(info, ignore_index=True)

                attempts += 1
                if rew[0] > 0:
                    correct += 1

    except StopIteration:
        print()
        print('validation done...')
        print('Accuracy: {0}%'.format((float(correct) / attempts) * 100))
        
    return test_df

test_df = synthetic_ppo_eval(ppo_model)
len(test_df

SyntaxError: unexpected EOF while parsing (2506171289.py, line 27)