In [3]:
import pandas as pd
import numpy as np
import random
import os
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import copy
from gym import Env
from gym.spaces import Discrete, Box
import d3rlpy
from d3rlpy.algos import DQN
from d3rlpy.dataset import MDPDataset
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.online.explorers import LinearDecayEpsilonGreedy
from envs import SyntheticSimpleEnv, SyntheticComplexEnv
import helper

ModuleNotFoundError: No module named 'd3rlpy'

In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tensorflow.set_random_seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)

#### Original datasets

In [3]:
file_simple = 'data/dataset_10000.csv'
class_dict_simple = {'A':0, 'B':1, 'C':2}
X_train, X_test, y_train, y_test = helper.split_dataset(file_simple, class_dict_simple)

#### Online Training

In [4]:
simple_online_train_env = SyntheticSimpleEnv(X_train, y_train)
simple_online_test_env = SyntheticSimpleEnv(X_test, y_test, random=False)

In [5]:
online_dqn = DQN(batch_size=32, learning_rate=2.5e-4, target_update_interval=100, use_gpu=False)
buffer = ReplayBuffer(maxlen=50000, env=simple_online_train_env)
explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0, end_epsilon=0.1, duration=10000)

In [6]:
online_dqn.fit_online(simple_online_train_env, buffer, explorer=explorer, eval_env=simple_online_train_env, n_steps=120000, 
                      n_steps_per_epoch=1000, update_interval=10) 

2022-07-29 20:36.54 [info     ] Directory is created at d3rlpy_logs\DQN_online_20220729203654
2022-07-29 20:36.54 [debug    ] Building model...
2022-07-29 20:36.54 [debug    ] Model has been built.
2022-07-29 20:36.54 [info     ] Parameters are saved to d3rlpy_logs\DQN_online_20220729203654\params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.00025, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 'target_update_interval': 100, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (3,), 'action_size': 6}


  0%|          | 0/120000 [00:00<?, ?it/s]

2022-07-29 20:36.56 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_1000.pt
2022-07-29 20:36.56 [info     ] DQN_online_20220729203654: epoch=1 step=1000 epoch=1 metrics={'time_inference': 0.0010172193050384522, 'time_environment_step': 3.274679183959961e-05, 'time_step': 0.001764174222946167, 'rollout_return': 0.26105263157894737, 'time_sample_batch': 0.00021169849277771625, 'time_algorithm_update': 0.006254965497046402, 'loss': 0.8539277963417092, 'evaluation': -0.4} step=1000
2022-07-29 20:36.58 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_2000.pt
2022-07-29 20:36.58 [info     ] DQN_online_20220729203654: epoch=2 step=2000 epoch=2 metrics={'time_inference': 0.0013331966400146485, 'time_environment_step': 2.53603458404541e-05, 'rollout_return': 0.2345132743362832, 'time_step': 0.002310093402862549, 'time_sample_batch': 0.0002476668357849121, 'time_algorithm_update': 0.008156418800354004, 'loss': 0.652340755

2022-07-29 20:37.29 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_17000.pt
2022-07-29 20:37.29 [info     ] DQN_online_20220729203654: epoch=17 step=17000 epoch=17 metrics={'time_inference': 0.0012085368633270263, 'time_environment_step': 3.552365303039551e-05, 'time_step': 0.0021881020069122316, 'rollout_return': 2.1653846153846152, 'time_sample_batch': 0.0002582907676696777, 'time_algorithm_update': 0.008096771240234375, 'loss': 0.2911991833150387, 'evaluation': 3.7} step=17000
2022-07-29 20:37.30 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_18000.pt
2022-07-29 20:37.30 [info     ] DQN_online_20220729203654: epoch=18 step=18000 epoch=18 metrics={'time_inference': 0.0008996052742004394, 'time_environment_step': 2.9068708419799805e-05, 'time_step': 0.0016700036525726318, 'rollout_return': 2.457692307692308, 'time_sample_batch': 0.0002337837219238281, 'time_algorithm_update': 0.006192667484283447, 'loss': 0.

2022-07-29 20:38.01 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_33000.pt
2022-07-29 20:38.01 [info     ] DQN_online_20220729203654: epoch=33 step=33000 epoch=33 metrics={'time_inference': 0.0009734985828399658, 'time_environment_step': 3.065776824951172e-05, 'time_step': 0.0017981698513031006, 'rollout_return': 2.8534798534798536, 'time_sample_batch': 0.00019713878631591797, 'time_algorithm_update': 0.006828250885009765, 'loss': 0.19443919122219086, 'evaluation': 2.7} step=33000
2022-07-29 20:38.04 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_34000.pt
2022-07-29 20:38.04 [info     ] DQN_online_20220729203654: epoch=34 step=34000 epoch=34 metrics={'time_inference': 0.0011284751892089843, 'time_environment_step': 3.16925048828125e-05, 'time_step': 0.0020751471519470216, 'rollout_return': 2.6867924528301885, 'time_sample_batch': 0.00027476787567138673, 'time_algorithm_update': 0.007766733169555664, 'loss': 

2022-07-29 20:38.34 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_49000.pt
2022-07-29 20:38.34 [info     ] DQN_online_20220729203654: epoch=49 step=49000 epoch=49 metrics={'time_inference': 0.0010795350074768067, 'time_environment_step': 3.016161918640137e-05, 'rollout_return': 2.9185185185185185, 'time_step': 0.0019543190002441406, 'time_sample_batch': 0.00027100324630737306, 'time_algorithm_update': 0.007357971668243408, 'loss': 0.16666044175624847, 'evaluation': 3.2} step=49000
2022-07-29 20:38.37 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_50000.pt
2022-07-29 20:38.37 [info     ] DQN_online_20220729203654: epoch=50 step=50000 epoch=50 metrics={'time_inference': 0.001124497652053833, 'time_environment_step': 2.7388811111450194e-05, 'time_step': 0.0020516290664672854, 'rollout_return': 2.7773722627737225, 'time_sample_batch': 0.00028213977813720706, 'time_algorithm_update': 0.007743971347808838, 'loss':

2022-07-29 20:39.12 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_65000.pt
2022-07-29 20:39.12 [info     ] DQN_online_20220729203654: epoch=65 step=65000 epoch=65 metrics={'time_inference': 0.0015290341377258302, 'time_environment_step': 4.6573877334594726e-05, 'time_step': 0.0027666208744049074, 'rollout_return': 2.8823529411764706, 'time_sample_batch': 0.00034874916076660156, 'time_algorithm_update': 0.010338315963745117, 'loss': 0.13069478698074818, 'evaluation': 3.5} step=65000
2022-07-29 20:39.14 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_66000.pt
2022-07-29 20:39.14 [info     ] DQN_online_20220729203654: epoch=66 step=66000 epoch=66 metrics={'time_inference': 0.001039897918701172, 'time_environment_step': 4.05733585357666e-05, 'time_step': 0.001902510643005371, 'rollout_return': 3.048689138576779, 'time_sample_batch': 0.00018747806549072266, 'time_algorithm_update': 0.0071240425109863285, 'loss': 0

2022-07-29 20:39.52 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_81000.pt
2022-07-29 20:39.52 [info     ] DQN_online_20220729203654: epoch=81 step=81000 epoch=81 metrics={'time_inference': 0.001444373607635498, 'time_environment_step': 4.5512914657592776e-05, 'time_step': 0.0026233859062194825, 'rollout_return': 3.007434944237918, 'time_sample_batch': 0.0002528572082519531, 'time_algorithm_update': 0.009892404079437256, 'loss': 0.11325032778084278, 'evaluation': 3.6} step=81000
2022-07-29 20:39.54 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_82000.pt
2022-07-29 20:39.54 [info     ] DQN_online_20220729203654: epoch=82 step=82000 epoch=82 metrics={'time_inference': 0.0011957592964172363, 'time_environment_step': 2.9945850372314454e-05, 'time_step': 0.00216660737991333, 'rollout_return': 2.8453237410071943, 'time_sample_batch': 0.00027755975723266603, 'time_algorithm_update': 0.008275725841522218, 'loss': 0.

2022-07-29 20:40.30 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_97000.pt
2022-07-29 20:40.30 [info     ] DQN_online_20220729203654: epoch=97 step=97000 epoch=97 metrics={'time_inference': 0.0011446592807769775, 'time_environment_step': 3.237628936767578e-05, 'rollout_return': 2.99250936329588, 'time_step': 0.002116929054260254, 'time_sample_batch': 0.0002729320526123047, 'time_algorithm_update': 0.008077132701873779, 'loss': 0.1172642557322979, 'evaluation': 3.6} step=97000
2022-07-29 20:40.33 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_98000.pt
2022-07-29 20:40.33 [info     ] DQN_online_20220729203654: epoch=98 step=98000 epoch=98 metrics={'time_inference': 0.001502098798751831, 'time_environment_step': 4.025149345397949e-05, 'time_step': 0.0027723491191864015, 'rollout_return': 3.2222222222222223, 'time_sample_batch': 0.00024533748626708984, 'time_algorithm_update': 0.010491960048675537, 'loss': 0.116

2022-07-29 20:41.11 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_113000.pt
2022-07-29 20:41.11 [info     ] DQN_online_20220729203654: epoch=113 step=113000 epoch=113 metrics={'time_inference': 0.0012943923473358154, 'time_environment_step': 4.304862022399902e-05, 'time_step': 0.002413138151168823, 'rollout_return': 2.8796992481203008, 'time_sample_batch': 0.00024608135223388674, 'time_algorithm_update': 0.009230849742889404, 'loss': 0.1224897537752986, 'evaluation': 4.0} step=113000
2022-07-29 20:41.13 [info     ] Model parameters are saved to d3rlpy_logs\DQN_online_20220729203654\model_114000.pt
2022-07-29 20:41.13 [info     ] DQN_online_20220729203654: epoch=114 step=114000 epoch=114 metrics={'time_inference': 0.001274198293685913, 'time_environment_step': 4.2238473892211914e-05, 'rollout_return': 3.0634328358208953, 'time_step': 0.0023850975036621095, 'time_sample_batch': 0.00023446559906005858, 'time_algorithm_update': 0.009123404026031495, 

In [1]:
online_test_df = helper.test_d3rlpy_dqn(online_dqn, simple_online_test_env)

NameError: name 'helper' is not defined

In [8]:
len(X_test), len(online_test_df)

(3000, 3000)

In [10]:
y_pred_df = online_test_df[online_test_df['y_pred'].notna()]
success_df = y_pred_df[y_pred_df['y_pred']== y_pred_df['y_actual']]
len(success_df)

2507

In [11]:
success_rate = len(success_df)/len(online_test_df)*100
success_rate

83.56666666666666

In [12]:
#avg length and return 
avg_length, avg_return = helper.get_avg_length_reward(online_test_df)
avg_length, avg_return

(3.9703333333333335, 3.6336666666666666)

In [14]:
acc, f1, roc_auc = helper.test(y_pred_df['y_actual'], y_pred_df['y_pred'])
acc, f1, roc_auc

(0.8367823765020027, 0.8390825356105415, 0.8834565709254397)

#### Offline

In [19]:
offline_dataset = MDPDataset.load('data/dqn_simple_dataset.h5')
simple_offline_train_env = SyntheticSimpleEnv(X_train, y_train)
simple_offline_test_env = SyntheticSimpleEnv(X_test, y_test, random=False)

In [18]:
offline_dqn = d3rlpy.algos.DQN()
offline_dqn.fit(offline_dataset, eval_episodes=offline_dataset.episodes, n_steps=120000, n_steps_per_epoch=10000, 
        scorers={"environment": d3rlpy.metrics.evaluate_on_environment(simple_offline_train_env),
                },
       )

2022-07-29 20:56.04 [debug    ] RandomIterator is selected.
2022-07-29 20:56.04 [info     ] Directory is created at d3rlpy_logs\DQN_20220729205604
2022-07-29 20:56.04 [debug    ] Building models...
2022-07-29 20:56.04 [debug    ] Models have been built.
2022-07-29 20:56.04 [info     ] Parameters are saved to d3rlpy_logs\DQN_20220729205604\params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (3,), 'action_size': 6}


Epoch 1/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 20:57.35 [info     ] DQN_20220729205604: epoch=1 step=10000 epoch=1 metrics={'time_sample_batch': 0.0004963466882705688, 'time_algorithm_update': 0.007851599740982056, 'loss': 0.3459163155451417, 'time_step': 0.008802766156196594, 'environment': -1.5} step=10000
2022-07-29 20:57.35 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_10000.pt


Epoch 2/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 20:59.04 [info     ] DQN_20220729205604: epoch=2 step=20000 epoch=2 metrics={'time_sample_batch': 0.00048010497093200686, 'time_algorithm_update': 0.007798137092590332, 'loss': 0.26114884775057434, 'time_step': 0.008700963139533996, 'environment': -4.6} step=20000
2022-07-29 20:59.04 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_20000.pt


Epoch 3/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:00.34 [info     ] DQN_20220729205604: epoch=3 step=30000 epoch=3 metrics={'time_sample_batch': 0.00047753713130950926, 'time_algorithm_update': 0.007797300910949707, 'loss': 0.22103062183186412, 'time_step': 0.00867125174999237, 'environment': -3.0} step=30000
2022-07-29 21:00.34 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_30000.pt


Epoch 4/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:02.00 [info     ] DQN_20220729205604: epoch=4 step=40000 epoch=4 metrics={'time_sample_batch': 0.0004535848379135132, 'time_algorithm_update': 0.007609726071357727, 'loss': 0.1586648769404739, 'time_step': 0.00845857036113739, 'environment': -0.9} step=40000
2022-07-29 21:02.00 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_40000.pt


Epoch 5/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:03.28 [info     ] DQN_20220729205604: epoch=5 step=50000 epoch=5 metrics={'time_sample_batch': 0.00045339446067810057, 'time_algorithm_update': 0.0076961973667144775, 'loss': 0.11598078709822147, 'time_step': 0.008524190926551819, 'environment': 2.3} step=50000
2022-07-29 21:03.28 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_50000.pt


Epoch 6/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:04.44 [info     ] DQN_20220729205604: epoch=6 step=60000 epoch=6 metrics={'time_sample_batch': 0.00039135234355926516, 'time_algorithm_update': 0.006762499737739563, 'loss': 0.10929135550744831, 'time_step': 0.00746098051071167, 'environment': -0.8} step=60000
2022-07-29 21:04.44 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_60000.pt


Epoch 7/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:05.54 [info     ] DQN_20220729205604: epoch=7 step=70000 epoch=7 metrics={'time_sample_batch': 0.00035663702487945557, 'time_algorithm_update': 0.006184486532211304, 'loss': 0.10894690727936104, 'time_step': 0.00684428346157074, 'environment': 1.6} step=70000
2022-07-29 21:05.54 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_70000.pt


Epoch 8/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:07.06 [info     ] DQN_20220729205604: epoch=8 step=80000 epoch=8 metrics={'time_sample_batch': 0.00035919458866119386, 'time_algorithm_update': 0.006380054235458374, 'loss': 0.11639473493695258, 'time_step': 0.007037436699867249, 'environment': 1.1} step=80000
2022-07-29 21:07.06 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_80000.pt


Epoch 9/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:08.18 [info     ] DQN_20220729205604: epoch=9 step=90000 epoch=9 metrics={'time_sample_batch': 0.0003602618217468262, 'time_algorithm_update': 0.006418586802482605, 'loss': 0.11403368643736467, 'time_step': 0.0070811615705490116, 'environment': -0.7} step=90000
2022-07-29 21:08.18 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_90000.pt


Epoch 10/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:09.31 [info     ] DQN_20220729205604: epoch=10 step=100000 epoch=10 metrics={'time_sample_batch': 0.00036128361225128174, 'time_algorithm_update': 0.006477787351608276, 'loss': 0.10713017215915024, 'time_step': 0.007139286565780639, 'environment': 1.3} step=100000
2022-07-29 21:09.31 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_100000.pt


Epoch 11/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:10.46 [info     ] DQN_20220729205604: epoch=11 step=110000 epoch=11 metrics={'time_sample_batch': 0.0003677783727645874, 'time_algorithm_update': 0.006686350655555725, 'loss': 0.1054789417117834, 'time_step': 0.0073614606380462645, 'environment': 2.5} step=110000
2022-07-29 21:10.46 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_110000.pt


Epoch 12/12:   0%|          | 0/10000 [00:00<?, ?it/s]

2022-07-29 21:12.05 [info     ] DQN_20220729205604: epoch=12 step=120000 epoch=12 metrics={'time_sample_batch': 0.00038762409687042237, 'time_algorithm_update': 0.007022116637229919, 'loss': 0.10050704800421371, 'time_step': 0.007734153461456299, 'environment': 0.7} step=120000
2022-07-29 21:12.05 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20220729205604\model_120000.pt


[(1,
  {'time_sample_batch': 0.0004963466882705688,
   'time_algorithm_update': 0.007851599740982056,
   'loss': 0.3459163155451417,
   'time_step': 0.008802766156196594,
   'environment': -1.5}),
 (2,
  {'time_sample_batch': 0.00048010497093200686,
   'time_algorithm_update': 0.007798137092590332,
   'loss': 0.26114884775057434,
   'time_step': 0.008700963139533996,
   'environment': -4.6}),
 (3,
  {'time_sample_batch': 0.00047753713130950926,
   'time_algorithm_update': 0.007797300910949707,
   'loss': 0.22103062183186412,
   'time_step': 0.00867125174999237,
   'environment': -3.0}),
 (4,
  {'time_sample_batch': 0.0004535848379135132,
   'time_algorithm_update': 0.007609726071357727,
   'loss': 0.1586648769404739,
   'time_step': 0.00845857036113739,
   'environment': -0.9}),
 (5,
  {'time_sample_batch': 0.00045339446067810057,
   'time_algorithm_update': 0.0076961973667144775,
   'loss': 0.11598078709822147,
   'time_step': 0.008524190926551819,
   'environment': 2.3}),
 (6,
  {'ti

In [20]:
offline_test_df = helper.test_d3rlpy_dqn(offline_dqn, simple_offline_test_env)

0
1000
2000
Testing done.....


In [21]:
len(X_test), len(offline_test_df)

(3000, 3000)

In [22]:
y_pred_df = offline_test_df[offline_test_df['y_pred'].notna()]
success_df = y_pred_df[y_pred_df['y_pred']== y_pred_df['y_actual']]
len(success_df)

1049

In [23]:
success_rate = len(success_df)/len(offline_test_df)*100
success_rate

34.96666666666667

In [24]:
#avg length and return 
avg_length, avg_return = helper.get_avg_length_reward(offline_test_df)
avg_length, avg_return

(3.147, 0.045)

In [25]:
acc, f1, roc_auc = helper.test(y_pred_df['y_actual'], y_pred_df['y_pred'])
acc, f1, roc_auc

(0.434009102192801, 0.2124771570719668, 0.5055197159671433)