In [1]:
from model import DQN, LSTM_DQN
import torch
import time
from torchinfo import summary
from torch.distributions.categorical import Categorical

In [2]:
state = torch.randint(1,10,(10,4,8,3), dtype=torch.float32)
state = state.view(state.size(0), 4, -1)
print(state.shape)

torch.Size([10, 4, 24])


In [10]:
# standard dqn
test_network = DQN(4, 8, [4,4,4,4])
#print(summary(test_network, input_data=state))
start_time = time.time()

q_values = test_network(state)
dist = Categorical(state)
print(q_values[0])
print(dist.sample()[0])
print()

total_time = time.time() - start_time
print(f"{total_time} to process 600 states, average of {total_time/600:.6f} seconds per state")

tensor([[-0.3839,  0.1889,  0.2450,  0.3958],
        [ 0.3333,  0.1157,  0.3868, -0.2146],
        [-0.1389,  0.1014,  0.0629, -0.2644],
        [ 0.6350, -0.2690,  0.3087,  0.2739]], grad_fn=<SelectBackward0>)
tensor([21,  3, 23, 17])

0.007051944732666016 to process 600 states, average of 0.000012 seconds per state


In [22]:
# noisy dqn
test_network = DQN(4, 8, [4,4,4,4], noisy=True)
print(summary(test_network, input_data=state))
start_time = time.time()

q_values = test_network(state)
print(q_values.shape)

total_time = time.time() - start_time
print(f"{total_time} to process 600 states, average of {total_time/600:.6f} seconds per state")

Layer (type:depth-idx)                   Output Shape              Param #
DQN                                      [10, 4, 4]                258
├─Sequential: 1-1                        [10, 64, 24]              --
│    └─Conv1d: 2-1                       [10, 64, 24]              832
│    └─ReLU: 2-2                         [10, 64, 24]              --
│    └─Conv1d: 2-3                       [10, 64, 24]              12,352
│    └─ReLU: 2-4                         [10, 64, 24]              --
├─Sequential: 1-2                        [10, 128]                 --
│    └─Noisy_Layer: 2-5                  [10, 128]                 393,472
│    └─ReLU: 2-6                         [10, 128]                 --
├─ModuleList: 1-3                        --                        --
│    └─Noisy_Layer: 2-7                  [10, 4]                   1,032
│    └─Noisy_Layer: 2-8                  [10, 4]                   1,032
│    └─Noisy_Layer: 2-9                  [10, 4]                   1

In [23]:
# dueling noisy dqn
test_network = DQN(4, 8, [4,4,4,4], noisy=True, dueling=True)
print(summary(test_network, input_data=state))
start_time = time.time()

q_values = test_network(state)
print(q_values.shape)

total_time = time.time() - start_time
print(f"{total_time} to process 600 states, average of {total_time/600:.6f} seconds per state")

Layer (type:depth-idx)                   Output Shape              Param #
DQN                                      [10, 4, 4]                --
├─Sequential: 1-1                        [10, 64, 24]              --
│    └─Conv1d: 2-1                       [10, 64, 24]              832
│    └─ReLU: 2-2                         [10, 64, 24]              --
│    └─Conv1d: 2-3                       [10, 64, 24]              12,352
│    └─ReLU: 2-4                         [10, 64, 24]              --
├─Sequential: 1-2                        [10, 128]                 --
│    └─Noisy_Layer: 2-5                  [10, 128]                 393,472
│    └─ReLU: 2-6                         [10, 128]                 --
├─Noisy_Layer: 1-3                       [10, 1]                   258
├─ModuleList: 1-4                        --                        --
│    └─Noisy_Layer: 2-7                  [10, 4]                   1,032
│    └─Noisy_Layer: 2-8                  [10, 4]                   1,03

In [24]:
state = torch.randint(1,10,(1,15,8,3), dtype=torch.float32)
state = state.view(state.size(0), 15, -1)
print(state.shape)

torch.Size([1, 15, 24])


In [25]:
# standard lstm dqn
test_network = LSTM_DQN(24, [4,4,4,4])
print(summary(test_network, input_data=state))
start_time = time.time()

hidden = None
q_values, hidden = test_network(state, hidden)
print(q_values.shape)
print(hidden[0].shape)
print(hidden[1].shape)

total_time = time.time() - start_time
print(f"{total_time} to process 600 states, average of {total_time/600:.6f} seconds per state")

Layer (type:depth-idx)                   Output Shape              Param #
LSTM_DQN                                 [1, 4, 4]                 129
├─LSTM: 1-1                              [1, 15, 128]              78,848
├─Sequential: 1-2                        [1, 128]                  --
│    └─Linear: 2-1                       [1, 128]                  16,512
│    └─ReLU: 2-2                         [1, 128]                  --
├─ModuleList: 1-3                        --                        --
│    └─Linear: 2-3                       [1, 4]                    516
│    └─Linear: 2-4                       [1, 4]                    516
│    └─Linear: 2-5                       [1, 4]                    516
│    └─Linear: 2-6                       [1, 4]                    516
Total params: 97,553
Trainable params: 97,553
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 1.20
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 0.39
Estimated Total Siz



In [26]:
# noisy lstm dqn
test_network = LSTM_DQN(24, [4,4,4,4], noisy=True)
print(summary(test_network, input_data=state))
start_time = time.time()

hidden = None
q_values, hidden = test_network(state, hidden)
print(q_values.shape)
print(hidden[0].shape)
print(hidden[1].shape)

total_time = time.time() - start_time
print(f"{total_time} to process 600 states, average of {total_time/600:.6f} seconds per state")

Layer (type:depth-idx)                   Output Shape              Param #
LSTM_DQN                                 [1, 4, 4]                 258
├─LSTM: 1-1                              [1, 15, 128]              78,848
├─Sequential: 1-2                        [1, 128]                  --
│    └─Linear: 2-1                       [1, 128]                  16,512
│    └─ReLU: 2-2                         [1, 128]                  --
├─ModuleList: 1-3                        --                        --
│    └─Noisy_Layer: 2-3                  [1, 4]                    1,032
│    └─Noisy_Layer: 2-4                  [1, 4]                    1,032
│    └─Noisy_Layer: 2-5                  [1, 4]                    1,032
│    └─Noisy_Layer: 2-6                  [1, 4]                    1,032
Total params: 99,746
Trainable params: 99,746
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 1.22
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 0.40
Estimated T

In [27]:
# dueling noisy lstm dqn
test_network = LSTM_DQN(24, [4,4,4,4], noisy=True, dueling=True)
print(summary(test_network, input_data=state))
start_time = time.time()

hidden = None
q_values, hidden = test_network(state, hidden)
print(q_values.shape)
print(hidden[0].shape)
print(hidden[1].shape)

total_time = time.time() - start_time
print(f"{total_time} to process 600 states, average of {total_time/600:.6f} seconds per state")

Layer (type:depth-idx)                   Output Shape              Param #
LSTM_DQN                                 [1, 4, 4]                 --
├─LSTM: 1-1                              [1, 15, 128]              78,848
├─Sequential: 1-2                        [1, 128]                  --
│    └─Linear: 2-1                       [1, 128]                  16,512
│    └─ReLU: 2-2                         [1, 128]                  --
├─Noisy_Layer: 1-3                       [1, 1]                    258
├─ModuleList: 1-4                        --                        --
│    └─Noisy_Layer: 2-3                  [1, 4]                    1,032
│    └─Noisy_Layer: 2-4                  [1, 4]                    1,032
│    └─Noisy_Layer: 2-5                  [1, 4]                    1,032
│    └─Noisy_Layer: 2-6                  [1, 4]                    1,032
Total params: 99,746
Trainable params: 99,746
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 1.22
Input size (MB): 0.00
Fo