In [1]:
import torch

from src.warehouse.environment.warehouse_v3 import WarehouseV3
from src.policy import ExponentialEpsilonGreedyPolicy
import numpy as np
import time

from src.model import BigDQN, DQN, BiggerDQN
from src.agent import QAgent

import gym
import torch.nn.functional as F
import torch.optim as optimizer

In [2]:
# Single configuration dict for this run: handed to QAgent as `config`, and its
# layout/seed entries are also used to build the WarehouseV3 environment.
# NOTE(review): the precise meaning and units of each entry are defined in
# src.agent / src.policy, which are not visible here — comments marked
# "presumably" should be confirmed against those modules.
hyperparameters = {
    # training
    'batch_size': 128,
    'learning_rate': 0.01,
    'scheduler_milestones': [20000, 40000],  # presumably LR-scheduler step points — confirm unit
    'scheduler_decay': 0.1,  # presumably the LR multiplier applied at each milestone
    'optimizer': optimizer.Adam,  # passed as a class, not an instance
    'loss': F.smooth_l1_loss,  # passed as a function object
    # reinforcement & environment
    'eps_policy': ExponentialEpsilonGreedyPolicy(eps_max=1.0, eps_min=0.02, decay=2000),
    'gamma': 0.9,
    'target_update': 10,
    'num_episodes': 50001,
    'memory_capacity': 50000,
    'warmup_episodes': 100,
    'save_freq': 5000,
    'max_steps_per_episode': 125,
    # warehouse layout; trailing comments list alternative values that were tried
    'num_aisles': 4,  # 2,  # 4
    'rack_height': 8,  # 5,  # 8
    'num_packets': 5,  # 3,  # 5
    # pytorch
    'np_seed': 4242,
    # Use the GPU when one is usable, otherwise fall back to the CPU so the
    # notebook still runs end-to-end on machines without CUDA.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'save_model': True,
    'dtype': torch.float32,
    # plotting / bookkeeping
    'ylim': (-150, 150),
    'plot_progress': False,
    'tag': 'warehouse_v3_big_dqn_bigger'
}

In [3]:
# Build the environment: the three layout settings are forwarded under the same
# keyword names they have in the config, and the config's 'np_seed' is passed
# as the environment's `seed`.
warehouse = WarehouseV3(
    **{name: hyperparameters[name] for name in ('num_aisles', 'rack_height', 'num_packets')},
    seed=hyperparameters['np_seed'],
)

In [None]:
# Wire the config, the network class (BiggerDQN is passed as a class, not an
# instance) and the environment into the agent, then run compile() and fit().
# NOTE(review): fit() is the long-running step — its progress bar counts up to
# 50001, matching hyperparameters['num_episodes'].
agent = QAgent(config=hyperparameters, model=BiggerDQN, env=warehouse)
agent.compile()
agent.fit()

  0%|          | 0/50001 [00:00<?, ?it/s]

In [None]:
# NOTE(review): plot_durations is defined in src.agent (not visible here);
# presumably it plots the per-episode durations recorded during fit() — confirm.
agent.plot_durations()