In [1]:
#dependencies for the project
import gymnasium as gym
import gym_BinPack3D
from gym_BinPack3D.envs import Box, Rotate
import os

%matplotlib inline


In [2]:
# Make the custom packing environment available to gym.make() under the
# id 'BinPack3D-v1'. The entry point string names the PackingGame class
# inside the gym_BinPack3D.envs module.
gym.register(id='BinPack3D-v1', entry_point='gym_BinPack3D.envs:PackingGame')

In [3]:
# Build a small demo environment and inspect its action/observation spaces.
# Keyword arguments passed to the environment:
#   container_size:    size of the container in 3D
#   boxSeqGenerator:   how the boxes are generated
#   enabled_rotations: which rotations are allowed for the boxes
#   n_foreseeable_box: how many boxes are shown to the agent
#   minSideLen / maxSideLen: presumably the bounds on the side length of the
#                      generated boxes -- TODO confirm against gym_BinPack3D
env = gym.make(
    'BinPack3D-v1',
    container_size=(25, 4, 4),
    boxSeqGenerator='CUT-2',
    enabled_rotations=[Rotate.NOOP, Rotate.XY, Rotate.XZ, Rotate.YZ],
    n_foreseeable_box=3,
    minSideLen=1,
    maxSideLen=2,
)

# Saved models and GIFs are written under ./data by later cells. Create the
# directory up front so that writers which do not create missing parent
# directories do not fail on a fresh checkout.
DATA_DIR = os.path.join(os.getcwd(), 'data')
os.makedirs(DATA_DIR, exist_ok=True)

# Last expression of the cell: displayed as the cell output.
env.action_space, env.observation_space

using CUT-2 logic box sequence


  logger.warn(
  logger.warn(


(MultiDiscrete([100   4]),
 Dict('coming_boxes': Box(0.0, 25.0, (3, 3), float32), 'height_map': Box(0.0, 4.0, (25, 4), float32), 'valid_placement_mask': MultiBinary((4, 25, 4))))

In [8]:
# Train a PPO agent on the packing environment and save it under DATA_DIR.
import matplotlib.animation as animation
from matplotlib import pyplot as plt
import imageio
import numpy as np

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

# Training environment: 9 x 11 x 13 container, boxes may not be rotated.
env = gym.make(
    'BinPack3D-v1',
    container_size=(9, 11, 13),
    boxSeqGenerator='CUT-2',
    enabled_rotations=[Rotate.NOOP],
    n_foreseeable_box=3,
    minSideLen=2,
    maxSideLen=5,
)

# Stable-Baselines3 algorithms operate on vectorized environments; wrap the
# single env in a one-element DummyVecEnv.
vec_env = DummyVecEnv([lambda: env])

# "MultiInputPolicy" is the SB3 policy for Dict observation spaces.
model = PPO("MultiInputPolicy", vec_env, verbose=1)
model.learn(total_timesteps=10_000)

# Save the model. The path is built with os.path.join instead of
# concatenating a hard-coded "/" so that it is valid on every platform.
model.save(os.path.join(DATA_DIR, "ppo_model"))


using CUT-2 logic box sequence
Using cpu device


  logger.warn(


-----------------------------
| time/              |      |
|    fps             | 525  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 509         |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.020203697 |
|    clip_fraction        | 0.308       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.58       |
|    explained_variance   | -3.96       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0776     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0701     |
|    value_loss           | 0.0766      |
-----------------------------------------
----------------------------------

In [24]:
# Train an A2C agent (Stable-Baselines3) on the packing environment and save
# it under DATA_DIR.
from stable_baselines3 import A2C

# Training environment: 25 x 4 x 4 container, all four rotations enabled.
env = gym.make(
    'BinPack3D-v1',
    container_size=(25, 4, 4),
    boxSeqGenerator='CUT-2',
    enabled_rotations=[Rotate.NOOP, Rotate.XY, Rotate.XZ, Rotate.YZ],
    n_foreseeable_box=3,
    minSideLen=1,
    maxSideLen=2,
)

# Stable-Baselines3 algorithms operate on vectorized environments; wrap the
# single env in a one-element DummyVecEnv.
vec_env = DummyVecEnv([lambda: env])

# "MultiInputPolicy" is the SB3 policy for Dict observation spaces.
model = A2C("MultiInputPolicy", vec_env, verbose=1)
model.learn(total_timesteps=10_000)

# Save the model. The path is built with os.path.join instead of
# concatenating a hard-coded "/" so that it is valid on every platform.
model.save(os.path.join(DATA_DIR, "a2c_model"))

using CUT-2 logic box sequence
Using cpu device


  logger.warn(


------------------------------------
| time/                 |          |
|    fps                | 215      |
|    iterations         | 100      |
|    time_elapsed       | 2        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -5.47    |
|    explained_variance | 0.198    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.831    |
|    value_loss         | 0.0492   |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 220      |
|    iterations         | 200      |
|    time_elapsed       | 4        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -5.36    |
|    explained_variance | 0.581    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 0.96     |
|    value_loss         | 0.0397   |
-

In [20]:
# Replay one episode with the saved PPO agent and export it as a GIF.
#
# The model is loaded against an environment built here with the same
# configuration the PPO agent was trained with. The global `vec_env` is
# whatever the most recently executed training cell left behind; when the
# notebook is run top to bottom that is the A2C environment (25 x 4 x 4, four
# rotations), whose spaces do not match the PPO model, and PPO.load() checks
# that the spaces of the supplied env match the saved ones.
ppo_env = DummyVecEnv([
    lambda: gym.make(
        'BinPack3D-v1',
        container_size=(9, 11, 13),
        boxSeqGenerator='CUT-2',
        enabled_rotations=[Rotate.NOOP],
        n_foreseeable_box=3,
        minSideLen=2,
        maxSideLen=5,
    )
])
model = PPO.load(os.path.join(DATA_DIR, "ppo_model"), env=ppo_env)

# Use the env attached to the model for reset, render and step alike, so all
# three calls are guaranteed to act on the same object.
rollout_env = model.env

frames = []
obs = rollout_env.reset()
frame = rollout_env.render(mode="rgb_array")

for _ in range(1000):  # hard cap on the number of recorded steps
    # Validate every frame, including the very first one, before storing it.
    if frame is None:
        print("Frame is None!!")
        break
    frames.append(frame)

    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = rollout_env.step(action)

    # The VecEnv resets automatically once an episode ends, so anything
    # rendered after `done` would belong to a new episode. `done` is an
    # array with one entry per sub-environment; there is exactly one here.
    if done[0]:
        break
    frame = rollout_env.render(mode="rgb_array")

if frames:
    imageio.mimsave(os.path.join(DATA_DIR, "ppo.gif"), frames)
else:
    print("No frames captured; skipping GIF export")

    

In [25]:
# Replay one episode with the saved A2C agent and export it as a GIF.
#
# The model is loaded against an environment built here with the same
# configuration the A2C agent was trained with, instead of the global
# `vec_env`, which is whatever the most recently executed training cell left
# behind and may not match the saved model's spaces.
a2c_env = DummyVecEnv([
    lambda: gym.make(
        'BinPack3D-v1',
        container_size=(25, 4, 4),
        boxSeqGenerator='CUT-2',
        enabled_rotations=[Rotate.NOOP, Rotate.XY, Rotate.XZ, Rotate.YZ],
        n_foreseeable_box=3,
        minSideLen=1,
        maxSideLen=2,
    )
])
model = A2C.load(os.path.join(DATA_DIR, "a2c_model"), env=a2c_env)

# Use the env attached to the model for reset, render and step alike, so all
# three calls are guaranteed to act on the same object.
rollout_env = model.env

frames = []
obs = rollout_env.reset()
frame = rollout_env.render(mode="rgb_array")

for _ in range(1000):  # hard cap on the number of recorded steps
    # Validate every frame, including the very first one, before storing it.
    if frame is None:
        print("Frame is None!!")
        break
    frames.append(frame)

    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = rollout_env.step(action)

    # The VecEnv resets automatically once an episode ends, so anything
    # rendered after `done` would belong to a new episode. Stop here so the
    # GIF shows exactly one episode, as the PPO rollout does. `done` is an
    # array with one entry per sub-environment; there is exactly one here.
    if done[0]:
        break
    frame = rollout_env.render(mode="rgb_array")

if frames:
    imageio.mimsave(os.path.join(DATA_DIR, "a2c.gif"), frames)
else:
    print("No frames captured; skipping GIF export")

In [22]:
# Run the "cheating" baseline agent for one episode and export it as a GIF.
env = gym.make(
    'BinPack3D-v1',
    container_size=(9, 11, 13),
    boxSeqGenerator='CUT-2',
    enabled_rotations=[Rotate.NOOP],
    n_foreseeable_box=3,
    minSideLen=2,
    maxSideLen=5,
)

# gym.make() returns the environment wrapped in gymnasium wrappers. The
# project-specific attributes used below (boxSeqGenerator,
# position_to_actionIdx) are read from the underlying environment explicitly
# instead of relying on attribute forwarding through the wrappers.
packing_game = env.unwrapped

# reset() returns an (observation, info) pair in the gymnasium API.
obs, _ = env.reset()

# render() is called without arguments, so what it returns depends on how the
# environment itself is configured -- TODO confirm it yields image frames.
frames = [env.render()]

while True:
    # We cheat the game by looking at the cut-process info to get the
    # correct position for the next box, achieving perfect packing.
    box = packing_game.boxSeqGenerator.next_N_boxes()[0]
    pos = packing_game.position_to_actionIdx((box.x, box.y))
    action = (pos, Rotate.NOOP)

    obs, reward, terminated, truncated, info = env.step(action)
    # An episode is over when it is either terminated or truncated.
    done = terminated or truncated

    # Record the state after every step so the final packing is in the GIF.
    frames.append(env.render())
    print(reward, done, info)

    if done:
        break

env.render()
print(obs)
imageio.mimsave(os.path.join(DATA_DIR, "baseline.gif"), frames)

using CUT-2 logic box sequence
0.9712509712509712 False {'counter': 1, 'ratio': 0.09712509712509712}
0.3108003108003108 False {'counter': 2, 'ratio': 0.1282051282051282}
0.3885003885003885 False {'counter': 3, 'ratio': 0.16705516705516704}
0.23310023310023312 False {'counter': 4, 'ratio': 0.19036519036519037}
0.3885003885003885 False {'counter': 5, 'ratio': 0.22921522921522922}
0.6216006216006216 False {'counter': 6, 'ratio': 0.2913752913752914}
0.46620046620046623 False {'counter': 7, 'ratio': 0.337995337995338}
0.3108003108003108 False {'counter': 8, 'ratio': 0.3690753690753691}
0.46620046620046623 False {'counter': 9, 'ratio': 0.4156954156954157}
0.3885003885003885 False {'counter': 10, 'ratio': 0.45454545454545453}
0.3108003108003108 False {'counter': 11, 'ratio': 0.48562548562548563}
0.1554001554001554 False {'counter': 12, 'ratio': 0.5011655011655012}
0.23310023310023312 False {'counter': 13, 'ratio': 0.5244755244755245}
0.46620046620046623 False {'counter': 14, 'ratio': 0.571095