### Working 

In [None]:
import gym
import gym_sokoban
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback

# Custom reward shaping is applied through an environment wrapper
class CustomSokobanEnv(gym.Wrapper):
    """Environment wrapper that reshapes the reward returned by ``step``.

    Every step is charged a fixed penalty, and a bonus is added on the step
    that ends the episode when ``info['all_boxes_on_target']`` is truthy.

    Args:
        env: Environment to wrap. Its ``step`` must return the 4-tuple
            ``(obs, reward, done, info)`` with ``info`` being a dict.
        step_penalty: Amount subtracted from the reward on every step, to
            encourage faster solutions.
        solved_bonus: Amount added to the reward when the episode finishes
            with all boxes on target.
    """

    def __init__(self, env, *, step_penalty=0.01, solved_bonus=10.0):
        # gym.Wrapper.__init__ stores ``env`` as ``self.env``; no need to
        # assign it again here.
        super().__init__(env)
        self.step_penalty = step_penalty
        self.solved_bonus = solved_bonus

    def step(self, action):
        """Step the wrapped environment and return it with a shaped reward.

        Args:
            action: Action forwarded unchanged to the wrapped environment.

        Returns:
            The ``(obs, reward, done, info)`` tuple from the wrapped
            environment, with ``reward`` adjusted by the step penalty and,
            when applicable, the solved bonus.
        """
        obs, reward, done, info = self.env.step(action)
        reward -= self.step_penalty
        # The key may be absent from ``info``; treat a missing key as
        # "not solved".
        if done and info.get('all_boxes_on_target'):
            reward += self.solved_bonus
        return obs, reward, done, info

# Build the Sokoban environment and wrap it with the custom reward shaping.
env = CustomSokobanEnv(gym.make('Sokoban-small-v1'))

# PPO hyperparameters, kept together so they can be reviewed or tuned in one place.
ppo_hyperparams = {
    "learning_rate": 3e-4,  # Optimizer step size
    "n_steps": 2048,        # Steps collected per environment before each update
    "batch_size": 64,       # Minibatch size
    "n_epochs": 10,         # Optimization epochs per update
    "gamma": 0.99,          # Discount factor
    "gae_lambda": 0.95,     # GAE lambda
    "clip_range": 0.2,      # Clipping parameter
    "ent_coef": 0.01,       # Entropy coefficient
}

# PPO agent with an MLP policy acting on the wrapped environment.
model = PPO("MlpPolicy", env, verbose=1, **ppo_hyperparams)

# Callback for periodic evaluation during training.
# Evaluation runs on its own environment instance, built the same way as the
# training environment: handing the training environment to the callback
# would let every evaluation reset and step it in the middle of a rollout.
eval_env = CustomSokobanEnv(gym.make('Sokoban-small-v1'))
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path='./logs/',  # Where the best-scoring model is saved
    log_path='./logs/',              # Where evaluation results are written
    eval_freq=10000,
    deterministic=True,              # Evaluate with deterministic actions
    render=False
)

# # Train the model
# model.learn(total_timesteps=100000, callback=eval_callback)

# # Save the model
# model.save("ppo_sokoban_optimized")

# Load the trained model from "ppo_sokoban_optimized". The training and save
# steps above are commented out, so this relies on a model saved by an
# earlier run.
loaded_model = PPO.load("ppo_sokoban_optimized")

# Roll out a single episode with the loaded model, logging every transition.
obs = env.reset()
done = False
while not done:
    # Sample (non-deterministically) from the policy; the environment expects
    # a plain Python int rather than the array returned by predict().
    raw_action, _states = loaded_model.predict(obs, deterministic=False)
    action = int(raw_action)

    obs, reward, done, info = env.step(action)
    image = env.render(mode='rgb_array')

    print(f"Action: {action}, Reward: {reward}, Done: {done}, Info: {info}")

print("Episode finished.")
