<a href="https://colab.research.google.com/github/rosshalpin/clip-guided-scene-arrangement/blob/main/DISCRETE_MULTI_AGENT_DissertationProject_v0_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
try:
  import clip
  import stable_baselines3
  import sb3_contrib
  import pettingzoo
  from plot_image_grid import image_grid
  import supersuit as ss
  import optuna
except (ModuleNotFoundError, ImportError):
  !pip install git+https://github.com/openai/CLIP.git
  !pip install stable-baselines3[extra]
  !pip install git+https://github.com/Stable-Baselines-Team/stable-baselines3-contrib
  !pip install pettingzoo
  !wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/main/docs/tutorials/utils/plot_image_grid.py
  !pip install supersuit
  !pip install optuna

In [2]:
import os, sys
from google.colab import drive
drive.mount('/content/drive')
nb_path = '/content/notebooks'

!ln -s /content/drive/My\ Drive/Colab\ Notebooks/ $nb_path

sys.path.insert(0,nb_path)

!ln -s /content/gdrive/My\ Drive/ /mydrive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
ln: failed to create symbolic link '/content/notebooks/Colab Notebooks': File exists
ln: failed to create symbolic link '/mydrive': File exists


In [3]:
import pytorch3d
import os
import torch
import matplotlib.pyplot as plt

# Util function for loading meshes
from pytorch3d.io import load_objs_as_meshes, load_obj
from pytorch3d.ops import sample_points_from_meshes

# Data structures and functions for rendering
from pytorch3d.structures import Meshes, join_meshes_as_batch, join_meshes_as_scene, Pointclouds
from pytorch3d.vis.plotly_vis import AxisArgs, plot_batch_individually, plot_scene
from pytorch3d.vis.texture_vis import texturesuv_image_matplotlib
from pytorch3d.renderer import (
    look_at_view_transform,
    FoVPerspectiveCameras, 
    PointLights,
    AmbientLights,
    DirectionalLights, 
    Materials, 
    RasterizationSettings, 
    MeshRenderer, 
    MeshRasterizer,  
    SoftPhongShader,
    TexturesUV,
    TexturesVertex
)

# add path for demo utils functions 
import sys
import os
sys.path.append(os.path.abspath(''))
import IPython.display
import matplotlib.pyplot as plt
from PIL import Image
import torch
import numpy as np

In [4]:
# Setup
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)
else:
    device = torch.device("cpu")

# Set paths
DATA_DIR = '/content/drive/My Drive/DissertationProject_v0.0/data'

def load_mesh(input_path) -> Meshes:
  obj_filename = os.path.join(DATA_DIR, input_path)
  return load_objs_as_meshes([obj_filename], device=device)

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
model.cuda().eval()
input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

In [6]:
import copy

from pytorch3d.renderer import (
  HardPhongShader
)

class SceneObject():
    def __init__(self, mesh, scale=1):
      new_mesh = mesh.clone().scale_verts(scale)
      self._mesh = new_mesh
      self._scale = scale
      self._position = self._mesh_position()
      self._prev_position = self._mesh_position()

    @property
    def mesh(self):
      return self._mesh

    @property
    def position(self):
      return self._position

    @position.setter
    def position(self, value):
      # print(value, self._position)
      offset = [round(a-b,3) for a, b in zip(value, self._position)]
      self._set_position_helper(offset)

    def _mesh_position(self):
      return [round(float(((c.cpu()[0]+c.cpu()[1])/2)), 3) for c in self._mesh.get_bounding_boxes()[0]]

    def _set_position_helper(self, value):
      self._prev_position = copy.deepcopy(self._position)
      offset = self._mesh.verts_padded().new_tensor(value).expand(self._mesh.verts_packed().shape)
      self._mesh = self._mesh.offset_verts(offset)
      self._position = self._mesh_position()

    def translate(self, value):
      self._set_position_helper(value)

    def reset_pos(self):
      self._position = copy.deepcopy(self._prev_position)

class Scene():
  def __init__(self, meshes: list, azim, elev, dist):
    self.AZIM = azim
    self.ELEV = elev
    self.num_cameras = max(len(self.AZIM), len(self.ELEV))
    self._meshes = meshes
    self.CAMERA_DIST = dist
    self._scene = join_meshes_as_scene(meshes).extend(self.num_cameras)
    self.device = device
    

  @property
  def scene(self):
    return self._scene

  @scene.setter
  def scene(self, value):
    self._scene = join_meshes_as_scene(value).extend(self.num_cameras)

  @property
  def _lights(self):
    return PointLights(device=device, location=[[0.0, 5.0, 7.0]])
    # return AmbientLights(device=self.device)

  @property
  def _cameras(self):
    R, T = look_at_view_transform(dist=self.CAMERA_DIST, azim=self.AZIM, elev=self.ELEV)
    return FoVPerspectiveCameras(device=self.device, R=R, T=T)

  @property
  def renderer(self):
    return MeshRenderer(
        rasterizer=MeshRasterizer(
            raster_settings=RasterizationSettings(
              image_size=256, 
              faces_per_pixel=1,
              bin_size=None
            )
        ),
        shader=HardPhongShader(
            device=self.device
        )
    )
  
  def render(self):
    return self.renderer(self.scene, cameras=self._cameras, lights=self._lights).cpu().numpy()


In [7]:
from PIL import Image

def get_pil_image(input):
  return Image.fromarray((input * 255).astype('uint8'))

def clip_sim_3(input: list, description: str):
  text = clip.tokenize(description).to(device)
  with torch.no_grad():
    text_features = model.encode_text(text)
  text_features /= text_features.norm(dim=-1, keepdim=True)
  similarities = []
  for image_input in input:
    # image_input = get_pil_image(image_input[0, ..., :3])
    image_input = preprocess(image_input).unsqueeze(0).to(device)

    with torch.no_grad():
      image_features = model.encode_image(image_input).float()

    image_features /= image_features.norm(dim=-1, keepdim=True)
    similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T
    # print(similarity[0][0])
    similarities.append(similarity[0][0])
  return similarities

In [8]:
from itertools import product

DIRECTIONS = list(product(range(-1, 2), repeat=3))
mod = 0.2
ALL_DIRECTIONS = [[a * mod for a in b] for b in DIRECTIONS]
ACTIONS_MAP = {
  i: ALL_DIRECTIONS[i] for i in range(len(ALL_DIRECTIONS))
}

In [9]:
import copy

from gym.spaces import Box, Discrete
import numpy as np
import functools
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector
from pettingzoo.utils import wrappers
from scipy.stats import mannwhitneyu

class RenderEnv(AECEnv):
  """
  The metadata holds environment constants. From gym, we inherit the "render_modes",
  metadata which specifies which modes can be put into the render() method.
  At least human mode should be supported.
  The "name" metadata allows the environment to be pretty printed.
  """

  metadata = {"render_modes": ["human"], "name": "rps_v2"}

  def __init__(self, objs, guide, limit=None):
    """
    The init method takes in environment arguments and
      should define the following attributes:
    - possible_agents
    - action_spaces
    - observation_spaces

    These attributes should not be changed after initialization.
    """
    self.GUIDE_STRING = guide

    self.limit = limit
    self.rounds = 0

    self.camera_config = {
      'azim': torch.linspace(0, 180, 4),
      'elev': [50],
      'dist': 20.0
    }

    self.actions_map = ACTIONS_MAP

    self.limit_box = [[-10,-1,-10],[10,10,10]]
    self.p_threshold = 0.1
    

    self.best = {}
    self.images = None

    
    self.possible_agents = ["object_" + str(r) for r in range(len(objs))]
    self.agent_mapping = dict(
        zip(self.possible_agents, [copy.deepcopy(obj) for obj in objs])
    )

    self.limited = np.ones((len(self.possible_agents))).astype(np.float32)

    self.scene = Scene(
      meshes=[a.mesh for a in list(self.agent_mapping.values())], 
      **self.camera_config
    )

    self.num_cameras = self.scene.num_cameras
    self.best_sim_matrix = np.zeros(self.num_cameras).astype(np.float32)
    self.prev_sim_matrix = np.zeros(self.num_cameras).astype(np.float32)

    # Gym spaces are defined and documented here: https://gym.openai.com/docs/#spaces
    self._action_spaces = {agent: Discrete(len(self.actions_map)) for agent in self.possible_agents}
    self._observation_spaces = {
        agent: Box(low=-1, high=1, shape=(3,)) for agent in self.possible_agents
    }

  # this cache ensures that same space object is returned for the same agent
  # allows action space seeding to work as expected
  @functools.lru_cache(maxsize=None)
  def observation_space(self, agent):
    # Gym spaces are defined and documented here: https://gym.openai.com/docs/#spaces
    return Discrete(len(self.actions_map))

  @functools.lru_cache(maxsize=None)
  def action_space(self, agent):
    return Discrete(len(self.actions_map))


  def render_scene(self) -> None:
      self.scene = Scene([a.mesh for a in list(self.agent_mapping.values())], **self.camera_config)
      self.images = self.scene.render()

  def clip_scores(self):
    self.render_scene()
    pil_images = [get_pil_image(img[..., :3]) for img in self.images]
    return clip_sim_3(pil_images, self.GUIDE_STRING)


  def limit_action(self, action, i):
    limited = False
    translation_result = [a+b for a,b in zip (list(self.agent_mapping.values())[i].position, action)]
    for i, val in enumerate(translation_result):
        if val < self.limit_box[0][i]:
            limited = True
        elif val > self.limit_box[1][i]:
            limited = True
    return limited

  def perform_test(self, a, b):
    stat, p = mannwhitneyu(a, b, alternative='greater',method='exact')
    return stat, p

  def get_reward(self, sim_matrix) -> int:
    rw = 0

    stat_best, p_best = self.perform_test(sim_matrix, self.best_sim_matrix)
    stat_prev, p_prev = self.perform_test(sim_matrix, self.prev_sim_matrix)

    if p_best <= self.p_threshold:
      self.best_sim_matrix = sim_matrix
      self.best["images"] = self.images
      self.best["scene"] = self.scene.scene

    rw += 1-p_best
    rw += 1-p_prev

    rw = (2 *(rw - -2)/(2- -2)) - 1

    self.prev_sim_matrix = sim_matrix

    return rw


  def take_action(self, i, action):
    action = self.actions_map[action]
    got_limited = self.limit_action(action[:], i)
    if got_limited:
      self.limited[i] = 0.0
    else:
      self.agent_mapping[self.agents[i]].translate(action)
    return self.agent_mapping[self.agents[i]].position

  def render(self, mode="human"):
    """
    Renders the environment. In human mode, it can print to terminal, open
    up a graphical window, or open up some other display that a human can see and understand.
    """
    # if len(self.agents) == 2:
    #     string = "Current state: Agent1: {} , Agent2: {}".format(
    #         MOVES[self.state[self.agents[0]]], MOVES[self.state[self.agents[1]]]
    #     )
    # else:
    #     string = "Game over"
    # print(string)
    pass

  def observe(self, agent):
    """
    Observe should return the observation of the specified agent. This function
    should return a sane observation (though not necessarily the most up to date possible)
    at any time after reset() is called.
    """
    # observation of one agent is the previous state of the other
    return np.array(self.observations[agent])

  def close(self):
    """
    Close should release any graphical displays, subprocesses, network connections
    or any other environment data which should not be kept around after the
    user is no longer using the environment.
    """
    pass

  def reset(self, seed=None):
    """
    Reset needs to initialize the following attributes
    - agents
    - rewards
    - _cumulative_rewards
    - dones
    - infos
    - agent_selection
    And must set up the environment so that render(), step(), and observe()
    can be called without issues.

    Here it sets up the state dictionary which is used by step() and the observations dictionary which is used by step() and observe()
    """

    self.agents = self.possible_agents[:]
    self.rewards = {agent: 0 for agent in self.agents}
    self._cumulative_rewards = {agent: 0 for agent in self.agents}
    self.dones = {agent: False for agent in self.agents}
    self.infos = {agent: {} for agent in self.agents}
    self.state =  {agent: np.zeros(3).astype(np.float32) for agent in self.agents}
    self.observations =  {agent: np.zeros(3).astype(np.float32) for agent in self.agents}
    self.rounds = 0
    self.limited = np.ones((len(self.agents))).astype(np.float32)
    self.best_sim_matrix = np.zeros(self.num_cameras).astype(np.float32)
    self.prev_sim_matrix = np.zeros(self.num_cameras).astype(np.float32)
    """
    Our agent_selector utility allows easy cyclic stepping through the agents list.
    """
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.next()

  def step(self, action):
    """
    step(action) takes in an action for the current agent (specified by
    agent_selection) and needs to update
    - rewards
    - _cumulative_rewards (accumulating the rewards)
    - dones
    - infos
    - agent_selection (to the next agent)
    And any internal state used by observe() or render()
    """

    if self.dones[self.agent_selection]:
        # handles stepping an agent which is already done
        # accepts a None action for the one agent, and moves the agent_selection to
        # the next done agent,  or if there are no more done agents, to the next live agent
        return self._was_done_step(action)
    
    agent = self.agent_selection

    # the agent which stepped last had its _cumulative_rewards accounted for
    # (because it was returned by last()), so the _cumulative_rewards for this
    # agent should start again at 0
    self._cumulative_rewards[agent] = 0

    # stores action of current agent
    self.state[agent] = self.take_action(self.possible_agents.index(agent), action)

    # collect reward if it is the last agent to act
    if self._agent_selector.is_last():
      # rewards for all agents are placed in the .rewards dictionary
      self.rounds +=1 
      self.dones = {agent: self.rounds >= self.limit for agent in self.agents}

      # observe the current state
      for i in self.agents:
        self.observations[i] = {
          self.agents[i]: self.agent_mapping[self.agents[i]].position for i in range(len(self.agents))
      }

      sim_matrix = np.asarray(self.clip_scores()).astype(np.float32)

      overall_reward = self.get_reward(sim_matrix)

      self.rewards = { self.agents[i]: overall_reward * self.limited[i] for i in range(len(self.agents)) }

      self.limited = np.ones((len(self.agents))).astype(np.float32)
    else:
      # necessary so that observe() returns a reasonable observation at all times.
      self.state[self.agents[1 - self.possible_agents.index(agent)]] = None
      # no rewards are allocated until both players give an action
      self._clear_rewards()

    # selects the next agent.
    self.agent_selection = self._agent_selector.next()
    # Adds .rewards to ._cumulative_rewards
    self._accumulate_rewards()

In [10]:
fruit_mesh = load_mesh(f"{DATA_DIR}/fruit_mesh/pear_export.obj")
table_mesh = load_mesh(f"{DATA_DIR}/table_mesh/GenericClassicTable001.obj")

In [11]:
fruit_object = SceneObject(fruit_mesh, scale=0.25)
fruit_objectB = SceneObject(fruit_mesh, scale=0.25)
table_object = SceneObject(table_mesh, scale=8)

table_object.position=[0,0,0]
fruit_objectB.position=[0,0,0]
fruit_object.position=[0,0,0]

In [12]:
env = RenderEnv([fruit_object,fruit_objectB, table_object], "Pieces of fruit on top of a wooden table", limit=50)

In [13]:
env.reset()
count = 0
for agent in env.agent_iter():
    count +=1
    actual_count = int(count / 3)
    observation, reward, done, info = env.last()
    action = Discrete(len(env.actions_map)).sample()
    if done:
      action = None
    env.step(action)
    if done:
        break

In [None]:
image_grid(env.best["images"], rows=1, cols=4, rgb=True)

In [None]:
plot_batch_individually(env.scene.scene[0])

In [13]:
from torch import cuda
env.reset()
cuda.empty_cache()

In [14]:
num_envs = 1
v_env = ss.gym_vec_env_v0(env, num_envs, multiprocessing=False)

  f"{fn_name} took in an environment which does not inherit from gym.Env. Note that gym_vec_env only takes in gym-style environments, not pettingzoo environments."


ValueError: ignored

In [19]:
from stable_baselines3 import PPO

n_envs = 1
n_steps = 64
total_timesteps = (n_steps * n_envs) * 1
train_model = PPO('MlpPolicy', v_env, verbose=1, n_steps=n_steps).learn(n_eval_episodes=64, total_timesteps=total_timesteps)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."


AttributeError: ignored