In [1]:
import os
import time
from collections import deque, defaultdict
from pathlib import Path

import torch
import torch.nn as nn
from torchvision import transforms
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from pyquaternion import Quaternion
import cv2
import pandas as pd
from gym.spaces import Box, Dict, Discrete

import habitat
from habitat.config import Config
from habitat.config.default import get_config
from habitat.core.env import Env
from habitat.sims.habitat_simulator.actions import HabitatSimActions

# from habitat_baselines.rl.ddppo.policy.resnet import ResNet, resnet50, se_resneXt50
# from habitat_baselines.rl.ppo import PointNavBaselinePolicy
from habitat_baselines.agents.ppo_agents import PPOAgent

# Relative location of the `frames` directory, one level above the working directory.
root = Path('..') / 'frames'

# torchvision transform object for converting image data to PIL images.
transform_ = transforms.ToPILImage()

In [2]:
# Root of the local habitat workspace. Defaults to the location the notebook was
# originally written against; export HABITAT_ROOT to run it on another machine.
HABITAT_ROOT = (
    os.environ.get('HABITAT_ROOT') or '/Users/nimit/Documents/robomaster/habitat'
).rstrip('/')

# Checkpoint file for each input modality. Values are kept as plain strings
# because they are later stored in a Config node and passed to get_config().
models = {
#     'rgb':   f'{HABITAT_ROOT}/models/v2/gibson-2plus-mp3d-train-val-test-se-resneXt50-rgb.pth',
#     'depth': f'{HABITAT_ROOT}/models/v2/gibson-2plus-resnet50.pth',
#     'depth': f'{HABITAT_ROOT}/models/v2/gibson-4plus-resnet50.pth',
    'rgb':   f'{HABITAT_ROOT}/models/v2/rgb.pth',
    'depth': f'{HABITAT_ROOT}/models/v2/depth.pth',
}

# Task/environment YAML for each input modality.
configs = {
    'rgb':   f'{HABITAT_ROOT}/habitat2robomaster/rgb_test.yaml',
    'depth': f'{HABITAT_ROOT}/habitat2robomaster/configs/pointgoal-depth-train.yaml',
#     'depth': f'{HABITAT_ROOT}/habitat-api/habitat_baselines/config/pointnav/ddppo_pointnav.yaml'
}

In [3]:
# Agent-side settings handed to PPOAgent; the environment itself is configured
# separately from the YAML selected by INPUT_TYPE.
c = Config()

c.RESOLUTION       = 256
c.HIDDEN_SIZE      = 512
c.RANDOM_SEED      = 7
# Upstream habitat_baselines' default agent config carries PTH_GPU_ID and
# PPOAgent appears to read it when CUDA is available (confirm against the
# installed version). Without it this cell only works on CPU-only machines.
c.PTH_GPU_ID       = 0

# INPUT_TYPE selects both the checkpoint (models) and the env YAML (configs).
c.INPUT_TYPE       = 'depth'
c.MODEL_PATH       = models[c.INPUT_TYPE]
c.GOAL_SENSOR_UUID = 'pointgoal_with_gps_compass'

# Freeze so later cells cannot silently mutate the agent settings.
c.freeze()

env = Env(config=get_config(configs[c.INPUT_TYPE]))
agent = PPOAgent(c)

2020-04-16 19:25:21,638 Initializing dataset PointNav-v1
2020-04-16 19:25:21,854 initializing sim Sim-v0
I0416 19:25:23.920087 98255 simulator.py:142] Loaded navmesh /Users/nimit/Documents/robomaster/habitat/habitat-api/data/scene_datasets/habitat-test-scenes/skokloster-castle.navmesh
2020-04-16 19:25:23,924 Initializing task Nav-v0


In [4]:
# Roll the agent forward for a handful of steps, previewing each observation in
# OpenCV windows, then print the task metrics. `observations` is left holding
# the last observation, which the DDPPO cell below reuses as its training input.
MAX_PREVIEW_STEPS = 4

agent.reset()
observations = env.reset()

for _ in range(MAX_PREVIEW_STEPS):
    if env.episode_over:
        break

    # Each sensor gets its own window; previously the depth image was drawn into
    # the 'rgb' window and immediately replaced the RGB preview.
    if 'rgb' in observations:
        # cv2.imshow expects BGR channel order; the sensor output is presumably
        # RGB (habitat's own examples swap channels before display) - confirm.
        cv2.imshow('rgb', cv2.cvtColor(observations['rgb'], cv2.COLOR_RGB2BGR))
    if 'depth' in observations:
        cv2.imshow('depth', observations['depth'])
    cv2.waitKey(1)  # give the GUI event loop a chance to draw

    # Act only when the action will actually be applied, so the agent's
    # recurrent state is not advanced by a discarded step.
    action = agent.act(observations)
    observations = env.step(action)

metrics = env.get_metrics()
print(metrics)

# Closes whichever preview windows were opened (safe even if none were).
cv2.destroyAllWindows()

{'distance_to_goal': 9.49310302734375, 'success': 0.0, 'spl': 0.0, 'collisions': {'count': 0, 'is_collision': False}}


## DDPPO

In [33]:
from pathlib import Path
import numpy as np
import torch

from gym.spaces import Box, Dict, Discrete

from habitat.config.default import get_config
from habitat import get_config as get_task_config

from habitat_baselines.rl.ddppo.policy.resnet_policy import PointNavResNetPolicy
from habitat_baselines.common.utils import batch_obs

# Settings for the standalone policy built in this cell. `Config` and `models`
# come from earlier cells (the first import cell and the path cell).
c = Config()

c.RESOLUTION       = 256
c.HIDDEN_SIZE      = 512
c.RANDOM_SEED      = 7

c.INPUT_TYPE       = 'depth'
c.MODEL_PATH       = models[c.INPUT_TYPE]
c.GOAL_SENSOR_UUID = 'pointgoal_with_gps_compass'

c.freeze()

# RANDOM_SEED was defined but never applied in this cell. Seed torch before the
# policy is constructed so weight initialisation and the stochastic action
# sampling below are reproducible across kernel restarts.
torch.manual_seed(c.RANDOM_SEED)

# Observation space handed to the policy: an unbounded 2-vector under the goal
# sensor key, plus a single-channel square depth image with values in [0, 1].
float32_limits = np.finfo(np.float32)
spaces = {
    c.GOAL_SENSOR_UUID: Box(
        low=float32_limits.min,
        high=float32_limits.max,
        shape=(2,),
        dtype=np.float32,
    ),
    "depth": Box(
        low=0,
        high=1,
        shape=(c.RESOLUTION, c.RESOLUTION, 1),
        dtype=np.float32,
    ),
}

observation_spaces = Dict(spaces)
action_spaces = Discrete(4)  # four discrete actions

# ResNet-50 visual encoder feeding a 2-layer LSTM; visual input normalisation is off.
actor_critic = PointNavResNetPolicy(
    observation_space=observation_spaces,
    action_space=action_spaces,
    goal_sensor_uuid=c.GOAL_SENSOR_UUID,
    hidden_size=c.HIDDEN_SIZE,
    backbone='resnet50',
    rnn_type='LSTM',
    num_recurrent_layers=2,
    normalize_visual_inputs=False,
)

def _batch_of_one(array):
    """Return `array` as a float32 tensor with a leading batch axis of size 1."""
    as_double = torch.from_numpy(np.asarray(array, dtype=np.float64))
    return as_double.to(dtype=torch.float).unsqueeze(0)


# Single-sample batch built from `observations`, which is left over from the
# rollout cell above. Everything here lives on the CPU (TODO: move to GPU).
batch = {
    sensor: _batch_of_one(observations[sensor])
    for sensor in ('depth', 'pointgoal_with_gps_compass')
}

# Recurrent state, episode mask and previous action for a batch of one.
# The mask starts at zero, as at the beginning of an episode.
test_recurrent_hidden_states = torch.zeros(
    actor_critic.net.num_recurrent_layers, 1, c.HIDDEN_SIZE
)  # TODO: move to GPU
not_done_masks = torch.zeros(1, 1)  # TODO: move to GPU
prev_actions = torch.zeros(1, 1, dtype=torch.long)  # TODO: move to GPU

# Fixed target action used as the supervised label.
gt_action = torch.tensor([3], dtype=torch.long)

criterion = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(actor_critic.parameters())

# Repeatedly fit the policy's action logits to the single fixed label
# `gt_action` on the single fixed `batch` (an overfitting sanity check).
for _ in range(100):
    optim.zero_grad()

    # NOTE: from habitat_baselines/agents/ppo_agents.py#137
    # must replace:
    # * self.test_recurrent_hidden_states
    # * self.prev_actions
    # * self.not_done_masks
    _, actions, action_log_probs, test_recurrent_hidden_states = actor_critic.act(
        batch,
        # Detach so gradients do not flow back into earlier iterations' graphs.
        test_recurrent_hidden_states.detach(),
        prev_actions,
        not_done_masks,
        deterministic=False)
    #  Make masks not done till reset (end of episode) will be called
    not_done_masks = torch.ones(1, 1) # move to GPU
    prev_actions.copy_(actions)

    # `prev_distribution` is not set anywhere in this notebook; presumably the
    # installed policy was patched to expose the last action distribution - confirm.
    loss = criterion(actor_critic.prev_distribution.logits, gt_action)
    # Report the loss each step; the recorded output of this cell is exactly
    # such a trace, which the loop previously could not reproduce.
    print(loss)

    loss.backward()
    optim.step()

tensor(1.3863, grad_fn=<NllLossBackward>)
tensor(1.3687, grad_fn=<NllLossBackward>)
tensor(1.2881, grad_fn=<NllLossBackward>)
tensor(1.0431, grad_fn=<NllLossBackward>)
tensor(0.6031, grad_fn=<NllLossBackward>)
tensor(0.2244, grad_fn=<NllLossBackward>)
tensor(0.0688, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.0092, grad_fn=<NllLossBackward>)
tensor(0.0042, grad_fn=<NllLossBackward>)
tensor(0.0021, grad_fn=<NllLossBackward>)
tensor(0.0012, grad_fn=<NllLossBackward>)
tensor(0.0007, grad_fn=<NllLossBackward>)
tensor(0.0004, grad_fn=<NllLossBackward>)
tensor(0.0003, grad_fn=<NllLossBackward>)
tensor(0.0002, grad_fn=<NllLossBackward>)
tensor(0.0001, grad_fn=<NllLossBackward>)
tensor(9.5601e-05, grad_fn=<NllLossBackward>)
tensor(7.1642e-05, grad_fn=<NllLossBackward>)
tensor(5.5073e-05, grad_fn=<NllLossBackward>)
tensor(4.3391e-05, grad_fn=<NllLossBackward>)
tensor(3.4928e-05, grad_fn=<NllLossBackward>)
tensor(2.8610e-05, grad_fn=<NllLossBackward>)
tensor(2.3

KeyboardInterrupt: 