In [1]:
import random, numpy as np, matplotlib as mpl, matplotlib.pyplot as plt, seaborn as sns, multiprocessing as mp, time
from collections import defaultdict
mpl.rcParams['lines.linewidth'] = 1
%matplotlib inline

  import pandas.util.testing as tm


# 3d Gridworld

In [2]:
# Side length of the cubic grid; each (x, y, z) cell is one state.
D = 50
# One state per cell of the D x D x D grid.
N_STATES = D ** 3
N_STATES

125000

In [3]:
def idx_to_coord(idx):
  """Convert a flat state index into its (x, y, z) grid coordinate.

  The index is decoded as x*D*D + y*D + z, so z varies fastest. Reads the
  notebook-level grid side length ``D``.
  """
  x, remainder = divmod(idx, D * D)
  y, z = divmod(remainder, D)
  return x, y, z

def coord_to_idx(coord):
  """Flatten an (x, y, z) coordinate into the index x*D*D + y*D + z.

  Reads the notebook-level grid side length ``D``.
  """
  x, y, z = coord
  # Horner form of x*D*D + y*D + z.
  return (x * D + y) * D + z

def neighbors_c(coord):
  """Return the six axis-aligned neighbours of an (x, y, z) coordinate.

  Each axis wraps around modulo the notebook-level ``D``, so cells on a face
  of the cube are adjacent to the cells on the opposite face. The result is
  ordered x+1, x-1, y+1, y-1, z+1, z-1.
  """
  x, y, z = coord
  origin = (x, y, z)
  adjacent = []
  for axis in range(3):
    for step in (1, -1):
      shifted = list(origin)
      shifted[axis] = (shifted[axis] + step) % D
      adjacent.append(tuple(shifted))
  return adjacent

def neighbors_i(idx):
  """Return the flat indices of the neighbours of the state with index ``idx``."""
  here = idx_to_coord(idx)
  return [coord_to_idx(adjacent) for adjacent in neighbors_c(here)]

In [4]:
# Collect each undirected edge once: the key is "lo hi" with the smaller
# index first, so (u, v) and (v, u) collapse to one entry. Every edge gets
# unit weight (commented-out alternative: np.random.randint(1, 101) / 100.).
edge_dict = {
  "{} {}".format(min(node, neighbor), max(node, neighbor)): 1
  for node in range(N_STATES)
  for neighbor in neighbors_i(node)
}

In [5]:
# Write one "u v weight" line per edge; each key already holds "u v".
with open('3d1.edgelist', 'w') as f:
  f.writelines(
    '{} {}\n'.format(edge, weight) for edge, weight in edge_dict.items()
  )

# 2d Taxi

In [6]:
# Side length of the square taxi grid.
D = 25
# D**4 (gripper cell, object cell) pairs with the object not held, plus
# D**2 states in which the gripper holds the object at its own cell.
N_STATES = D ** 4 + D ** 2
N_STATES

391250

In [7]:
def state_to_coord(state):
  """Return Python's built-in ``hash`` of ``state``.

  NOTE(review): despite the name, this returns a hash value, not a
  coordinate. Confirm the intended use before relying on it.
  """
  state_hash = hash(state)
  return state_hash

In [8]:
# Each state is a 5-tuple: (a, b) is the gripper position, (c, d) is the
# object position, and the last entry says whether the object is held.
# The held flag can only be 1 when gripper and object share a cell.
state_list = []
for a in range(D):
  for b in range(D):
    for c in range(D):
      for d in range(D):
        state_list.append((a, b, c, d, 0))
        if (a, b) == (c, d):
          state_list.append((a, b, c, d, 1))

# Forward (state -> hash) and reverse (hash -> state) lookup tables, both in
# enumeration order.
sh_dict = {s: hash(s) for s in state_list}
hs_dict = {h: s for s, h in sh_dict.items()}

# Two states sharing a hash would make hs_dict smaller than N_STATES.
assert len(sh_dict) == len(hs_dict) == len(state_list) == N_STATES

In [9]:
# Assign each state hash a dense integer index, in hs_dict insertion order,
# and keep both directions of the mapping.
ih_dict = dict(enumerate(hs_dict))
hi_dict = {h: i for i, h in ih_dict.items()}

In [10]:
# Repeats the earlier D = 25 assignment for the taxi grid; twoDneighbors
# below reads this global as the grid side length.
D = 25

In [11]:
def twoDneighbors(x, y, size=None):
  """Return the distinct 4-neighbours of cell (x, y) on a bounded square grid.

  Moves are clamped to the grid (no wrap-around). A move that is clamped
  back onto (x, y) itself is dropped, so for in-range cells a corner has two
  neighbours and an edge cell has three.

  Args:
    x, y: cell coordinates.
    size: side length of the grid. Defaults to the notebook-level ``D``, so
      existing two-argument calls behave as before.

  Returns:
    A list of (x, y) tuples in the fixed order x-1, y-1, x+1, y+1 (with
    dropped moves omitted). The order is deterministic; it no longer depends
    on set iteration order, which is an implementation detail.
  """
  if size is None:
    size = D
  candidates = [
    (max(0, x - 1), y),
    (x, max(0, y - 1)),
    (min(size - 1, x + 1), y),
    (x, min(size - 1, y + 1)),
  ]
  result = []
  for cell in candidates:
    # Skip clamped moves that land on the cell itself, and any duplicates.
    if cell != (x, y) and cell not in result:
      result.append(cell)
  return result

In [12]:
# Sanity check: with D = 25 the corner cell (24, 24) has only two neighbours.
twoDneighbors(24, 24)

[(23, 24), (24, 23)]

In [13]:
def idx_to_coord(idx):
  """Return the 5-tuple taxi state for dense index ``idx``.

  Goes index -> hash (``ih_dict``) -> state (``hs_dict``).
  """
  return hs_dict[ih_dict[idx]]

def coord_to_idx(coord):
  """Return the dense index of a 5-tuple taxi state.

  Goes state -> hash (``sh_dict``) -> index (``hi_dict``).
  """
  return hi_dict[sh_dict[coord]]

def neighbors_c(coord):
  """Return the taxi states reachable in one step from ``coord``.

  ``coord`` is indexed as (gripper x, gripper y, object x, object y, held).

  * held == 1: the gripper may drop the object in place, or move to a grid
    neighbour taking the object with it.
  * held == 0: the gripper may pick the object up if both are on the same
    cell, or move to a grid neighbour leaving the object where it is.

  Any other value of the held flag yields an empty list.
  """
  held = coord[4]
  gx, gy = coord[0], coord[1]
  ox, oy = coord[2], coord[3]

  if held == 1:
    # Drop in place first, then the moves that carry the object along.
    carried = [(nx, ny, nx, ny, 1) for nx, ny in twoDneighbors(gx, gy)]
    return [(gx, gy, ox, oy, 0)] + carried

  if held == 0:
    # Pick-up is only possible when gripper and object share a cell.
    pickup = [(gx, gy, ox, oy, 1)] if (gx == ox and gy == oy) else []
    moved = [(nx, ny, ox, oy, 0) for nx, ny in twoDneighbors(gx, gy)]
    return pickup + moved

  return []

def neighbors_i(idx):
  """Return the dense indices of the states adjacent to state index ``idx``."""
  state = idx_to_coord(idx)
  return [coord_to_idx(adjacent) for adjacent in neighbors_c(state)]

In [14]:
# Collect each undirected edge once: the key is "lo hi" with the smaller
# index first, so (u, v) and (v, u) collapse to one entry. Every edge gets
# unit weight (commented-out alternative: np.random.randint(1, 101) / 100.).
edge_dict = {
  "{} {}".format(min(node, neighbor), max(node, neighbor)): 1
  for node in range(N_STATES)
  for neighbor in neighbors_i(node)
}

In [15]:
# Write one "u v weight" line per edge; each key already holds "u v".
with open('taxi1.edgelist', 'w') as f:
  f.writelines(
    '{} {}\n'.format(edge, weight) for edge, weight in edge_dict.items()
  )

In [16]:
ls

3d1.edgelist               [0m[01;32mMake3dAndTaxi.ipynb[0m*   [01;34m__pycache__[0m/
custom_metric_loss_ops.py  metrics_pytorch.py     readme.md
data.py                    metrics_tf1.py         taxi1.edgelist
experiment.py              [01;32mPrepXYDPickles.ipynb[0m*
