# Minigame 12: Batched Evaluation

In this exploration, we're looking at the ability to train policies on small patches and to apply them to larger meshes by amalgamating the logits from each of the NN evaluations.

In [1]:
import math
from math import sin,cos
import random

In [2]:
import sys
import gym
from gym import spaces, utils
import numpy as np
import ray
import ray.rllib.agents.ppo as ppo

Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
from glvis import glvis, to_stream
from ipywidgets import Layout

In [4]:
import matplotlib.pyplot as plt

In [5]:
from mfem import path
import mfem.ser as mfem

Define some synthetic test functions: steps and bumps.

In [6]:
def rotate(x,theta):
    """Rotate a 2D point about the origin.

    Args:
        x: indexable point; only ``x[0]`` and ``x[1]`` are read.
        theta: rotation angle in radians (standard counter-clockwise
            rotation matrix).

    Returns:
        A new two-element list ``[x', y']`` with the rotated coordinates.
    """
    px, py = x[0], x[1]
    c, s = cos(theta), sin(theta)
    return [px*c - py*s, px*s + py*c]

In [7]:
def step(x):
    """Sharp step in the first coordinate.

    Returns 1.0 when ``x[0]`` is strictly negative and 0.0 otherwise;
    the remaining coordinates of ``x`` are ignored.
    """
    return 1.0 if x[0] < 0.0 else 0.0

In [8]:
def rotated_step(x, theta):
    """Evaluate the sharp step after rotating ``x`` by ``theta`` radians.

    The point is first passed through ``rotate`` and the result is fed
    to ``step``.
    """
    return step(rotate(x, theta))

In [9]:
def bump(x):
    """Gaussian bump ``exp(-(x[0]^2 + x[1]^2))`` centred at the origin.

    Only the first two components of ``x`` are used.
    """
    return math.exp(-(x[0]**2 + x[1]**2))

In [10]:
def smooth_step(x):
    """Smooth (tanh) step in the first coordinate.

    Returns ``0.5 * (1 + tanh(x[0]))``, which goes from 0 for very
    negative ``x[0]`` to 1 for very positive ``x[0]``.
    """
    transition = math.tanh(x[0])
    return 0.5*(1.0 + transition)

In [11]:
def rotated_smooth_step(x,theta):
    """Evaluate the smooth step after rotating ``x`` by ``theta`` radians.

    The point is first passed through ``rotate`` and the result is fed
    to ``smooth_step``.
    """
    return smooth_step(rotate(x, theta))

Create classes where we can set the parameters and then eval a bunch of points.

In [12]:
class Step(mfem.PyCoefficient):
    """Coefficient wrapping a sharp step with a random angle and offset.

    Call ``SetParams`` before the coefficient is evaluated; ``EvalValue``
    reads the attributes it sets.
    """

    def SetParams(self):
        """Draw a rotation angle in [0, 2*pi] and an offset in [-1, 1]^2."""
        self.theta = random.uniform(0.0, 2.0*math.pi)
        self.dx = [random.uniform(-1.0, 1.0) for _ in range(2)]

    def EvalValue(self, x):
        """Return the rotated step evaluated at ``x`` shifted by ``self.dx``."""
        # assumes x is a NumPy array so that `+` with a list is element-wise
        # -- TODO confirm against the PyCoefficient calling convention
        shifted = x + self.dx
        return rotated_step(shifted, self.theta)

In [13]:
class Bump(mfem.PyCoefficient):
    """Coefficient wrapping a Gaussian bump with random width and offset.

    Call ``SetParams`` before the coefficient is evaluated; ``EvalValue``
    reads the attributes it sets.
    """

    def SetParams(self):
        """Draw a width in [0.1, 1.0] and an offset in [-0.5, 0.5]^2."""
        self.width = random.uniform(0.1,1.0)
        # Nominal centre of the bump before the random offset is applied.
        self.xc = [0.5,0.5]
        self.dx = [random.uniform(-0.5, 0.5) for _ in range(2)]

    def EvalValue(self, x):
        """Return the bump at ``x``, recentred and scaled by ``self.width``."""
        # assumes x is a NumPy array (element-wise arithmetic with lists)
        # -- TODO confirm
        offset = x - self.xc + self.dx
        return bump(offset/self.width)

In [14]:
class TwoBump(mfem.PyCoefficient):
    """Coefficient that averages two independently parameterised bumps.

    Call ``SetParams`` before the coefficient is evaluated; ``EvalValue``
    reads the attributes it sets.
    """

    def SetParams(self):
        """Draw two widths in [0.1, 0.5] and two offsets in [-0.5, 0.5]^2."""
        self.width1 = random.uniform(0.1,0.5)
        self.width2 = random.uniform(0.1,0.5)
        # Both bumps share the same nominal centre; only the offsets differ.
        self.xc1 = [0.5,0.5]
        self.xc2 = [0.5,0.5]
        self.dx1 = [random.uniform(-0.5, 0.5) for _ in range(2)]
        self.dx2 = [random.uniform(-0.5, 0.5) for _ in range(2)]

    def EvalValue(self, x):
        """Return the mean of the two bumps evaluated at ``x``."""
        # assumes x is a NumPy array (element-wise arithmetic with lists)
        # -- TODO confirm
        first = bump((x - self.xc1 + self.dx1)/self.width1)
        second = bump((x - self.xc2 + self.dx2)/self.width2)
        # The two bumps are averaged (not combined with max).
        return 0.5*(first + second)

In [15]:
class SmoothStep(mfem.PyCoefficient):
    """Coefficient wrapping a smooth (tanh) step with random steepness and angle.

    Call ``SetParams`` before the coefficient is evaluated; ``EvalValue``
    reads the attributes it sets.
    """

    def SetParams(self):
        """Draw a steepness factor in [5, 10] and an angle in [0, 2*pi]."""
        self.width = random.uniform(5.0, 10.0)
        # The step passes through this point.
        self.xc = [0.5,0.5]
        self.theta = random.uniform(0.0, 2.0*math.pi)

    def EvalValue(self, x):
        """Return the rotated smooth step at ``x``, recentred on ``self.xc``.

        The input ``x`` is left untouched: the shift is written to a new
        array instead of using ``x -= self.xc``, which would modify the
        caller's buffer and corrupt ``x`` for any other coefficient that
        is evaluated on the same point afterwards.
        """
        # assumes x is a NumPy array (element-wise arithmetic with lists)
        # -- TODO confirm
        shifted = x - self.xc
        return rotated_smooth_step(shifted*self.width, self.theta)

In [16]:
class BumpsAndSmoothStep(mfem.PyCoefficient):
    """Coefficient that averages one ``Bump`` and one ``SmoothStep``.

    Call ``SetParams`` before the coefficient is evaluated; it creates
    and randomises the two component coefficients.
    """

    def SetParams(self):
        """Create the component coefficients and randomise each in turn."""
        self.bump = Bump()
        self.bump.SetParams()
        self.smooth_step = SmoothStep()
        self.smooth_step.SetParams()

    def EvalValue(self, x):
        """Return the equally weighted sum of the bump and the smooth step."""
        # The bump term is evaluated before the step term, in the same
        # left-to-right order as the sum below.
        bump_part = 0.5*self.bump.EvalValue(x)
        step_part = 0.5*self.smooth_step.EvalValue(x)
        return bump_part+step_part


Visualize an instance of the test function. Note that each instance has randomly chosen parameters.  For the steps, it's a rotation angle and a displacement.  For the bumps, it's a width and a displacement.

In [17]:
# Load the base mesh and uniformly refine it twice.
mesh = mfem.Mesh('inline-quad.mesh')
mesh.UniformRefinement()
mesh.UniformRefinement()
# Order-1 L2 finite element space in 2D on that mesh.
fec = mfem.L2_FECollection(p=1, dim=2)
fes = mfem.FiniteElementSpace(mesh, fec)
u = mfem.GridFunction(fes)
# Draw one random instance of the combined test function and project it onto u.
c = BumpsAndSmoothStep()
c.SetParams()
u.ProjectCoefficient(c)

In [18]:
# Show the mesh together with the projected test function in a GLVis widget.
glvis((mesh, u), 600, 600,layout = Layout(width='100%', height='600px'))

glvis(layout=Layout(height='600px', width='100%'))

Create the gym environment. Note that in this case, this can be just a dummy environment that only serves to define the observation and action spaces for the purposes of evaluation of the policy.

In [19]:
class AMRGameDummy(gym.Env):
    """Placeholder environment that only defines the action/observation spaces.

    ``step``, ``reset`` and ``render`` do nothing and return ``None``; the
    class exists so that a policy can be built with the right input and
    output sizes.
    """

    # In RLlib, you need the config arg
    def __init__(self,config):
        """Build the spaces from 'inline-quad.mesh'; ``config`` is not read."""
        self.meshfile = 'inline-quad.mesh'
        self.mesh = mfem.Mesh(self.meshfile)

        # The finite element space and grid function are created only to
        # obtain the sizes needed for the two spaces below.
        self.order = 1
        self.fec = mfem.L2_FECollection(self.order, self.mesh.Dimension())
        self.fes = mfem.FiniteElementSpace(self.mesh, self.fec)
        self.u = mfem.GridFunction(self.fes)

        # One discrete action per mesh element.
        num_elements = self.mesh.GetNE()
        self.action_space = spaces.Discrete(num_elements)

        # One observation entry per DOF of the grid function, bounded to [-1, 1].
        num_dofs = self.u.Size()
        self.observation_space = spaces.Box(-1.0, 1.0, shape=(num_dofs,), dtype=np.float32)

    def step(self, action):
        """No-op; returns ``None``."""

    def reset(self):
        """No-op; returns ``None``."""

    def render(self):
        """No-op; returns ``None``."""

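Instantiate the environment. (There is no explicit instantiation in this notebook: the environment class itself is passed to the RLlib trainer below.)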

Now we want to load a trained policy, and apply it in a strided way.

In [20]:
# Shut down any previous Ray session before starting a new one, so that
# this cell can be re-run.
ray.shutdown()
ray.init(ignore_reinit_error=True)

2021-02-15 18:53:39,358	INFO services.py:1173 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8268[39m[22m


{'node_ip_address': '128.15.244.46',
 'raylet_ip_address': '128.15.244.46',
 'redis_address': '128.15.244.46:20645',
 'object_store_address': '/tmp/ray/session_2021-02-15_18-53-38_774821_2120/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-02-15_18-53-38_774821_2120/sockets/raylet',
 'webui_url': '127.0.0.1:8268',
 'session_dir': '/tmp/ray/session_2021-02-15_18-53-38_774821_2120',
 'metrics_export_port': 36502,
 'node_id': '6ed28f90cdd50184da8e9434812d1db1503470e0'}

In [21]:
# Start from a copy of the default PPO configuration and select the 'tfe' framework.
config = ppo.DEFAULT_CONFIG.copy()
config['framework'] = 'tfe'
# The environment class (not an instance) is handed to the trainer.
agent = ppo.PPOTrainer(config, env=AMRGameDummy)

2021-02-15 18:53:41,759	INFO trainer.py:588 -- Executing eagerly, with eager_tracing=False
2021-02-15 18:53:41,759	INFO trainer.py:618 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=2388)[0m Instructions for updating:
[2m[36m(pid=2388)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2387)[0m Instructions for updating:
[2m[36m(pid=2387)[0m non-resource variables are not supported in the long term


Restore a policy

In [22]:
# NOTE(review): hard-coded absolute path into one user's home directory; this
# cell only runs on a machine that has this checkpoint. Consider moving the
# path into a configurable constant.
agent.restore("/home/rwa/ray_results/PPO_AMRGame_2021-02-15_15-05-16nycz59aw/checkpoint_4/checkpoint-4")

2021-02-15 18:53:43,802	INFO trainable.py:329 -- Restored on 128.15.244.46 from checkpoint: /home/rwa/ray_results/PPO_AMRGame_2021-02-15_15-05-16nycz59aw/checkpoint_4/checkpoint-4
2021-02-15 18:53:43,803	INFO trainable.py:336 -- Current state after restoring: {'_iteration': 4, '_timesteps_total': None, '_time_total': 2362.5617892742157, '_episodes_total': 201600}


In [23]:
# Policy object of the restored trainer; used below to evaluate observations.
policy = agent.get_policy()

Now we want to create the larger problem we'll be applying this local indicator on.

In [24]:
# Rebind the global mesh/space/function to the larger problem; the helper
# functions below read these globals.
mesh = mfem.Mesh('inline-quad-20.mesh')
fec = mfem.L2_FECollection(p=1, dim=2)
fes = mfem.FiniteElementSpace(mesh, fec)
u = mfem.GridFunction(fes)
# Project a new random instance of the combined test function onto u.
c = BumpsAndSmoothStep()
c.SetParams()
u.ProjectCoefficient(c)
glvis((mesh, u), 600, 600,layout = Layout(width='100%', height='600px'))

glvis(layout=Layout(height='600px', width='100%'))

Build a map from each element to the elements that make up the "stencil" around it. Only elements whose full 5x5 stencil lies inside the domain are kept.

In [25]:
def _full_stencil(cx, cy, dx):
    """Return the 25 element ids of the 5x5 stencil centred at (cx, cy), or None.

    ``None`` is returned as soon as a stencil point falls outside the unit
    square or cannot be located in the global ``mesh``, so no point lookup
    is spent on stencils that will be discarded anyway.
    """
    stencil = []
    # Row-major sweep (j outer, i inner); build_map uses the same ordering.
    for j in range(-2,3):
        for i in range(-2,3):
            px = cx + i*dx
            py = cy + j*dx
            if not (0.0 <= px <= 1.0 and 0.0 <= py <= 1.0):
                return None
            n, el, ip = mesh.FindPoints([[px, py]])
            # MFEM documents a negative element id for a point that was not
            # found; such a stencil cannot be used for a DOF transfer.
            if el[0] < 0:
                return None
            stencil.append(el[0])
    return stencil


def build_stencils():
    """Map every element of the global ``mesh`` with a full stencil to that stencil.

    For each element, the 5x5 block of points spaced ``dx`` apart around
    the element centre is looked up with ``mesh.FindPoints``. Elements whose
    block sticks out of the unit square are left out of the result.

    Returns:
        dict mapping element id -> list of 25 element ids, ordered row by
        row (j outer, i inner).
    """
    num_elements = mesh.GetNE()
    # assumes a uniform square grid of elements on the unit square, so that
    # sqrt(NE) is the number of elements per side -- TODO confirm for other meshes
    nx = math.sqrt(num_elements)
    dx = 1.0/nx
    center = mfem.Vector(mesh.Dimension())
    els = {}
    for k in range(num_elements):
        mesh.GetElementCenter(k, center)
        stencil = _full_stencil(center[0], center[1], dx)
        if stencil is not None:
            els[k] = stencil
    return els

In [26]:
# Global stencil map for the current mesh; read by transfer_stencil and the
# logit/distribution helpers below.
els = build_stencils()

Create the local mesh into which we will copy the dofs for the purposes of creating an observation vector.

In [27]:
# Local observation mesh, built from the same mesh file and element type that
# AMRGameDummy uses to size the policy's observation space.
obs_mesh = mfem.Mesh('inline-quad.mesh')
obs_fec = mfem.L2_FECollection(p=1, dim=2)
obs_fes = mfem.FiniteElementSpace(obs_mesh, obs_fec)
# Grid function that receives the copied stencil DOFs.
obs_u = mfem.GridFunction(obs_fes)
glvis((obs_mesh), 600, 600,layout = Layout(width='100%', height='600px'))

glvis(layout=Layout(height='600px', width='100%'))

Now we need a mapping from the "logical" space of the observation mesh into element ids. This has the same ordering as the stencil elements, so we can form a 1-to-1 mapping for the purposes of data transfer.

In [28]:
def build_map(obs_mesh):
    """List the ``obs_mesh`` element ids of a 5x5 grid of sample points.

    The points are spaced 1/5 apart around (0.5, 0.5) and visited row by
    row (j outer, i inner). Each point is looked up with
    ``obs_mesh.FindPoints`` and the first returned element id is recorded.

    Args:
        obs_mesh: mesh-like object exposing ``FindPoints``.

    Returns:
        List of 25 element ids in visiting order.
    """
    center = (0.5, 0.5)
    spacing = 1./5.
    offsets = range(-2,3)
    id_map = []
    for row in offsets:
        py = center[1] + row*spacing
        for col in offsets:
            px = center[0] + col*spacing
            _, found, _ = obs_mesh.FindPoints([[px, py]])
            id_map.append(found[0])
    return id_map

In [29]:
# Global lookup: stencil position n -> element id in the observation mesh.
id_map = build_map(obs_mesh)

Create a function to transfer the data from the stencil associated with a source element k into the observation grid function (gf).

In [43]:
def transfer_stencil(k):
    """Copy the DOF values of the stencil of source element ``k`` into ``obs_u``.

    Reads the globals ``els``, ``id_map``, ``fes``, ``obs_fes`` and ``u`` and
    writes into the global ``obs_u``. The n-th stencil element is copied to
    the observation element ``id_map[n]``.

    Args:
        k: key into ``els`` (a source element that has a stencil entry).
    """
    for position, source_el in enumerate(els[k]):
        target_el = id_map[position]
        source_dofs = fes.GetElementDofs(source_el)
        target_dofs = obs_fes.GetElementDofs(target_el)
        # assumes both elements expose the same number of DOFs in the same
        # local order -- TODO confirm
        for local in range(len(source_dofs)):
            obs_u[target_dofs[local]] = u[source_dofs[local]]

Test it out on a specific src element:

In [44]:
# Try the transfer on one source element (16 must be a key of `els`).
transfer_stencil(16)
# Copy the observation DOFs into a NumPy array for the policy.
obs = np.array(obs_u.GetDataArray())
print(len(obs))
# Evaluate the policy without exploration; `info` carries the distribution inputs.
action, _, info = policy.compute_single_action(obs, explore=False)
# Index 10 is the same index that compute_logits uses as `center_el`.
info['action_dist_inputs'][10]

100


-1.2092541

In [45]:
# Show the observation mesh with the values copied by transfer_stencil.
glvis((obs_mesh, obs_u), 600, 600,layout = Layout(width='100%', height='600px'))

glvis(layout=Layout(height='600px', width='100%'))

Iterate over all the elements with full stencils in the src mesh and record logits for each observation:

In [46]:
def compute_logits():
    """Evaluate the policy once per stencil and collect one logit per element.

    For every key of the global ``els``, the stencil is copied into
    ``obs_u``, the policy is evaluated on the resulting observation with
    exploration disabled, and the entry ``center_el`` of
    ``info['action_dist_inputs']`` is stored.

    Returns:
        dict mapping source element id -> selected logit.
    """
    # NOTE(review): presumably the id of the central element of the
    # observation mesh (i.e. id_map[12]) -- confirm instead of relying on
    # the hard-coded value.
    center_el = 10
    per_element = {}
    for source_el in els:
        transfer_stencil(source_el)
        observation = np.array(obs_u.GetDataArray())
        _, _, extras = policy.compute_single_action(observation, explore=False)
        per_element[source_el] = extras['action_dist_inputs'][center_el]
    return per_element

Re-normalize the collected logits into probs.

In [47]:
def compute_distribution():
    """Turn the per-element logits into a probability distribution (softmax).

    The largest logit is subtracted before exponentiating. This leaves the
    normalised result mathematically unchanged but keeps ``math.exp`` from
    overflowing when the logits are large.

    Returns:
        dict mapping element id -> probability, in the same key order as
        the dict returned by ``compute_logits``. An empty dict is returned
        when there are no logits.
    """
    # Convert to Python floats first so the shift below is done in double precision.
    logits = {k: float(v) for k, v in compute_logits().items()}
    if not logits:
        return {}
    peak = max(logits.values())
    weights = {k: math.exp(logit - peak) for k, logit in logits.items()}
    total = sum(weights.values())
    return {k: w/total for k, w in weights.items()}

In [35]:
# `probs` was not defined by any cell of this notebook (it only existed as
# leftover kernel state), so compute it here to keep Restart & Run All working.
probs = compute_distribution()
# NOTE(review): elements whose probability is BELOW the threshold are
# selected, matching refine_via_policy -- confirm this direction is intended.
refine_els = [p for p in probs if probs[p] < 0.0035]
refine_els

[59,
 60,
 63,
 64,
 65,
 166,
 169,
 170,
 171,
 172,
 173,
 174,
 194,
 195,
 196,
 197,
 198,
 199,
 200,
 201,
 202,
 203,
 204,
 205,
 206,
 207,
 208,
 209,
 210,
 211,
 212,
 213,
 214,
 215,
 216,
 217,
 218,
 219,
 220,
 221,
 222,
 223,
 224,
 225,
 226,
 227,
 228,
 229,
 230,
 231,
 232,
 233,
 234,
 235,
 248,
 249,
 250,
 251,
 252,
 253,
 254,
 255,
 256,
 257,
 276,
 277,
 278,
 279,
 280,
 281,
 282,
 283,
 284,
 285,
 286,
 287,
 288,
 289,
 290,
 291,
 292,
 293,
 302,
 303,
 312,
 313,
 314,
 315,
 316,
 317,
 318,
 319,
 320,
 321,
 322,
 323,
 324,
 325,
 326,
 327,
 328,
 329,
 330,
 331,
 332,
 333,
 334,
 335,
 336,
 337,
 338,
 339,
 340,
 341,
 342,
 343,
 344,
 345,
 346,
 347,
 348,
 349,
 350,
 351,
 352,
 353,
 354,
 355,
 356,
 357,
 358,
 361,
 376,
 377,
 378,
 379,
 380,
 381,
 382,
 383,
 384,
 385,
 386,
 387]

In [36]:
# Refine the selected elements in place, then update the space and the grid
# function to follow the refined mesh.
# NOTE(review): the stencil map `els` was built before this refinement and is
# not rebuilt here.
mesh.GeneralRefinement(mfem.intArray(refine_els))
fes.Update()
u.Update()
glvis((mesh, u), 600, 600,layout = Layout(width='100%', height='600px'))

glvis(layout=Layout(height='600px', width='100%'))

In [60]:
def new_function():
    """Reset the global problem to a fresh mesh with a new random test function.

    Rebinds the globals ``mesh``, ``fec``, ``fes`` and ``u`` from
    'inline-quad-20.mesh' and projects a newly randomised
    ``BumpsAndSmoothStep`` onto ``u``.

    Returns:
        The GLVis widget showing the new mesh and function.
    """
    global mesh, fec, fes, u
    mesh = mfem.Mesh('inline-quad-20.mesh')
    fec = mfem.L2_FECollection(p=1, dim=2)
    fes = mfem.FiniteElementSpace(mesh, fec)
    u = mfem.GridFunction(fes)

    coefficient = BumpsAndSmoothStep()
    coefficient.SetParams()
    u.ProjectCoefficient(coefficient)

    view_layout = Layout(width='100%', height='600px')
    return glvis((mesh, u), 600, 600, layout=view_layout)

In [61]:
def refine_via_policy(thresh):
    """Refine the elements the policy distribution places below ``thresh``.

    Computes the distribution over elements with full stencils, refines
    every element whose probability is strictly less than ``thresh`` in the
    global ``mesh``, and updates the global ``fes`` and ``u``.

    Args:
        thresh: probability cut-off; elements below it are refined.

    Returns:
        The GLVis widget showing the refined mesh and function.
    """
    # NOTE(review): selection keeps elements BELOW the threshold -- confirm
    # this direction is intended. The global stencil map `els` is not rebuilt
    # after the refinement below.
    probabilities = compute_distribution()
    marked = [el for el, prob in probabilities.items() if prob < thresh]
    mesh.GeneralRefinement(mfem.intArray(marked))
    fes.Update()
    u.Update()
    view_layout = Layout(width='100%', height='600px')
    return glvis((mesh, u), 600, 600, layout=view_layout)

In [76]:
# Reset the globals to a fresh 'inline-quad-20.mesh' with a new random function.
new_function()

glvis(layout=Layout(height='600px', width='100%'))

In [77]:
# Refine every element whose policy probability is below 0.003 and show the result.
refine_via_policy(0.003)

glvis(layout=Layout(height='600px', width='100%'))