# Minigame 11: Choose One Element To Refine And Advect Mesh

This is like minigame10, except that we're now "advecting" the mesh by a rigid translation. This is like a "remap" method, where the "remap" simply re-projects the known function onto the new mesh.

Some things to explore:

* PPO vs DQN vs ?
* CNN vs MLP vs ?
* order=1 vs order=2 vs ?
* H1 space vs DG space vs ?

Setup PyMFEM:

In [1]:
import math
from math import cos,sin
import random

In [2]:
import copy

In [3]:
import sys
import gym
from gym import spaces, utils
import numpy as np
import ray
import ray.rllib.agents.ppo as ppo
from os.path import expanduser, join
import os

Instructions for updating:
non-resource variables are not supported in the long term


In [4]:
from pyglvis import GlvisWidget

In [5]:
from mfem import path
import mfem.ser as mfem

Start up rllib

In [6]:
# Shut down any Ray runtime left over from a previous run of this cell before starting a new one.
ray.shutdown()
# This env setting is necessary to avoid problems within rllib due to serialization and workers
# NOTE(review): no environment variable is set in this cell; the only one set in this
# notebook is RAY_PICKLE_VERBOSE_DEBUG, just before the trainer is built. Confirm which
# setting the comment above refers to.
ray.init(ignore_reinit_error=True)
# Start from RLlib's default PPO configuration and override a few entries.
config = ppo.DEFAULT_CONFIG.copy()
config['train_batch_size'] = int(1e4)
config['num_workers'] = 3
# 'tfe' selects the eager TensorFlow framework (the trainer later logs "Executing eagerly").
config['framework'] = 'tfe'
# Bare expression: the notebook displays the resulting config dict.
config

2021-02-12 21:09:45,789	INFO services.py:1173 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m


{'num_workers': 3,
 'num_envs_per_worker': 1,
 'create_env_on_driver': False,
 'rollout_fragment_length': 200,
 'batch_mode': 'truncate_episodes',
 'num_gpus': 0,
 'train_batch_size': 10000,
 'model': {'fcnet_hiddens': [256, 256],
  'fcnet_activation': 'tanh',
  'conv_filters': None,
  'conv_activation': 'relu',
  'free_log_std': False,
  'no_final_linear': False,
  'vf_share_layers': True,
  'use_lstm': False,
  'max_seq_len': 20,
  'lstm_cell_size': 256,
  'lstm_use_prev_action': False,
  'lstm_use_prev_reward': False,
  '_time_major': False,
  'framestack': True,
  'dim': 84,
  'grayscale': False,
  'zero_mean': True,
  'custom_model': None,
  'custom_model_config': {},
  'custom_action_dist': None,
  'custom_preprocessor': None,
  'lstm_use_prev_action_reward': -1},
 'optimizer': {},
 'gamma': 0.99,
 'horizon': None,
 'soft_horizon': False,
 'no_done_at_end': False,
 'env_config': {},
 'env': None,
 'normalize_actions': False,
 'clip_rewards': None,
 'clip_actions': True,
 'preproc

In [7]:
def get_solnstream(mesh,soln):
    """Serialize a mesh and a solution into one GLVis "solution" stream.

    The mesh is written with ``mesh.Print(path)`` and the solution with
    ``soln.Save(path)``; both files are read back as text and concatenated
    behind a ``"solution\\n"`` header line.

    The scratch files live in a private temporary directory that is removed
    before returning, so nothing is left behind in the working directory and
    concurrent callers (e.g. several worker processes) cannot overwrite each
    other's files.

    Args:
        mesh: object providing ``Print(filename)`` (e.g. an ``mfem.Mesh``).
        soln: object providing ``Save(filename)`` (e.g. an ``mfem.GridFunction``).

    Returns:
        str: ``"solution\\n"`` followed by the mesh text and the solution text.
    """
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as scratch:
        meshpath = os.path.join(scratch, "tmpmesh")
        solnpath = os.path.join(scratch, "tmpsoln")
        mesh.Print(meshpath)
        soln.Save(solnpath)
        with open(meshpath, "r") as f:
            meshdata = f.read()
        with open(solnpath, "r") as f:
            solndata = f.read()
    return "solution\n" + meshdata + solndata

Define some synthetic test functions: steps and bumps.

In [8]:
def rotate(x,theta):
    """Rotate the 2D point ``x`` about the origin by ``theta`` radians.

    Applies the standard rotation matrix [[cos, -sin], [sin, cos]] to
    ``(x[0], x[1])`` and returns the result as a new two-element list.
    """
    c, s = cos(theta), sin(theta)
    px, py = x[0], x[1]
    return [px*c - py*s, px*s + py*c]

In [9]:
def step(x):
    """Sharp step in the first coordinate: 1.0 where ``x[0] < 0``, otherwise 0.0."""
    return 1.0 if x[0] < 0.0 else 0.0

In [10]:
def rotated_step(x, theta):
    """Evaluate the sharp step at the point ``x`` after rotating it by ``theta``."""
    return step(rotate(x, theta))

In [11]:
def bump(x):
    """Gaussian bump ``exp(-(x[0]^2 + x[1]^2))`` centered at the origin."""
    return math.exp(-(x[0]**2 + x[1]**2))

In [12]:
def smooth_step(x):
    """Smooth step in the first coordinate: ``0.5*(1 + tanh(x[0]))``, ranging over (0, 1)."""
    ramp = math.tanh(x[0])
    return 0.5*(1.0 + ramp)

In [13]:
def rotated_smooth_step(x,theta):
    """Evaluate the smooth step at the point ``x`` after rotating it by ``theta``."""
    return smooth_step(rotate(x, theta))

Create classes where we can set the parameters and then eval a bunch of points.

In [14]:
class Step(mfem.PyCoefficient):
    """Sharp step coefficient with a random orientation and a random shift.

    ``SetParams`` must be called before the coefficient is evaluated, since
    it is what creates ``theta`` and ``dx``.
    """

    def SetParams(self):
        """Draw a rotation angle in [0, 2*pi] and a 2D shift with components in [-1, 1]."""
        self.theta = random.uniform(0.0, 2.0*math.pi)
        shift_x = random.uniform(-1.0, 1.0)
        shift_y = random.uniform(-1.0, 1.0)
        self.dx = [shift_x, shift_y]

    def EvalValue(self, x):
        """Return the rotated step evaluated at ``x + dx``."""
        # assumes x is a NumPy array so that adding the list dx is elementwise -- TODO confirm
        shifted = x + self.dx
        return rotated_step(shifted, self.theta)

In [15]:
class Bump(mfem.PyCoefficient):
    """Gaussian bump coefficient with a random width and a random offset from (0.5, 0.5).

    ``SetParams`` must be called before the coefficient is evaluated.
    """

    def SetParams(self):
        """Draw a width in [0.1, 1.0] and an offset with components in [-0.5, 0.5]."""
        self.width = random.uniform(0.1,1.0)
        self.xc = [0.5,0.5]
        offset_x = random.uniform(-0.5, 0.5)
        offset_y = random.uniform(-0.5, 0.5)
        self.dx = [offset_x, offset_y]

    def EvalValue(self, x):
        """Return the bump evaluated at ``(x - xc + dx) / width``."""
        # assumes x is a NumPy array so the list arithmetic is elementwise -- TODO confirm
        scaled = (x - self.xc + self.dx)/self.width
        return bump(scaled)

In [16]:
class TwoBump(mfem.PyCoefficient):
    """Average of two Gaussian bumps, each with its own random width and offset.

    ``SetParams`` must be called before the coefficient is evaluated.
    """

    def SetParams(self):
        """Draw two widths in [0.1, 0.5] and two offsets with components in [-0.5, 0.5]."""
        self.width1 = random.uniform(0.1,0.5)
        self.width2 = random.uniform(0.1,0.5)
        self.xc1 = [0.5,0.5]
        self.xc2 = [0.5,0.5]
        dx1_x = random.uniform(-0.5, 0.5)
        dx1_y = random.uniform(-0.5, 0.5)
        self.dx1 = [dx1_x, dx1_y]
        dx2_x = random.uniform(-0.5, 0.5)
        dx2_y = random.uniform(-0.5, 0.5)
        self.dx2 = [dx2_x, dx2_y]

    def EvalValue(self, x):
        """Return the mean of the two bumps at ``x``."""
        # assumes x is a NumPy array so the list arithmetic is elementwise -- TODO confirm
        first = bump((x - self.xc1 + self.dx1)/self.width1)
        second = bump((x - self.xc2 + self.dx2)/self.width2)
        # An earlier variant took max(first, second) instead of the average.
        return 0.5*(first + second)

In [17]:
class SmoothStep(mfem.PyCoefficient):
    """Smooth (tanh) step through (0.5, 0.5) with random steepness and orientation.

    ``SetParams`` must be called before the coefficient is evaluated.
    """

    def SetParams(self):
        """Draw a steepness factor in [5, 10] and a rotation angle in [0, 2*pi]."""
        self.width = random.uniform(5.0, 10.0)
        self.xc = [0.5,0.5]
        self.theta = random.uniform(0.0, 2.0*math.pi)

    def EvalValue(self, x):
        """Return the rotated smooth step at ``(x - xc) * width``.

        The shift is done on a copy. The previous ``x -= self.xc`` modified
        the caller's array in place, so any later use of the same point
        (e.g. a second coefficient evaluated at ``x``) silently saw shifted
        coordinates, and plain list/tuple input raised ``TypeError``.
        """
        centered = np.asarray(x, dtype=float) - self.xc
        return rotated_smooth_step(centered*self.width, self.theta)

In [18]:
class BumpsAndSmoothStep(mfem.PyCoefficient):
    """Equal-weight blend of one random ``Bump`` and one random ``SmoothStep``.

    ``SetParams`` must be called before the coefficient is evaluated.
    """

    def SetParams(self):
        """Create both component coefficients and randomize their parameters."""
        bump_part = Bump()
        bump_part.SetParams()
        self.bump = bump_part

        step_part = SmoothStep()
        step_part.SetParams()
        self.smooth_step = step_part

    def EvalValue(self, x):
        """Return ``0.5*bump(x) + 0.5*smooth_step(x)``."""
        # The bump is evaluated before the smooth step, as in the original expression.
        bump_value = self.bump.EvalValue(x)
        step_value = self.smooth_step.EvalValue(x)
        return 0.5*bump_value + 0.5*step_value


Visualize an instance of the test function. Note that each instance has randomly chosen parameters.  For the steps, it's a rotation angle and a displacement.  For the bumps, it's a width and a displacement.

In [19]:
# Load the base quad mesh and uniformly refine it three times.
mesh = mfem.Mesh('inline-quad.mesh')
for _refine_pass in range(3):
    mesh.UniformRefinement()

# First-order discontinuous (L2) space on the refined mesh.
fec = mfem.L2_FECollection(p=1, dim=2)
fes = mfem.FiniteElementSpace(mesh, fec)

# Project one randomly parameterized test function onto that space.
c = BumpsAndSmoothStep()
c.SetParams()
u = mfem.GridFunction(fes)
u.ProjectCoefficient(c)

In [20]:
gl = GlvisWidget(get_solnstream(mesh,u))
gl

GlvisWidget()

Create the gym environment.

In [21]:
class AMRGame(gym.Env):
    """One-step AMR game: advect the mesh, refine one element, score the error change.

    Every episode starts from the unrefined mesh with a freshly randomized
    analytic function projected onto a first-order L2 space. An action is
    the index of the single element to refine. ``step`` translates the mesh
    rigidly, refines the chosen element, re-projects the analytic function
    and returns the decrease in L2 error as the reward. Every episode ends
    after one step.

    NOTE(review): ``observation_space`` is sized from the unrefined mesh,
    while ``step`` returns the coefficient array of the refined space, which
    presumably has a different length -- confirm this is acceptable for the
    RL library (episodes terminate immediately, so the array is never fed
    back to the policy by this notebook).
    """

    class u0_coeff(mfem.PyCoefficient):
        """Analytic target function: a randomly parameterized ``BumpsAndSmoothStep``."""

        def SetParams(self):
            """Create and randomize the wrapped function."""
            self.fn = BumpsAndSmoothStep()
            self.fn.SetParams()

        def EvalValue(self, x):
            """Evaluate the wrapped function at ``x``."""
            return self.fn.EvalValue(x)

    # In RLlib, you need the config arg
    def __init__(self,config):
        """Load the mesh, set up the advection shift and the gym spaces.

        Args:
            config: environment config passed by RLlib; not used here.
        """
        self.meshfile = 'inline-quad-7.mesh'

        # keep a copy of the unrefined mesh so we can restore it
        self.mesh0 = mfem.Mesh(self.meshfile)
        self.mesh = mfem.Mesh(self.meshfile)

        # Advection direction (0.5, 0.5), normalized to unit length and then
        # divided by sqrt(#elements). The x and y components are equal, so
        # one scalar fills the whole displacement vector.
        # These used to be stored in self.u, which collided with the
        # GridFunction attribute of the same name assigned further down.
        vel_x = 0.5
        vel_y = 0.5
        mag = math.sqrt(vel_x**2 + vel_y**2)
        nx = math.sqrt(self.mesh.GetNE())
        self.shift = vel_x/mag/nx
        self.v = vel_y/mag/nx
        self.displ = self._make_displacement()

        # The only reason we need to create a fespace and gf here
        # is to find the sizes needed for the action and observation spaces
        dim = self.mesh.Dimension()
        self.order = 1
        self.fec = mfem.L2_FECollection(self.order, dim)
        self.fes = mfem.FiniteElementSpace(self.mesh, self.fec)
        self.u = mfem.GridFunction(self.fes)

        # one action per element of the unrefined mesh: refine that element
        # (there is no "do nothing" action)
        self.action_space = spaces.Discrete(self.mesh.GetNE())
        self.observation_space = spaces.Box(-1.0, 1.0, shape=(self.u.Size(),), dtype=np.float32)
        self.state = None

        # call reset to create the first synthetic function
        self.reset()

    def _make_displacement(self):
        """Build a per-vertex displacement vector matching the current mesh."""
        displ = mfem.Vector(self.mesh.GetNV()*self.mesh.Dimension())
        displ.Assign(self.shift)
        return displ

    def get_ne(self):
        """Return the number of elements in the current mesh."""
        return self.mesh.GetNE()

    def get_size(self):
        """Return the number of entries in the current solution vector."""
        return self.u.Size()

    # Compute L2 error wrt to the analytic fn definition
    def get_error(self):
        """Return the L2 error of the discrete field against the analytic function."""
        return self.u.ComputeL2Error(self.u0)

    # Manually refine the elements in the array elems
    def refine_elems(self, elems):
        """Refine the listed elements, then update and re-project the field.

        Args:
            elems: list of element indices to refine.
        """
        self.mesh.GeneralRefinement(mfem.intArray(elems))
        self.fes.Update()
        self.u.Update()
        self.u.ProjectCoefficient(self.u0)

    def move_mesh(self):
        """Translate every mesh vertex by the fixed advection shift.

        The displacement vector is rebuilt whenever the vertex count has
        changed. Previously it was sized once for the unrefined mesh, so
        moving a refined mesh handed ``MoveVertices`` a vector that was too
        short for the number of vertices.
        """
        needed = self.mesh.GetNV()*self.mesh.Dimension()
        if self.displ.Size() != needed:
            self.displ = self._make_displacement()
        self.mesh.MoveVertices(self.displ)

    # action is the number of the element to refine
    def step(self, action):
        """Advect the mesh, refine element ``action`` and score the result.

        Args:
            action: index of the element to refine.

        Returns:
            tuple: ``(observation, reward, done, info)`` where the reward is
            the L2 error before the move minus the L2 error after the move
            and refinement, ``done`` is always True and ``info`` is empty.
        """
        err_before = self.get_error()
        self.move_mesh()
        # int() so NumPy integer actions are passed on as plain Python ints
        self.refine_elems([int(action)])
        err_after = self.get_error()
        reward = err_before - err_after
        done = True
        self.state = self.u.GetDataArray()
        return np.array(self.state), reward, done, {}

    # similar to reset, but do not choose a new function
    def reinit(self):
        """Restore the unrefined, unmoved mesh and re-project the current function.

        Returns:
            numpy.ndarray: copy of the re-projected coefficient array.
        """
        del self.mesh
        self.mesh = mfem.Mesh(self.mesh0)

        del self.fes
        self.fes = mfem.FiniteElementSpace(self.mesh, self.fec)

        del self.u
        self.u = mfem.GridFunction(self.fes)
        self.u.ProjectCoefficient(self.u0)

        self.state = self.u.GetDataArray()
        return np.array(self.state)

    # every reset of the env chooses a new synthetic function
    def reset(self):
        """Pick a new random analytic function and restore the initial mesh.

        Returns:
            numpy.ndarray: the initial observation.
        """
        self.u0 = self.u0_coeff()
        self.u0.SetParams()
        return self.reinit()

    def render(self, mode='human'):
        """Return a GLVis widget showing the current mesh and field.

        Args:
            mode: accepted for compatibility with ``gym.Env.render``; ignored.
        """
        return GlvisWidget(get_solnstream(self.mesh,self.u))

Instantiate the environment and sanity check it.

In [22]:
env = AMRGame(None)

In [23]:
env.get_ne()

49

In [24]:
env.get_size()

196

In [25]:
env.reset()
env.render()

GlvisWidget()

In [26]:
state, reward, done, info = env.step(0)

In [27]:
env.render()

GlvisWidget()

Show the mesh with element 0 refined. Then we'll test resetting it to the original state. We're going to need this when searching through all the actions for the best one.

In [28]:
env.render()

GlvisWidget()

In [29]:
env.move_mesh()
env.render()

GlvisWidget()

In [30]:
env.reinit() # puts the mesh/fields back in the orig state
env.render()

GlvisWidget()

Ok, try training a policy

In [31]:
os.environ["RAY_PICKLE_VERBOSE_DEBUG"] = "1"
config['train_batch_size'] = int(1e3)
agent = ppo.PPOTrainer(config, env=AMRGame)

2021-02-12 21:09:48,715	INFO trainer.py:588 -- Executing eagerly, with eager_tracing=False
2021-02-12 21:09:48,715	INFO trainer.py:618 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=24850)[0m Instructions for updating:
[2m[36m(pid=24850)[0m non-resource variables are not supported in the long term
[2m[36m(pid=24851)[0m Instructions for updating:
[2m[36m(pid=24851)[0m non-resource variables are not supported in the long term
[2m[36m(pid=24853)[0m Instructions for updating:
[2m[36m(pid=24853)[0m non-resource variables are not supported in the long term


In [32]:
%%time
for n in range(1):
    result = agent.train()
    print("episode reward mean: %f " % result["episode_reward_mean"])

[2m[36m(pid=24850)[0m   arr = np.array(v)
[2m[36m(pid=24851)[0m   arr = np.array(v)
[2m[36m(pid=24853)[0m   arr = np.array(v)


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.
episode reward mean: 0.000075 
CPU times: user 7.26 s, sys: 222 ms, total: 7.48 s
Wall time: 16.1 s


[2m[36m(pid=24850)[0m Instructions for updating:
[2m[36m(pid=24850)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


In [33]:
policy = agent.get_policy()
model = policy.model
print(model.base_model.summary())

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 196)]        0                                            
__________________________________________________________________________________________________
fc_1 (Dense)                    (None, 256)          50432       observations[0][0]               
__________________________________________________________________________________________________
fc_value_1 (Dense)              (None, 256)          50432       observations[0][0]               
__________________________________________________________________________________________________
fc_2 (Dense)                    (None, 256)          65792       fc_1[0][0]                       
______________________________________________________________________________________________

Create a convenience function for applying a policy to a given observation

In [34]:
def apply_policy(model, obs):
    """Let the trained agent pick an action for ``obs`` and apply it to the global env.

    Args:
        model: unused; the action comes from the global ``agent``.
        obs: observation handed to the agent.

    Returns:
        tuple: ``(action, reward)``. The global ``env`` is left in its
        post-step state (it is not re-initialized here).
    """
    # explore=False: use deterministic mode
    chosen = agent.compute_action(obs, explore=False)
    _, reward, _, _ = env.step(chosen)
    return chosen, reward

[2m[36m(pid=24851)[0m Instructions for updating:
[2m[36m(pid=24851)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=24853)[0m Instructions for updating:
[2m[36m(pid=24853)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


In [35]:
obs = env.reset()
action, reward = apply_policy(model, obs)
action, reward

(48, 0.0004027331533718731)

In [36]:
obs = env.reinit()
action, reward = apply_policy(model, obs)
action, reward

(48, 0.0004027331533718731)

Brute force search for the best choice by trying each one, remembering to reset the environment after each action and after we're done.

In [37]:
def find_optimal(obs):
    """Brute-force the best single-element refinement for the global ``env``.

    Every element of the unrefined mesh is tried in turn, with the
    environment restored after each trial, so the env is back in its
    initial state on return.

    The running maximum starts at minus infinity rather than 0.0. With 0.0,
    a function for which no refinement yields a positive reward produced
    ``maxel == -1``, which callers then passed on as an element index.

    Args:
        obs: unused; kept for interface compatibility.

    Returns:
        tuple: ``(element, reward)`` of the best action; the first element
        wins ties. ``(-1, -inf)`` only if the mesh has no elements.
    """
    best_reward = float("-inf")
    best_elem = -1
    # start from the unrefined mesh so the element count is the action count
    env.reinit()
    for elem in range(env.get_ne()):
        _, reward, _, _ = env.step(elem)
        env.reinit()
        if reward > best_reward:
            best_reward = reward
            best_elem = elem
    return best_elem, best_reward

In [38]:
obs = env.reset()
maxel, maxr = find_optimal(obs)
env.refine_elems([maxel])
env.render()

GlvisWidget()

Compare with what the policy does:

In [39]:
env.reinit()
apply_policy(model,obs)
env.render()

GlvisWidget()

Define an error estimator based on the difference between the discontinuous and continuous representations. This is only valid for L2 FE spaces.

In [40]:
def find_dgjumps(env):
    """Choose the element where the L2 field differs most from its H1 average.

    The discontinuous field ``env.u`` is projected onto a first-order H1
    space with arithmetic averaging. The element-wise L2 difference between
    the two fields is used as the error indicator, and the element with the
    largest indicator is the chosen action. The action is applied once to
    obtain its reward and the env is then restored.

    Args:
        env: environment exposing ``mesh``, ``u``, ``step`` and ``reinit``.

    Returns:
        tuple: ``(best_action, reward)``.
    """
    mesh = env.mesh
    u_dg = env.u

    # Wrap the L2 grid function in a coefficient so it can be projected.
    dg_coeff = mfem.GridFunctionCoefficient(u_dg)
    cont_fec = mfem.H1_FECollection(p=1, dim=2)
    cont_fes = mfem.FiniteElementSpace(mesh, cont_fec)
    u_cont = mfem.GridFunction(cont_fes)
    u_cont.ProjectDiscCoefficient(dg_coeff, mfem.GridFunction.ARITHMETIC)

    # Wrap the H1-smoothed field in a coefficient as well.
    cont_coeff = mfem.GridFunctionCoefficient(u_cont)

    # An order-0 L2 field holds one error value per element.
    const_fec = mfem.L2_FECollection(p=0,dim=2)
    const_fes = mfem.FiniteElementSpace(mesh,const_fec)
    elem_err = mfem.GridFunction(const_fes)

    # Element-wise "error" between the discontinuous and continuous fields.
    u_dg.ComputeElementL2Errors(cont_coeff, elem_err)

    best_action = np.argmax(elem_err.GetDataArray())

    # Apply the action once to measure its reward, then restore the env.
    _, reward, _, _ = env.step(best_action)
    env.reinit()

    return best_action, reward

In [41]:
env.reset()
action, reward = find_dgjumps(env)
env.step(action)
env.render()

GlvisWidget()

A random policy gives us a scale for the low end.

In [42]:
def apply_random_policy(obs):
    """Refine one uniformly random element of the global ``env`` and restore it.

    Args:
        obs: unused.

    Returns:
        tuple: ``(element, reward)`` for the randomly chosen element.
    """
    pick = np.random.randint(0, env.get_ne())
    _, reward, _, _ = env.step(pick)
    env.reinit()
    return pick, reward

Run a more systematic evaluation using an ensemble of samples:

In [43]:
def eval_ensemble(model, ntrials):
    """Compare the policy, the DG-jump estimator and a random pick with the optimum.

    For each of ``ntrials`` random functions, the brute-force optimum is
    found first. Each method's reward shortfall against that optimum and
    whether it chose the optimal element are then accumulated.

    The env is restored after the policy is applied. ``apply_policy``
    leaves the env advected and refined, so the random baseline used to be
    stepped from that stale state instead of the initial one.

    Args:
        model: forwarded to ``apply_policy``.
        ntrials: number of random functions to evaluate; must be positive.

    Returns:
        tuple: ``(rms, max_err, pct_correct)`` for the policy, then the same
        three values for the DG estimator, then for the random baseline.
    """
    methods = ("policy", "dg", "rand")
    ncorrect = {name: 0 for name in methods}
    sumsq = {name: 0.0 for name in methods}
    maxerrsq = {name: 0.0 for name in methods}

    for _ in range(ntrials):
        obs = env.reset()

        bestaction, bestreward = find_optimal(obs)
        outcome = {}
        outcome["dg"] = find_dgjumps(env)
        outcome["policy"] = apply_policy(model, obs)
        # put the mesh/field back before measuring the random baseline
        env.reinit()
        outcome["rand"] = apply_random_policy(obs)

        for name in methods:
            action, reward = outcome[name]
            errsq = (bestreward - reward)**2
            maxerrsq[name] = max(errsq, maxerrsq[name])
            sumsq[name] += errsq
            if bestaction == action:
                ncorrect[name] += 1

    summary = []
    for name in methods:
        rms = math.sqrt(sumsq[name]/ntrials)
        # Printed under the label "max sq error", but this is the square root
        # of the largest squared error, i.e. the largest absolute error.
        maxerr = math.sqrt(maxerrsq[name])
        corr = 100.*ncorrect[name]/ntrials
        print(name + " rms error: ",rms,flush=True)
        print(name + " max sq error: ",maxerr,flush=True)
        print(name + " % correct: ",corr,flush=True)
        summary.extend((rms, maxerr, corr))

    return tuple(summary)

eval_ensemble(model, 100)

Run a few eval sample sizes to get a sense of how many are needed to estimate the metrics of the policy

eval_ensemble(model, 200)

eval_ensemble(model, 400)

Let's see if the training process is making progress:

In [None]:
# Training budget: nbatches training iterations of batch_size samples each,
# with an ensemble evaluation of neval instances after every iteration.
nbatches = 20
total_episodes = 1.e6
batch_size = total_episodes/nbatches
neval = 400

# Rebuild the trainer so training starts from scratch with the new batch size.
del agent
config['train_batch_size'] = int(batch_size)
agent = ppo.PPOTrainer(config, env=AMRGame)
policy = agent.get_policy()
model = policy.model

# One slot per training iteration for each metric of each method.
rms, cor, maxerr = ([0.0] * nbatches for _ in range(3))
dg_rms, dg_cor, dg_maxerr = ([0.0] * nbatches for _ in range(3))
rand_rms, rand_cor, rand_maxerr = ([0.0] * nbatches for _ in range(3))

for n in range(nbatches):
    print("training batch %d of size %d" % (n,batch_size))
    agent.train()
    print("evaluating on %d instances..." %  neval)
    (rms[n], maxerr[n], cor[n],
     dg_rms[n], dg_maxerr[n], dg_cor[n],
     rand_rms[n], rand_maxerr[n], rand_cor[n]) = eval_ensemble(model, neval)

[2m[36m(pid=24852)[0m Instructions for updating:
[2m[36m(pid=24852)[0m non-resource variables are not supported in the long term


training batch 0 of size 50000


[2m[36m(pid=25224)[0m Instructions for updating:
[2m[36m(pid=25224)[0m non-resource variables are not supported in the long term
[2m[36m(pid=25226)[0m Instructions for updating:
[2m[36m(pid=25226)[0m non-resource variables are not supported in the long term
[2m[36m(pid=24852)[0m   arr = np.array(v)
[2m[36m(pid=25224)[0m   arr = np.array(v)
[2m[36m(pid=25226)[0m   arr = np.array(v)


In [None]:
%matplotlib inline
# Plot the evaluation metrics gathered after each training iteration:
# top panel = reward-shortfall errors (log scale), bottom panel = % optimal picks.
isteps = list(range(nbatches))
# x coordinate of each evaluation: iteration index times the training batch size.
# NOTE(review): entry n was evaluated after n+1 calls to agent.train(), so these
# x values appear to lag the amount of training by one batch -- confirm intent.
asteps = [i*config['train_batch_size'] for i in isteps]
import matplotlib.pyplot as plt
# Top panel: RMS and max error of the RL policy vs. the DG-jump estimator.
ax = plt.subplot(211)
ax.set_ylim(0.0001,0.01)
ax.set_ylabel('Error')
line1, = plt.semilogy(asteps,rms[:nbatches], marker='o')
line2, = plt.semilogy(asteps,dg_rms[:nbatches], marker='x')
line3, = plt.semilogy(asteps,maxerr[:nbatches], marker='.')
line4, = plt.semilogy(asteps,dg_maxerr[:nbatches], marker='+')

line1.set_label('RL rms')
line2.set_label('DG rms')
line3.set_label('RL max')
line4.set_label('DG max')
ax.legend()
plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))

# Bottom panel: percentage of trials in which each method chose the optimal element.
ax = plt.subplot(212)
ax.set_ylim(0,100)
ax.set_ylabel('% correct')
# Episodes in AMRGame end after a single step, so sampled steps and episodes coincide.
ax.set_xlabel('training episodes')
line1, = plt.plot(asteps,cor[:nbatches], marker='o')
line2, = plt.plot(asteps,dg_cor[:nbatches], marker='x')
line1.set_label('RL policy')
line2.set_label('DG')
ax.legend()
plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))

In [None]:
rms

Let's look for cases where the policy gets it right and the DG method gets it wrong.

In [None]:
# Sample up to 500 random functions and stop at the first one where the
# policy picks the optimal element while the DG estimator does not.
for n in range(500):
    obs = env.reset()
    opt_action, opt_reward = find_optimal(obs)
    dg_action, dg_reward = find_dgjumps(env)
    pol_action, pol_reward = apply_policy(model, obs)
    if pol_action != opt_action or dg_action == opt_action:
        continue
    break
# Show the policy's choice on the last sampled function (if no case was
# found, this is simply the 500th sample).
env.reinit()
env.step(pol_action)
env.render()

In [None]:
env.reinit()
env.step(dg_action)
env.render()