# Minigame 10: Choose One Element To Refine

This is essentially the global environment for refinement where the action is choosing one element at a time.  However, the observation space is the DOFs directly, not function values.

Some things to explore:

* PPO vs DQN vs ?
* CNN vs MLP vs ?
* order=1 vs order=2 vs ?
* H1 space vs DG space vs ?

Set up PyMFEM:

In [None]:
import math
from math import cos,sin
import random

In [None]:
import copy

In [None]:
import sys
import gym
from gym import spaces, utils
import numpy as np
import ray
import ray.rllib.agents.ppo as ppo
from os.path import expanduser, join
import os

In [None]:
from pyglvis import GlvisWidget

In [None]:
from mfem import path
import mfem.ser as mfem

Start up rllib

In [None]:
# Tear down any Ray runtime left over from an earlier run of this cell so
# that the init below starts fresh.
ray.shutdown()
# This env setting is necessary to avoid problems within rllib due to serialization and workers
# NOTE(review): no environment variable is set in this cell; the comment above
# may refer to the os.environ assignment made just before the trainer is
# built further down -- confirm and move the comment if so.
ray.init(ignore_reinit_error=True)
# Work on a copy of the PPO defaults so the overrides below are applied to
# this dict rather than to ppo.DEFAULT_CONFIG itself.
config = ppo.DEFAULT_CONFIG.copy()
config['train_batch_size'] = int(1e4)
config['num_workers'] = 3
# 'tfe' is understood to select RLlib's TensorFlow-eager framework -- confirm
# against the installed RLlib version.
config['framework'] = 'tfe'
# Bare expression so the notebook displays the final configuration.
config

In [None]:
def get_solnstream(mesh,soln):
    """Serialize a mesh and a solution into a single GLVis stream string.

    The mesh is written with ``mesh.Print(path)`` and the solution with
    ``soln.Save(path)``; the two files are read back as text and concatenated
    behind a ``"solution\\n"`` header.

    Args:
        mesh: object providing ``Print(filename)``.
        soln: object providing ``Save(filename)``.

    Returns:
        str: ``"solution\\n"`` + mesh text + solution text.
    """
    import os
    import tempfile

    # Use a private scratch directory instead of fixed file names in the
    # working directory: callers sharing a directory can no longer overwrite
    # each other's files, and nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as scratch:
        mesh_path = os.path.join(scratch, "mesh")
        soln_path = os.path.join(scratch, "soln")
        mesh.Print(mesh_path)
        soln.Save(soln_path)
        with open(mesh_path, "r") as f:
            meshdata = f.read()
        with open(soln_path, "r") as f:
            solndata = f.read()
    return "solution\n" + meshdata + solndata

Define some synthetic test functions: steps and bumps.

In [None]:
def rotate(x,theta):
    """Rotate the 2D point ``x`` counter-clockwise by ``theta`` radians.

    Only ``x[0]`` and ``x[1]`` are read.  Returns the rotated point as a new
    two-element list.
    """
    c, s = cos(theta), sin(theta)
    px, py = x[0], x[1]
    return [px*c-py*s, px*s+py*c]

In [None]:
def step(x):
    """Unit step in the first coordinate: 1.0 where ``x[0] < 0``, else 0.0."""
    return 1.0 if x[0] < 0.0 else 0.0

In [None]:
def rotated_step(x, theta):
    """Evaluate the unit step at ``x`` after rotating ``x`` by ``theta`` radians."""
    return step(rotate(x,theta))

In [None]:
def bump(x):
    """Gaussian bump ``exp(-(x[0]**2 + x[1]**2))`` centred at the origin."""
    first, second = x[0], x[1]
    return math.exp(-(first**2 +second**2))

Create classes where we can set the values and then eval a bunch of points.

In [None]:
class Step(mfem.PyCoefficient):
    """Coefficient that evaluates a randomly rotated, randomly shifted unit step."""

    def SetParams(self):
        """Draw a new rotation angle in [0, 2*pi] and a new shift in [-1, 1]^2."""
        # Draw order (angle, x-shift, y-shift) is kept fixed so that a seeded
        # random module yields the same parameters as before.
        angle = random.uniform(0.0, 2.0*math.pi)
        shift_x = random.uniform(-1.0, 1.0)
        shift_y = random.uniform(-1.0, 1.0)
        self.theta = angle
        self.dx = [shift_x, shift_y]

    def EvalValue(self, x):
        """Return the step value at ``x`` after shifting by ``dx`` and rotating by ``theta``."""
        # assumes x supports elementwise "+" with a list (e.g. a numpy
        # array) -- TODO confirm against the coefficient base class
        shifted = x+self.dx
        return rotated_step(shifted, self.theta)

In [None]:
class Bump(mfem.PyCoefficient):
    """Coefficient that evaluates a Gaussian bump of random width and offset."""

    def SetParams(self):
        """Draw a new width in [0.1, 1] and a new offset in [-0.5, 0.5]^2."""
        # Draw order (width, x-offset, y-offset) is kept fixed so that a
        # seeded random module yields the same parameters as before.
        self.width = random.uniform(0.1,1.0)
        self.xc = [0.5,0.5]
        offset_x = random.uniform(-0.5, 0.5)
        offset_y = random.uniform(-0.5, 0.5)
        self.dx = [offset_x, offset_y]

    def EvalValue(self, x):
        """Return the bump value at ``x``, recentred by ``xc``/``dx`` and scaled by ``width``."""
        # assumes x supports elementwise arithmetic with lists and scalars
        # (e.g. a numpy array) -- TODO confirm against the coefficient base class
        local = (x-self.xc+self.dx)/self.width
        return bump(local)

Visualize an instance of the test function. Note that each instance has randomly chosen parameters.  For the steps, it's a rotation angle and a displacement.  For the bumps, it's a width and a displacement.

In [None]:
# Load the sample mesh and build an order-1 L2 finite element space on it.
mesh = mfem.Mesh('inline-quad.mesh')
fec = mfem.L2_FECollection(p=1, dim=2)
fes = mfem.FiniteElementSpace(mesh, fec)
u = mfem.GridFunction(fes)
# Draw one randomly parameterized bump and project it onto the space; `mesh`
# and `u` are visualized in the next cell.
c = Bump()
c.SetParams()
u.ProjectCoefficient(c)

In [None]:
# Feed the serialized mesh + solution to a GLVis widget; the bare `gl`
# expression makes the notebook display it.
gl = GlvisWidget(get_solnstream(mesh,u))
gl

Create the gym environment.

In [None]:
class AMRGame(gym.Env):
    """Gym environment in which one action refines one mesh element.

    Each episode projects a randomly parameterized bump onto an order-1 L2
    finite element space over the unrefined mesh.  The observation is the
    DOF vector of that projection, the action is the index of the element to
    refine, and the reward is the resulting drop in L2 error.  Every episode
    ends after a single step.
    """

    class u0_coeff(mfem.PyCoefficient):
        """Target function for an episode; delegates to a random Bump."""

        def SetParams(self):
            """Create the wrapped Bump and draw its random parameters."""
            self.fn = Bump()
            self.fn.SetParams()

        def EvalValue(self, x):
            """Evaluate the wrapped function at the point ``x``."""
            return self.fn.EvalValue(x)

    # In RLlib, you need the config arg
    def __init__(self,config):
        """Build the mesh, space and gym spaces.  ``config`` is not read."""
        self.meshfile = 'inline-quad.mesh'

        # keep a copy of the unrefined mesh so we can restore it
        self.mesh0 = mfem.Mesh(self.meshfile)
        self.mesh = mfem.Mesh(self.meshfile)

        # The only reason we need to create a fespace and gf here
        # is to find the sizes needed for the action and observation spaces
        dim = self.mesh.Dimension()
        self.order = 1
        self.fec = mfem.L2_FECollection(self.order, dim)
        self.fes = mfem.FiniteElementSpace(self.mesh, self.fec)
        self.u = mfem.GridFunction(self.fes)

        # one action per element of the unrefined mesh: refine that element
        # (there is no "do nothing" action)
        self.action_space = spaces.Discrete(self.mesh.GetNE())
        # NOTE(review): the space declares float32, while observations are
        # returned with whatever dtype GetDataArray yields -- confirm the RL
        # library casts as intended.
        self.observation_space = spaces.Box(-1.0, 1.0, shape=(self.u.Size(),), dtype=np.float32)
        self.state = None

        # call reset to create the first synthetic function
        self.reset()

        #self.gl = GlvisWidget(get_solnstream(self.mesh,self.u))

    def get_ne(self):
        """Return the number of elements in the current mesh."""
        return self.mesh.GetNE()

    def get_size(self):
        """Return the number of DOFs in the current solution vector."""
        return self.u.Size()

    # Compute L2 error with respect to the analytic function definition
    def get_error(self):
        """Return the L2 error of ``u`` against the episode's target ``u0``."""
        err = self.u.ComputeL2Error(self.u0)
        return err

    # Manually refine the elements in the array elems
    def refine_elems(self, elems):
        """Refine the listed elements and re-project ``u0`` on the new mesh."""
        # Policies may hand back numpy integers; convert to plain ints
        # before building the mfem integer array.
        self.mesh.GeneralRefinement(mfem.intArray([int(e) for e in elems]))
        self.fes.Update()
        self.u.Update()
        self.u.ProjectCoefficient(self.u0)

    def _observe(self):
        """Snapshot the DOF vector into ``self.state`` and return a copy."""
        # Copy right away: GetDataArray is understood to expose the
        # GridFunction's own buffer, which must not be held on to once
        # ``self.u`` is replaced.
        self.state = np.array(self.u.GetDataArray())
        return self.state.copy()

    # action is the number of the element to refine
    def step(self, action):
        """Refine element ``action``.

        Returns:
            (observation, reward, done, info) where reward is the L2 error
            before refinement minus the error after, and done is always True.
        """
        err_before = self.get_error()
        self.refine_elems([action])
        err_after = self.get_error()
        reward = err_before-err_after
        done = True
        # NOTE(review): ``u`` now lives on the refined mesh, so this terminal
        # observation may be longer than observation_space declares --
        # presumably harmless because the episode is over; confirm.
        return self._observe(), reward, done, {}

    # similar to reset, but do not choose a new function
    def reinit(self):
        """Restore the unrefined mesh and re-project the current ``u0``.

        Returns:
            The observation (DOF vector) on the restored mesh.
        """
        # Release objects in reverse order of construction so that nothing
        # outlives the object it was built on (u -> fes -> mesh).
        del self.u
        del self.fes
        del self.mesh

        self.mesh = mfem.Mesh(self.mesh0)
        self.fes = mfem.FiniteElementSpace(self.mesh, self.fec)
        self.u = mfem.GridFunction(self.fes)
        self.u.ProjectCoefficient(self.u0)

        return self._observe()

    # every reset of the env chooses a new synthetic function
    def reset(self):
        """Draw a new target function and return the initial observation."""
        self.u0 = self.u0_coeff()
        self.u0.SetParams()
        return self.reinit()

    def render(self, mode='human'):
        """Return a GLVis widget showing the current mesh and solution.

        ``mode`` is accepted for compatibility with ``gym.Env.render`` and is
        otherwise ignored.
        """
        return GlvisWidget(get_solnstream(self.mesh,self.u))

Instantiate the environment and sanity check it.

In [None]:
# The constructor never reads its config argument, so None is sufficient here.
env = AMRGame(None)

In [None]:
# Element count of the current mesh (also the size of the action space while the mesh is unrefined).
env.get_ne()

In [None]:
# Number of DOFs in the solution vector, i.e. the observation length.
env.get_size()

In [None]:
# Refine element 0 and display the reward (L2 error before minus after).
state, reward, done, info = env.step(0)
reward

Show the mesh with element 0 refined. Then we'll test resetting it to the original state.  We're going to need this when searching for the best action.

In [None]:
# Display the environment's current mesh and solution.
env.render()

In [None]:
# Undo the refinement while keeping the same target function, then display the result.
env.reinit() # puts the mesh back in the orig state, and sets the DOF vector to u0
env.render()

Ok, try training a policy

In [None]:
# presumably turns on Ray's verbose pickling diagnostics -- confirm against
# the installed Ray version
os.environ["RAY_PICKLE_VERBOSE_DEBUG"] = "1"
# Build a PPO trainer from the config prepared earlier; the environment is
# passed as a class, not as an instance.
agent = ppo.PPOTrainer(config, env=AMRGame)

In [None]:
%%time
# Run a single training iteration and report the mean episode reward from
# the result dict returned by train().
for n in range(1):
    result = agent.train()
    print("episode reward mean: %f " % result["episode_reward_mean"])

In [None]:
# Fetch the trainer's policy and print a summary of its underlying network.
policy = agent.get_policy()
model = policy.model
print(model.base_model.summary())

Create a convenience function for applying a policy to a given observation

In [None]:
def apply_policy(model, obs):
    """Let the trained agent act on ``obs`` and apply that action to ``env``.

    Args:
        model: accepted but not used; the action is computed by the
            module-level ``agent``.
        obs: observation handed to ``agent.compute_action``.

    Returns:
        (action, reward): the chosen action and the reward ``env.step``
        reported for it.  The module-level ``env`` is left in its stepped
        state.
    """
    chosen = agent.compute_action(obs)
    _, gain, _, _ = env.step(chosen)
    return chosen, gain

Brute force search for the best choice by trying each one, remembering to reset the environment after each action and after we're done.

In [None]:
def find_best_el(obs):
    """Brute-force the element whose refinement gives the largest reward.

    Every element of the unrefined mesh is refined in turn on the
    module-level ``env``, which is restored with ``env.reinit()`` after each
    trial and is therefore left unrefined on return.

    Args:
        obs: not used; the search works on ``env``'s current target
            function.  Kept so existing callers keep working.

    Returns:
        (element, reward): index of the best element and its reward.  When
        the mesh has no elements (or no reward compares greater than
        -infinity) the result is ``(-1, 0.0)``.
    """
    # Start from -inf rather than 0.0 so that the best element is reported
    # even when no refinement yields a strictly positive reward; otherwise
    # callers would receive the invalid element index -1.
    best_reward = float("-inf")
    best_el = -1
    env.reinit()
    for el in range(env.get_ne()):
        _, reward, _, _ = env.step(el)
        # undo this trial's refinement before the next one (and before returning)
        env.reinit()
        if reward > best_reward:
            best_reward = reward
            best_el = el
    if best_el < 0:
        best_reward = 0.0
    return best_el, best_reward

In [None]:
# New random target function; find the best single refinement by exhaustive
# search, apply it, and display the refined mesh.
obs = env.reset()
maxel, maxr = find_best_el(obs)
env.refine_elems([maxel])
env.render()

Compare with what the policy does:

In [None]:
# Restore the unrefined mesh for the same target function, let the policy
# choose an element from the same observation, and display the result.
env.reinit()
apply_policy(model,obs)
env.render()

Run a more systematic evaluation using an ensemble of samples:

In [None]:
def eval_ensemble(model, ntrials):
    """Compare the policy with the brute-force optimum on random functions.

    For each of ``ntrials`` freshly reset environments, the best element is
    found by exhaustive search and the policy is applied to the same
    observation.

    Args:
        model: forwarded to ``apply_policy``.
        ntrials: number of random test functions to evaluate.

    Returns:
        (rms, corr): root-mean-square gap between the best reward and the
        policy's reward, and the percentage of trials in which the policy
        picked the best element.  Both are also printed.
    """
    outcomes = []
    for _ in range(ntrials):
        obs = env.reset()
        best_el, best_gain = find_best_el(obs)
        picked, gain = apply_policy(model,obs)
        outcomes.append((best_gain-gain, best_el == picked))

    sumsq = sum(gap*gap for gap, _ in outcomes)
    ncorrect = sum(1 for _, matched in outcomes if matched)

    rms = math.sqrt(sumsq/ntrials)
    corr = 100.*ncorrect/ntrials
    print("rms error: ",rms,flush=True)
    print("% correct: ",corr,flush=True)
    return rms, corr

# First estimate of the policy's metrics, from 100 random test functions.
eval_ensemble(model, 100)

Run a few eval sample sizes to get a sense of how many are needed to estimate the metrics of the policy

In [None]:
# Same evaluation with 200 random test functions.
eval_ensemble(model, 200)

In [None]:
# Same evaluation with 400 random test functions.
eval_ensemble(model, 400)

In [None]:
# Same evaluation with 800 random test functions.
eval_ensemble(model, 800)

Let's see if the training process is making progress:

In [None]:
# Training budget, split into equally sized batches with a policy evaluation
# after each batch.  Integer arithmetic keeps the batch size an int for both
# the trainer config and the "%d" formatting below.
total_episodes = int(1e6)
nbatches = 40
batch_size = total_episodes // nbatches
neval = 200

# Stop the previous trainer before dropping it so that its workers are
# released (per the RLlib Trainer API) before the new trainer starts its own.
agent.stop()
del agent
config['train_batch_size'] = batch_size
agent = ppo.PPOTrainer(config, env=AMRGame)
# Point `model` at the new trainer's network; the old value belonged to the
# trainer that was just discarded.
model = agent.get_policy().model

# rms[n] / cor[n] hold the evaluation metrics measured after batch n has
# finished training.
rms = [0.0] * nbatches
cor = [0.0] * nbatches
for n in range(nbatches):
    print("training batch %d of size %d" % (n,batch_size))
    agent.train()
    print("evaluating on %d instances..." %  neval)
    rms[n], cor[n] = eval_ensemble(model, neval)

In [None]:
%matplotlib inline
isteps = list(range(nbatches))
asteps = [i*config['train_batch_size'] for i in isteps]
import matplotlib.pyplot as plt
ax = plt.subplot(211)
ax.set_ylim(0.0001,0.1)
ax.set_ylabel('RMS error')
plt.semilogy(asteps,rms[:nbatches], marker='o')
plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
ax = plt.subplot(212)
ax.set_ylim(0,100)
ax.set_ylabel('% correct')
ax.set_xlabel('training episodes')
plt.plot(asteps,cor[:nbatches], marker='o')

plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))

In [None]:
# Display the per-batch RMS values recorded by the training loop.
rms