# Init

In [2]:
import gym
from gym import wrappers

import numpy as np
from scipy.spatial import KDTree

# Env Settings

In [4]:
# Column counts of the exemplar state matrix `F` and the exemplar action matrix `A`
_OBS_DIM, _ACT_DIM = 4, 1
# Neighbor distances at or above this bound are ignored by the inverse-distance estimate
_BIGNUM = 1e5

# Minimum Viable Components

In [5]:
# --- Query settings ---
K = 3  # Number of neighbors to query

# --- Exemplar storage: entry `i` of each array belongs to exemplar `i` ---
N = 0  # Number of exemplars
F = np.zeros( (N, _OBS_DIM) )  # Approximating function: one exemplar state per row
V = np.zeros( N )  # Action values, one per exemplar
A = np.zeros( (N, _ACT_DIM) )  # Actions (Output), one per row
KDT = None  # Spatial tree over the rows of `F`; `None` until it is built

# --- Tunables ---
eps = 1.0  # Exploration probability
rad = 0.125  # Minimum distance between exemplars (Overwrite radius)
vMar = 0.10  # Allowed margin on value estimates

In [None]:
def points_from_indices( pnts, ndcs ):
    """ Get the subset of `pnts` designated by `ndcs`

    pnts: Array-like indexed along its first axis; may be 1-D (a vector of scalars)
          or 2-D (one point per row)
    ndcs: Sequence of integer indices into the first axis of `pnts`; may be empty

    Returns a new float array holding the selected entries in the order given by `ndcs`.
    The result does not share memory with `pnts`.
    """
    pnts = np.asarray( pnts )
    # An explicit integer dtype keeps an empty index list from becoming a float array,
    # which numpy refuses to index with
    ndcs = np.asarray( ndcs, dtype = int ).reshape( -1 )
    # Integer-array indexing copies the rows; the cast keeps the float result type
    return pnts[ ndcs ].astype( float )

In [None]:
def add_particle( state, action, value, getTree = False ):
    """ Add a new particle to the value function

    Appends `state` as a new row of `F`, `value` as a new entry of `V`, and `action`
    as a new row of `A`, then increments the exemplar count `N`.

    This does NOT rebind the module-level spatial tree `KDT`; either request a tree
    with `getTree = True` or call `recalc_spatial_tree()` afterwards.

    state:   Exemplar state, as many elements as `F` has columns
    action:  Exemplar action, as many elements as `A` has columns
    value:   Scalar value associated with the exemplar
    getTree: If True, also build and return a `KDTree` over the updated `F`

    Returns `( N, tree )` where `tree` is the new `KDTree`, or `None` if `getTree` is False.
    """
    # Every one of these names is rebound below; without the declaration Python
    # treats them as locals and raises UnboundLocalError
    global N, F, V, A
    F = np.vstack( (F, state ,) )
    V = np.append( V, value ) # ndarrays have no `.append` method
    A = np.vstack( (A, action,) )
    # Count only after all three arrays grew, so a failed stack cannot desync `N`
    N += 1
    if getTree:
        return N, KDTree( F )
    else:
        return N, None
        

def recalc_spatial_tree():
    """ Recalculate spatial tree

    Rebuilds the module-level `KDT` over the rows of `F`.
    When `F` holds no exemplars, `KDT` is reset to `None` instead of building a tree.
    """
    # Without this declaration the assignment only creates a local that is discarded
    global KDT
    KDT = KDTree( F ) if len( F ) else None

    
def get_action_and_value_inv_dist( state ):
    """ Estimate the current optimal action and value for the state

    Blends the actions `A` and values `V` of the (up to) `K` exemplars nearest to
    `state`. Each neighbor is weighted by the inverse of its distance, and the weights
    are normalized to sum to 1 so that the result is a weighted average.
    An exemplar at distance zero is returned as-is.

    state: Query point with as many elements as `F` has columns

    Returns `( action, value )`, where `action` is a float array with one element per
    column of `A` and `value` is a float. Returns `( None, None )` when no estimate
    can be made: no exemplars, no spatial tree, or no neighbor closer than `_BIGNUM`.
    """
    if (N < 1) or (KDT is None):
        return None, None
    dists, indcs = KDT.query( state, K )
    # `query` hands back scalars instead of arrays when K == 1
    dists = np.atleast_1d( dists )
    indcs = np.atleast_1d( indcs )
    # When the tree holds fewer than K points, the missing neighbors come back with an
    # infinite distance and an out-of-range index; drop them before indexing
    valid = dists < _BIGNUM
    dists = dists[ valid ]
    indcs = indcs[ valid ]
    if dists.size < 1:
        return None, None
    # An exact hit has an unbounded inverse-distance weight: return that exemplar directly
    exact = np.flatnonzero( dists == 0.0 )
    if exact.size:
        idx = indcs[ exact[0] ]
        return np.array( A[ idx ], dtype = float ).reshape( -1 ), float( V[ idx ] )
    # Normalize by the sum (not the L2 norm) so the weights form a convex combination
    fractV = 1.0 / dists
    fractV = fractV / np.sum( fractV )
    rtnAct = np.dot( fractV, A[ indcs ] ) # Blend actions `A`, not the states `F`
    rtnVal = float( np.dot( fractV, V[ indcs ] ) )
    return rtnAct, rtnVal
   

def eval_particle( state, action, value ):
    """ Fold one observed ( state, action, value ) sample into the value function

    * If an exemplar already lies within `rad` of `state`, no new exemplar is added.
      The exemplar `ndcs[0]` is overwritten with `action` and `value` only when `value`
      beats both the current estimate and that exemplar's stored value.
    * If the neighborhood is empty and the estimate misses `value` by more than the
      `vMar` margin (or there is no estimate at all), a new exemplar is added and the
      spatial tree is rebuilt.
    * Otherwise nothing changes.

    state:  Observed state, as many elements as `F` has columns
    action: Action taken, as many elements as `A` has columns
    value:  Scalar value observed for taking `action` in `state`

    Returns the current number of exemplars `N`.
    """
    # 0. Get our estimate of the value of this state (`None` when no estimate exists)
    _, estVal = get_action_and_value_inv_dist( state )

    # 1. Find out if there is a particle there; with no tree yet there cannot be one
    ndcs = [] if (KDT is None) else list( KDT.query_ball_point( state, rad ) )

    # 2. If there is a particle already there and the current value is better, then update
    if len( ndcs ):
        if (estVal is None) or (value > estVal):
            index = ndcs[0]
            if V[index] < value:
                # Overwriting action/value leaves `F` untouched, so the tree stays valid
                A[index,:] = action
                V[index]   = value
            if len( ndcs ) > 1:
                print( "WARNING: NEARNESS CONSTRAINT VIOLATED" )
        # An occupied neighborhood never receives a second exemplar: that would break
        # the minimum spacing `rad` between exemplars

    # 3. Elif this is an open space that does NOT estimate the value well
    elif (estVal is None) or (abs( estVal - value ) > abs( value * vMar )):
        add_particle( state, action, value )
        recalc_spatial_tree()
    # Else this is an open space that predicts the value well, No update!
    # N. Return the current number of particles in the estimator
    return N
    
            