Skip to content

Commit

Permalink
Revised how policies are stored, created, etc. for MDPs.
Browse files Browse the repository at this point in the history
  • Loading branch information
kylewray committed Jun 5, 2016
1 parent 55b6955 commit f027ab6
Show file tree
Hide file tree
Showing 32 changed files with 224 additions and 249 deletions.
7 changes: 4 additions & 3 deletions include/nova/error_codes.h
Expand Up @@ -41,10 +41,11 @@ namespace nova {
#define NOVA_ERROR_KERNEL_EXECUTION 7
#define NOVA_ERROR_DEVICE_SYNCHRONIZE 8

// Other warnings which are possible during run time.
#define NOVA_WARNING_INVALID_BELIEF 9
#define NOVA_CONVERGED 10
// Other results, warnings, or errors which are possible during run time.
#define NOVA_CONVERGED 9
#define NOVA_WARNING_INVALID_BELIEF 10
#define NOVA_ERROR_OUT_OF_MEMORY 11
#define NOVA_ERROR_POLICY_CREATION 12

};

Expand Down
8 changes: 4 additions & 4 deletions include/nova/mdp/algorithms/mdp_vi_cpu.h
Expand Up @@ -61,10 +61,10 @@ extern "C" int mdp_vi_initialize_cpu(const MDP *mdp, MDPVICPU *vi);
* Step 2/3: Execute VI for the MDP model specified.
* @param mdp The MDP object.
* @param vi The MDPVICPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy);
extern "C" int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy);

/**
* Step 3/3: The uninitialization step of VI. This sets up the V and pi variables.
Expand All @@ -87,10 +87,10 @@ extern "C" int mdp_vi_update_cpu(const MDP *mdp, MDPVICPU *vi);
* the corresponding actions at each state (pi).
* @param mdp The MDP object.
* @param vi The MDPVICPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy);
extern "C" int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy);

};

Expand Down
8 changes: 4 additions & 4 deletions include/nova/mdp/algorithms/mdp_vi_gpu.h
Expand Up @@ -63,10 +63,10 @@ extern "C" int mdp_vi_initialize_gpu(const MDP *mdp, MDPVIGPU *vi);
* Step 2/3: Execute VI for the MDP model specified.
* @param mdp The MDP object.
* @param vi The MDPVIGPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int mdp_vi_execute_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *&policy);
extern "C" int mdp_vi_execute_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *policy);

/**
* Step 3/3: The uninitialization step of VI. This sets up the V and pi variables.
Expand All @@ -89,10 +89,10 @@ extern "C" int mdp_vi_update_gpu(const MDP *mdp, MDPVIGPU *vi);
* the corresponding actions at each state (pi).
* @param mdp The MDP object.
* @param vi The MDPVIGPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int mdp_vi_get_policy_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *&policy);
extern "C" int mdp_vi_get_policy_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *policy);

};

Expand Down
8 changes: 4 additions & 4 deletions include/nova/mdp/algorithms/ssp_lao_star_cpu.h
Expand Up @@ -62,10 +62,10 @@ extern "C" int ssp_lao_star_initialize_cpu(const MDP *mdp, SSPLAOStarCPU *lao);
* Note we assume the rewards R are all positive costs or 0 for goal states.
* @param mdp The MDP object.
* @param lao The SSPLAOStarCPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int ssp_lao_star_execute_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *&policy);
extern "C" int ssp_lao_star_execute_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *policy);

/**
* Step 3/3: The uninitialization step of LAO*. This sets up the V and pi variables.
Expand All @@ -91,10 +91,10 @@ extern "C" int ssp_lao_star_update_cpu(const MDP *mdp, SSPLAOStarCPU *lao);
* Note we assume the rewards R are all positive costs or 0 for goal states.
* @param mdp The MDP object.
* @param lao The SSPLAOStarCPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int ssp_lao_star_get_policy_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *&policy);
extern "C" int ssp_lao_star_get_policy_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *policy);

};

Expand Down
8 changes: 4 additions & 4 deletions include/nova/mdp/algorithms/ssp_rtdp_cpu.h
Expand Up @@ -68,10 +68,10 @@ extern "C" int ssp_rtdp_initialize_cpu(const MDP *mdp, SSPRTDPCPU *rtdp);
* assumes that the goal can be reached with non-zero probability from all states.
* @param mdp The MDP object.
* @param rtdp The SSPRTDPCPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int ssp_rtdp_execute_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *&policy);
extern "C" int ssp_rtdp_execute_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *policy);

/**
* Step 3/3: The uninitialization step of RTDP. This sets up the V and pi variables.
Expand Down Expand Up @@ -100,10 +100,10 @@ extern "C" int ssp_rtdp_update_cpu(const MDP *mdp, SSPRTDPCPU *rtdp);
* Note we assume the rewards R are all positive costs or 0 for goal states.
* @param mdp The MDP object.
* @param rtdp The SSPRTDPCPU object containing algorithm variables.
* @param policy The resulting value function policy. This will be created and modified.
* @param policy The resulting value function policy. This will be modified.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int ssp_rtdp_get_policy_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *&policy);
extern "C" int ssp_rtdp_get_policy_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *policy);

};

Expand Down
12 changes: 11 additions & 1 deletion include/nova/mdp/policies/mdp_value_function.h
Expand Up @@ -39,7 +39,7 @@ namespace nova {
* @param n The number of states in the MDP.
* @param m The number of actions in the MDP.
* @param r The number of relevant states in the solution. If r == 0,
* then all states are used.
* then all states are used, and S is null.
* @param S The set of relevant states (r array). If r == 0,
* then this is null, and V and pi are n arrays.
* @param V The values of the relevant states (r array or n array).
Expand All @@ -54,6 +54,16 @@ typedef struct NovaMDPValueFunction {
unsigned int *pi;
} MDPValueFunction;

/**
* Assign variables and allocate the memory *only* for the policy's internal arrays given the parameters.
* @param policy The value function policy whose internal arrays will be allocated.
* @param n The number of states.
* @param m The number of actions.
* @param r Optionally define the number of relevant states (r <= n). If r == 0, then all states are used.
* @return Returns zero upon success, non-zero otherwise.
*/
extern "C" int mdp_value_function_initialize(MDPValueFunction *policy,
unsigned int n, unsigned int m, unsigned int r);

/**
* Free the memory for *only* the policy's internal arrays.
* @param policy The resultant value function. Arrays within will be freed.
Expand Down
12 changes: 6 additions & 6 deletions python/nova/mdp_vi.py
Expand Up @@ -89,14 +89,14 @@ def solve(self):
The MDPValueFunction policy solution to the MDP.
"""

policy = ct.POINTER(mvf.MDPValueFunction)()
policy = mvf.MDPValueFunction()

result = nmvi._nova.mdp_vi_execute_cpu(self.mdpPtr, self, ct.byref(policy))
result = nmvi._nova.mdp_vi_execute_cpu(self.mdpPtr, self, policy)
if result != 0:
print("Failed to execute the 'nova' library's CPU MDP solver.")
raise Exception()

return policy.contents
return policy

def __str__(self):
""" Return the string of the MDP value iteration.
Expand Down Expand Up @@ -178,14 +178,14 @@ def solve(self):
The MDPValueFunction policy solution to the MDP.
"""

policy = ct.POINTER(mvf.MDPValueFunction)()
policy = mvf.MDPValueFunction()

result = nmvi._nova.mdp_vi_execute_gpu(self.mdpPtr, self, ct.byref(policy))
result = nmvi._nova.mdp_vi_execute_gpu(self.mdpPtr, self, policy)
if result != 0:
print("Failed to execute the 'nova' library's GPU MDP solver.")
raise Exception()

return policy.contents
return policy

def __str__(self):
""" Return the string of the MDP value iteration.
Expand Down
4 changes: 4 additions & 0 deletions python/nova/nova_mdp_value_function.py
Expand Up @@ -52,6 +52,10 @@ class NovaMDPValueFunction(ct.Structure):


# Functions from 'mdp_value_function.h'.
_nova.mdp_value_function_initialize.argtypes = (ct.POINTER(NovaMDPValueFunction),
ct.c_uint, # n
ct.c_uint, # m
ct.c_uint) # r
_nova.mdp_value_function_uninitialize.argtypes = tuple([ct.POINTER(NovaMDPValueFunction)])


8 changes: 4 additions & 4 deletions python/nova/nova_mdp_vi.py
Expand Up @@ -60,7 +60,7 @@ class NovaMDPValueIterationCPU(ct.Structure):

_nova.mdp_vi_execute_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaMDPValueIterationCPU),
ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
ct.POINTER(mvf.MDPValueFunction))

_nova.mdp_vi_uninitialize_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaMDPValueIterationCPU))
Expand All @@ -70,7 +70,7 @@ class NovaMDPValueIterationCPU(ct.Structure):

_nova.mdp_vi_get_policy_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaMDPValueIterationCPU),
ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
ct.POINTER(mvf.MDPValueFunction))


class NovaMDPValueIterationGPU(ct.Structure):
Expand All @@ -90,7 +90,7 @@ class NovaMDPValueIterationGPU(ct.Structure):

_nova.mdp_vi_execute_gpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaMDPValueIterationGPU),
ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
ct.POINTER(mvf.MDPValueFunction))

_nova.mdp_vi_uninitialize_gpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaMDPValueIterationGPU))
Expand All @@ -100,5 +100,5 @@ class NovaMDPValueIterationGPU(ct.Structure):

# Argument types for mdp_vi_get_policy_gpu(const MDP *, MDPVIGPU *, MDPValueFunction *).
# The second argument must be the GPU solver struct (not the CPU one); otherwise
# ctypes rejects a GPU solver object at call time.
_nova.mdp_vi_get_policy_gpu.argtypes = (ct.POINTER(mdp.MDP),
                                        ct.POINTER(NovaMDPValueIterationGPU),
                                        ct.POINTER(mvf.MDPValueFunction))

4 changes: 4 additions & 0 deletions python/nova/nova_pomdp_alpha_vectors.py
Expand Up @@ -51,6 +51,10 @@ class NovaPOMDPAlphaVectors(ct.Structure):


# Functions from 'pomdp_alpha_vectors.h'.
_nova.pomdp_alpha_vectors_initialize.argtypes = (ct.POINTER(NovaPOMDPAlphaVectors),
ct.c_uint, # n
ct.c_uint, # m
ct.c_uint) # r
_nova.pomdp_alpha_vectors_value_and_action.argtypes = (ct.POINTER(NovaPOMDPAlphaVectors),
ct.POINTER(ct.c_float), # b
ct.POINTER(ct.c_float), # Vb
Expand Down
4 changes: 2 additions & 2 deletions python/nova/nova_ssp_lao_star.py
Expand Up @@ -59,7 +59,7 @@ class NovaSSPLAOStarCPU(ct.Structure):

_nova.ssp_lao_star_execute_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaSSPLAOStarCPU),
ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
ct.POINTER(mvf.MDPValueFunction))

_nova.ssp_lao_star_uninitialize_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaSSPLAOStarCPU))
Expand All @@ -69,6 +69,6 @@ class NovaSSPLAOStarCPU(ct.Structure):

_nova.ssp_lao_star_get_policy_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaSSPLAOStarCPU),
ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
ct.POINTER(mvf.MDPValueFunction))


4 changes: 2 additions & 2 deletions python/nova/nova_ssp_rtdp.py
Expand Up @@ -61,7 +61,7 @@ class NovaSSPRTDPCPU(ct.Structure):

_nova.ssp_rtdp_execute_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaSSPRTDPCPU),
ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
ct.POINTER(mvf.MDPValueFunction))

_nova.ssp_rtdp_uninitialize_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaSSPRTDPCPU))
Expand All @@ -71,7 +71,7 @@ class NovaSSPRTDPCPU(ct.Structure):

_nova.ssp_rtdp_get_policy_cpu.argtypes = (ct.POINTER(mdp.MDP),
ct.POINTER(NovaSSPRTDPCPU),
ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
ct.POINTER(mvf.MDPValueFunction))



4 changes: 2 additions & 2 deletions python/nova/ssp_lao_star.py
Expand Up @@ -83,14 +83,14 @@ def solve(self):
The MDPValueFunction policy solution to the SSP MDP.
"""

policy = ct.POINTER(mvf.MDPValueFunction)()
policy = mvf.MDPValueFunction()

result = nsls._nova.ssp_lao_star_execute_cpu(self.mdpPtr, self, ct.byref(policy))
if result != 0:
print("Failed to execute the 'nova' library's CPU LAO* solver.")
raise Exception()

return policy.contents
return policy

def __str__(self):
""" Return the string of the SSP LAO* algorithm.
Expand Down
4 changes: 2 additions & 2 deletions python/nova/ssp_rtdp.py
Expand Up @@ -85,14 +85,14 @@ def solve(self):
The MDPValueFunction policy solution to the SSP MDP.
"""

policy = ct.POINTER(mvf.MDPValueFunction)()
policy = mvf.MDPValueFunction()

result = nsr._nova.ssp_rtdp_execute_cpu(self.mdpPtr, self, ct.byref(policy))
if result != 0:
print("Failed to execute the 'nova' library's CPU RTDP solver.")
raise Exception()

return policy.contents
return policy

def __str__(self):
""" Return the string of the SSP RTDP algorithm.
Expand Down
27 changes: 12 additions & 15 deletions src/mdp/algorithms/mdp_vi_cpu.cpp
Expand Up @@ -27,6 +27,7 @@
#include <stdio.h>
#include <cstring>

#include <nova/mdp/policies/mdp_value_function.h>
#include <nova/error_codes.h>
#include <nova/constants.h>

Expand Down Expand Up @@ -97,13 +98,13 @@ int mdp_vi_initialize_cpu(const MDP *mdp, MDPVICPU *vi)
}


int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy)
int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy)
{
// First, ensure data is valid.
if (mdp == nullptr || mdp->n == 0 || mdp->ns == 0 || mdp->m == 0 ||
mdp->S == nullptr || mdp->T == nullptr || mdp->R == nullptr ||
mdp->gamma < 0.0f || mdp->gamma > 1.0f || mdp->horizon < 1 ||
vi == nullptr || policy != nullptr) {
vi == nullptr || policy == nullptr) {
fprintf(stderr, "Error[mdp_vi_execute_cpu]: %s\n", "Invalid arguments.");
return NOVA_ERROR_INVALID_DATA;
}
Expand Down Expand Up @@ -195,23 +196,19 @@ int mdp_vi_update_cpu(const MDP *mdp, MDPVICPU *vi)
}


int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy)
int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy)
{
if (mdp == nullptr || vi == nullptr || policy != nullptr) {
fprintf(stderr, "Error[mdp_vi_get_policy_cpu]: %s\n",
"Invalid arguments. The policy must be undefined.");
if (mdp == nullptr || vi == nullptr || policy == nullptr) {
fprintf(stderr, "Error[mdp_vi_get_policy_cpu]: %s\n", "Invalid arguments.");
return NOVA_ERROR_INVALID_DATA;
}

policy = new MDPValueFunction();

policy->n = mdp->n;
policy->m = mdp->m;
policy->r = 0;

policy->S = nullptr;
policy->V = new float[mdp->n];
policy->pi = new unsigned int[mdp->n];
// Initialize the policy, which allocates memory.
int result = mdp_value_function_initialize(policy, mdp->n, mdp->m, 0);
if (result != NOVA_SUCCESS) {
fprintf(stderr, "Error[mdp_vi_get_policy_cpu]: %s\n", "Could not create the policy.");
return NOVA_ERROR_POLICY_CREATION;
}

// Copy the final (or intermediate) result, both V and pi. This assumes memory has been allocated
// for the variables provided.
Expand Down

0 comments on commit f027ab6

Please sign in to comment.