Revised how policies are stored, created, etc. for MDPs.

kylewray · Jun 5, 2016 · f027ab6 · f027ab6
1 parent 55b6955
commit f027ab6
Show file tree

Hide file tree

Showing 32 changed files with 224 additions and 249 deletions.
diff --git a/include/nova/error_codes.h b/include/nova/error_codes.h
@@ -41,10 +41,11 @@ namespace nova {
 #define NOVA_ERROR_KERNEL_EXECUTION                 7
 #define NOVA_ERROR_DEVICE_SYNCHRONIZE               8
 
-// Other warnings which are possible during run time.
-#define NOVA_WARNING_INVALID_BELIEF                 9
-#define NOVA_CONVERGED                              10
+// Other results, warnings, or errors which are possible during run time.
+#define NOVA_CONVERGED                              9
+#define NOVA_WARNING_INVALID_BELIEF                 10
 #define NOVA_ERROR_OUT_OF_MEMORY                    11
+#define NOVA_ERROR_POLICY_CREATION                  12
 
 };
 

diff --git a/include/nova/mdp/algorithms/mdp_vi_cpu.h b/include/nova/mdp/algorithms/mdp_vi_cpu.h
@@ -61,10 +61,10 @@ extern "C" int mdp_vi_initialize_cpu(const MDP *mdp, MDPVICPU *vi);
  *  Step 2/3: Execute VI for the MDP model specified.
  *  @param  mdp         The MDP object.
  *  @param  vi          The MDPVICPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. Must be non-null; it will be modified.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy);
+extern "C" int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy);
 
 /**
  *  Step 3/3: The uninitialization step of VI. This sets up the V and pi variables.
@@ -87,10 +87,10 @@ extern "C" int mdp_vi_update_cpu(const MDP *mdp, MDPVICPU *vi);
  *  the corresponding actions at each state (pi).
  *  @param  mdp         The MDP object.
  *  @param  vi          The MDPVICPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. Must be non-null; its arrays are allocated and filled.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy);
+extern "C" int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy);
 
 };
 

diff --git a/include/nova/mdp/algorithms/mdp_vi_gpu.h b/include/nova/mdp/algorithms/mdp_vi_gpu.h
@@ -63,10 +63,10 @@ extern "C" int mdp_vi_initialize_gpu(const MDP *mdp, MDPVIGPU *vi);
  *  Step 2/3: Execute VI for the MDP model specified.
  *  @param  mdp         The MDP object.
  *  @param  vi          The MDPVIGPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. This will be modified.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int mdp_vi_execute_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *&policy);
+extern "C" int mdp_vi_execute_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *policy);
 
 /**
  *  Step 3/3: The uninitialization step of VI. This sets up the V and pi variables.
@@ -89,10 +89,10 @@ extern "C" int mdp_vi_update_gpu(const MDP *mdp, MDPVIGPU *vi);
  *  the corresponding actions at each state (pi).
  *  @param  mdp         The MDP object.
  *  @param  vi          The MDPVIGPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. This will be modified.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int mdp_vi_get_policy_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *&policy);
+extern "C" int mdp_vi_get_policy_gpu(const MDP *mdp, MDPVIGPU *vi, MDPValueFunction *policy);
 
 };
 

diff --git a/include/nova/mdp/algorithms/ssp_lao_star_cpu.h b/include/nova/mdp/algorithms/ssp_lao_star_cpu.h
@@ -62,10 +62,10 @@ extern "C" int ssp_lao_star_initialize_cpu(const MDP *mdp, SSPLAOStarCPU *lao);
  *  Note we assume the rewards R are all positive costs or 0 for goal states.
  *  @param  mdp         The MDP object.
  *  @param  lao         The SSPLAOStarCPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. This will be modified.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int ssp_lao_star_execute_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *&policy);
+extern "C" int ssp_lao_star_execute_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *policy);
 
 /**
  *  Step 3/3: The uninitialization step of LAO*. This sets up the V and pi variables.
@@ -91,10 +91,10 @@ extern "C" int ssp_lao_star_update_cpu(const MDP *mdp, SSPLAOStarCPU *lao);
  *  Note we assume the rewards R are all positive costs or 0 for goal states.
  *  @param  mdp         The MDP object.
  *  @param  lao         The SSPLAOStarCPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. This will be modified.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int ssp_lao_star_get_policy_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *&policy);
+extern "C" int ssp_lao_star_get_policy_cpu(const MDP *mdp, SSPLAOStarCPU *lao, MDPValueFunction *policy);
 
 };
 

diff --git a/include/nova/mdp/algorithms/ssp_rtdp_cpu.h b/include/nova/mdp/algorithms/ssp_rtdp_cpu.h
@@ -68,10 +68,10 @@ extern "C" int ssp_rtdp_initialize_cpu(const MDP *mdp, SSPRTDPCPU *rtdp);
  *  assumes that the goal can be reached with non-zero probability from all states.
  *  @param  mdp         The MDP object.
  *  @param  rtdp        The SSPRTDPCPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. This will be modified.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int ssp_rtdp_execute_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *&policy);
+extern "C" int ssp_rtdp_execute_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *policy);
 
 /**
  *  Step 3/3: The uninitialization step of RTDP. This sets up the V and pi variables.
@@ -100,10 +100,10 @@ extern "C" int ssp_rtdp_update_cpu(const MDP *mdp, SSPRTDPCPU *rtdp);
  *  Note we assume the rewards R are all positive costs or 0 for goal states.
  *  @param  mdp         The MDP object.
  *  @param  rtdp        The SSPRTDPCPU object containing algorithm variables.
- *  @param  policy      The resulting value function policy. This will be created and modified.
+ *  @param  policy      The resulting value function policy. This will be modified.
  *  @return Returns zero upon success, non-zero otherwise.
  */
-extern "C" int ssp_rtdp_get_policy_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *&policy);
+extern "C" int ssp_rtdp_get_policy_cpu(const MDP *mdp, SSPRTDPCPU *rtdp, MDPValueFunction *policy);
 
 };
 

diff --git a/include/nova/mdp/policies/mdp_value_function.h b/include/nova/mdp/policies/mdp_value_function.h
@@ -39,7 +39,7 @@ namespace nova {
  *  @param  n   The number of states in the MDP.
  *  @param  m   The number of actions in the MDP.
  *  @param  r   The number of relevant states in the solution. If r == 0,
- *              then all states are used.
+ *              then all states are used, and S is null.
  *  @param  S   The set of relevant states (r array). If this r == 0,
  *              then this is null, and V and pi are n arrays.
  *  @param  V   The values of the relevant states (r array or n array).
@@ -54,6 +54,16 @@ typedef struct NovaMDPValueFunction {
     unsigned int *pi;
 } MDPValueFunction;
 
+/**
+ *  Assign the variables of the given policy and allocate memory *only* for its internal arrays, using the parameters.
+ *  @param  n   The number of states.
+ *  @param  m   The number of actions.
+ *  @param  r   Optionally define the number of relevant states (r <= n). If r == 0, then all states are used.
+ *  @return Returns zero upon success, non-zero otherwise.
+ */
+extern "C" int mdp_value_function_initialize(MDPValueFunction *policy,
+        unsigned int n, unsigned int m, unsigned int r);
+
 /**
  *  Free the memory for *only* the policy's internal arrays.
  *  @param  policy  The resultant value function. Arrays within will be freed.

diff --git a/python/nova/mdp_vi.py b/python/nova/mdp_vi.py
@@ -89,14 +89,14 @@ def solve(self):
                 The MDPValueFunction policy solution to the MDP.
         """
 
-        policy = ct.POINTER(mvf.MDPValueFunction)()
+        policy = mvf.MDPValueFunction()
 
-        result = nmvi._nova.mdp_vi_execute_cpu(self.mdpPtr, self, ct.byref(policy))
+        result = nmvi._nova.mdp_vi_execute_cpu(self.mdpPtr, self, policy)
         if result != 0:
             print("Failed to execute the 'nova' library's CPU MDP solver.")
             raise Exception()
 
-        return policy.contents
+        return policy
 
     def __str__(self):
         """ Return the string of the MDP value iteration.
@@ -178,14 +178,14 @@ def solve(self):
                 The MDPValueFunction policy solution to the MDP.
         """
 
-        policy = ct.POINTER(mvf.MDPValueFunction)()
+        policy = mvf.MDPValueFunction()
 
-        result = nmvi._nova.mdp_vi_execute_gpu(self.mdpPtr, self, ct.byref(policy))
+        result = nmvi._nova.mdp_vi_execute_gpu(self.mdpPtr, self, policy)
         if result != 0:
             print("Failed to execute the 'nova' library's GPU MDP solver.")
             raise Exception()
 
-        return policy.contents
+        return policy
 
     def __str__(self):
         """ Return the string of the MDP value iteration.

diff --git a/python/nova/nova_mdp_value_function.py b/python/nova/nova_mdp_value_function.py
@@ -52,6 +52,10 @@ class NovaMDPValueFunction(ct.Structure):
 
 
 # Functions from 'mdp_value_function.h'.
+_nova.mdp_value_function_initialize.argtypes = (ct.POINTER(NovaMDPValueFunction),
+                                                ct.c_uint,      # n
+                                                ct.c_uint,      # m
+                                                ct.c_uint)      # r
 _nova.mdp_value_function_uninitialize.argtypes = tuple([ct.POINTER(NovaMDPValueFunction)])
 
 
diff --git a/python/nova/nova_mdp_vi.py b/python/nova/nova_mdp_vi.py
@@ -60,7 +60,7 @@ class NovaMDPValueIterationCPU(ct.Structure):
 
 _nova.mdp_vi_execute_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                     ct.POINTER(NovaMDPValueIterationCPU),
-                                    ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                    ct.POINTER(mvf.MDPValueFunction))
 
 _nova.mdp_vi_uninitialize_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                     ct.POINTER(NovaMDPValueIterationCPU))
@@ -70,7 +70,7 @@ class NovaMDPValueIterationCPU(ct.Structure):
 
 _nova.mdp_vi_get_policy_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                         ct.POINTER(NovaMDPValueIterationCPU),
-                                        ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                        ct.POINTER(mvf.MDPValueFunction))
 
 
 class NovaMDPValueIterationGPU(ct.Structure):
@@ -90,7 +90,7 @@ class NovaMDPValueIterationGPU(ct.Structure):
 
 _nova.mdp_vi_execute_gpu.argtypes = (ct.POINTER(mdp.MDP),
                                     ct.POINTER(NovaMDPValueIterationGPU),
-                                    ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                    ct.POINTER(mvf.MDPValueFunction))
 
 _nova.mdp_vi_uninitialize_gpu.argtypes = (ct.POINTER(mdp.MDP),
                                     ct.POINTER(NovaMDPValueIterationGPU))
@@ -100,5 +100,5 @@ class NovaMDPValueIterationGPU(ct.Structure):
 
 _nova.mdp_vi_get_policy_gpu.argtypes = (ct.POINTER(mdp.MDP),
                                         ct.POINTER(NovaMDPValueIterationCPU),
-                                        ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                        ct.POINTER(mvf.MDPValueFunction))
 
diff --git a/python/nova/nova_pomdp_alpha_vectors.py b/python/nova/nova_pomdp_alpha_vectors.py
@@ -51,6 +51,10 @@ class NovaPOMDPAlphaVectors(ct.Structure):
 
 
 # Functions from 'pomdp_alpha_vectors.h'.
+_nova.pomdp_alpha_vectors_initialize.argtypes = (ct.POINTER(NovaPOMDPAlphaVectors),
+                                                 ct.c_uint,      # n
+                                                 ct.c_uint,      # m
+                                                 ct.c_uint)      # r
 _nova.pomdp_alpha_vectors_value_and_action.argtypes = (ct.POINTER(NovaPOMDPAlphaVectors),
                                         ct.POINTER(ct.c_float),                 # b
                                         ct.POINTER(ct.c_float),                 # Vb

diff --git a/python/nova/nova_ssp_lao_star.py b/python/nova/nova_ssp_lao_star.py
@@ -59,7 +59,7 @@ class NovaSSPLAOStarCPU(ct.Structure):
 
 _nova.ssp_lao_star_execute_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                            ct.POINTER(NovaSSPLAOStarCPU),
-                                           ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                           ct.POINTER(mvf.MDPValueFunction))
 
 _nova.ssp_lao_star_uninitialize_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                                 ct.POINTER(NovaSSPLAOStarCPU))
@@ -69,6 +69,6 @@ class NovaSSPLAOStarCPU(ct.Structure):
 
 _nova.ssp_lao_star_get_policy_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                               ct.POINTER(NovaSSPLAOStarCPU),
-                                              ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                              ct.POINTER(mvf.MDPValueFunction))
 
 
diff --git a/python/nova/nova_ssp_rtdp.py b/python/nova/nova_ssp_rtdp.py
@@ -61,7 +61,7 @@ class NovaSSPRTDPCPU(ct.Structure):
 
 _nova.ssp_rtdp_execute_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                        ct.POINTER(NovaSSPRTDPCPU),
-                                       ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                       ct.POINTER(mvf.MDPValueFunction))
 
 _nova.ssp_rtdp_uninitialize_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                             ct.POINTER(NovaSSPRTDPCPU))
@@ -71,7 +71,7 @@ class NovaSSPRTDPCPU(ct.Structure):
 
 _nova.ssp_rtdp_get_policy_cpu.argtypes = (ct.POINTER(mdp.MDP),
                                           ct.POINTER(NovaSSPRTDPCPU),
-                                          ct.POINTER(ct.POINTER(mvf.MDPValueFunction)))
+                                          ct.POINTER(mvf.MDPValueFunction))
 
 
 
diff --git a/python/nova/ssp_lao_star.py b/python/nova/ssp_lao_star.py
@@ -83,14 +83,14 @@ def solve(self):
                 The MDPValueFunction policy solution to the SSP MDP.
         """
 
-        policy = ct.POINTER(mvf.MDPValueFunction)()
+        policy = mvf.MDPValueFunction()
 
         result = nsls._nova.ssp_lao_star_execute_cpu(self.mdpPtr, self, ct.byref(policy))
         if result != 0:
             print("Failed to execute the 'nova' library's CPU LAO* solver.")
             raise Exception()
 
-        return policy.contents
+        return policy
 
     def __str__(self):
         """ Return the string of the SSP LAO* algorithm.

diff --git a/python/nova/ssp_rtdp.py b/python/nova/ssp_rtdp.py
@@ -85,14 +85,14 @@ def solve(self):
                 The MDPValueFunction policy solution to the SSP MDP.
         """
 
-        policy = ct.POINTER(mvf.MDPValueFunction)()
+        policy = mvf.MDPValueFunction()
 
         result = nsr._nova.ssp_rtdp_execute_cpu(self.mdpPtr, self, ct.byref(policy))
         if result != 0:
             print("Failed to execute the 'nova' library's CPU RTDP solver.")
             raise Exception()
 
-        return policy.contents
+        return policy
 
     def __str__(self):
         """ Return the string of the SSP RTDP algorithm.

diff --git a/src/mdp/algorithms/mdp_vi_cpu.cpp b/src/mdp/algorithms/mdp_vi_cpu.cpp
@@ -27,6 +27,7 @@
 #include <stdio.h>
 #include <cstring>
 
+#include <nova/mdp/policies/mdp_value_function.h>
 #include <nova/error_codes.h>
 #include <nova/constants.h>
 
@@ -97,13 +98,13 @@ int mdp_vi_initialize_cpu(const MDP *mdp, MDPVICPU *vi)
 }
 
 
-int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy)
+int mdp_vi_execute_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy)
 {
     // First, ensure data is valid.
     if (mdp == nullptr || mdp->n == 0 || mdp->ns == 0 || mdp->m == 0 ||
             mdp->S == nullptr || mdp->T == nullptr || mdp->R == nullptr ||
             mdp->gamma < 0.0f || mdp->gamma > 1.0f || mdp->horizon < 1 ||
-            vi == nullptr || policy != nullptr) {
+            vi == nullptr || policy == nullptr) {
         fprintf(stderr, "Error[mdp_vi_execute_cpu]: %s\n", "Invalid arguments.");
         return NOVA_ERROR_INVALID_DATA;
     }
@@ -195,23 +196,19 @@ int mdp_vi_update_cpu(const MDP *mdp, MDPVICPU *vi)
 }
 
 
-int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *&policy)
+int mdp_vi_get_policy_cpu(const MDP *mdp, MDPVICPU *vi, MDPValueFunction *policy)
 {
-    if (mdp == nullptr || vi == nullptr || policy != nullptr) {
-        fprintf(stderr, "Error[mdp_vi_get_policy_cpu]: %s\n",
-                        "Invalid arguments. The policy must be undefined.");
+    if (mdp == nullptr || vi == nullptr || policy == nullptr) {
+        fprintf(stderr, "Error[mdp_vi_get_policy_cpu]: %s\n", "Invalid arguments.");
         return NOVA_ERROR_INVALID_DATA;
     }
 
-    policy = new MDPValueFunction();
-
-    policy->n = mdp->n;
-    policy->m = mdp->m;
-    policy->r = 0;
-
-    policy->S = nullptr;
-    policy->V = new float[mdp->n];
-    policy->pi = new unsigned int[mdp->n];
+    // Initialize the policy, which allocates memory.
+    int result = mdp_value_function_initialize(policy, mdp->n, mdp->m, 0);
+    if (result != NOVA_SUCCESS) {
+        fprintf(stderr, "Error[mdp_vi_get_policy_cpu]: %s\n", "Could not create the policy.");
+        return NOVA_ERROR_POLICY_CREATION;
+    }
 
     // Copy the final (or intermediate) result, both V and pi. This assumes memory has been allocated
     // for the variables provided.