Permalink
Browse files

Adding streams to startTiming/stopTiming calls

  • Loading branch information...
1 parent 9acc49d commit afa07afd8985b062942fa8b3496c61ab3d919082 @harrism harrism committed May 8, 2012
Showing with 38 additions and 38 deletions.
  1. +2 −2 runtime/src/dustFunctions.cpp
  2. +32 −32 runtime/src/gpu_iterate.cpp
  3. +4 −4 runtime/src/sort_bodies_gpu.cpp
@@ -88,7 +88,7 @@ void octree::sort_dust(tree_structure &tree)
{
if(tree.n_dust == 0) return;
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
//Start reduction to get the boundaries of the dust
boundaryReduction.set_arg<int>(0, &tree.n_dust);
@@ -240,7 +240,7 @@ void octree::sort_dust(tree_structure &tree)
- devContext.stopTiming("DustSortReorder", 0);
+ devContext.stopTiming("DustSortReorder", -1, execStream->s());
}
@@ -258,9 +258,9 @@ bool octree::iterate_once(IterationData &idata) {
}
//predict localtree
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
predict(this->localTree);
- devContext.stopTiming("Predict", 9);
+ devContext.stopTiming("Predict", 9, execStream->s());
#ifdef USE_DUST
//Predict, sort and set properties
@@ -285,13 +285,13 @@ bool octree::iterate_once(IterationData &idata) {
//domains
t1 = get_time();
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
gpu_updateDomainDistribution(idata.lastGravTime);
- devContext.stopTiming("DomainUpdate", 6);
+ devContext.stopTiming("DomainUpdate", 6, execStream->s());
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
gpuRedistributeParticles();
- devContext.stopTiming("Exchange", 6);
+ devContext.stopTiming("Exchange", 6, execStream->s());
needDomainUpdate = false;
@@ -325,22 +325,22 @@ bool octree::iterate_once(IterationData &idata) {
//Rebuild the tree
this->sort_bodies(this->localTree, needDomainUpdate);
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
this->build(this->localTree);
- devContext.stopTiming("Tree-construction", 2);
+ devContext.stopTiming("Tree-construction", 2, execStream->s());
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
this->allocateTreePropMemory(this->localTree);
- devContext.stopTiming("Memory", 11);
+ devContext.stopTiming("Memory", 11, execStream->s());
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
this->compute_properties(this->localTree);
- devContext.stopTiming("Compute-properties", 3);
+ devContext.stopTiming("Compute-properties", 3, execStream->s());
#ifdef DO_BLOCK_TIMESTEP
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
setActiveGrpsFunc(this->localTree);
- devContext.stopTiming("setActiveGrpsFunc", 10);
+ devContext.stopTiming("setActiveGrpsFunc", 10, execStream->s());
idata.Nact_since_last_tree_rebuild = 0;
#endif
@@ -359,9 +359,9 @@ bool octree::iterate_once(IterationData &idata) {
else
{
//Don't rebuild, only update the current boxes
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
this->compute_properties(this->localTree);
- devContext.stopTiming("Compute-properties", 3);
+ devContext.stopTiming("Compute-properties", 3, execStream->s());
#ifdef USE_DUST
setDustGroupProperties(this->localTree);
@@ -371,17 +371,17 @@ bool octree::iterate_once(IterationData &idata) {
//Approximate gravity
t1 = get_time();
- devContext.startTiming();
+ devContext.startTiming(gravStream->s());
approximate_gravity(this->localTree);
- devContext.stopTiming("Approximation", 4);
+ devContext.stopTiming("Approximation", 4, gravStream->s());
if(nProcs > 1) makeLET();
#ifdef USE_DUST
- devContext.startTiming();
+ devContext.startTiming(gravStream->s());
approximate_dust(this->localTree);
- devContext.stopTiming("Approximation_dust", 4);
+ devContext.stopTiming("Approximation_dust", 4, gravStream->s());
#endif
gravStream->sync();
@@ -394,9 +394,9 @@ bool octree::iterate_once(IterationData &idata) {
LOGF(stderr, "APPTIME [%d]: Iter: %d\t%g \n", procId, iter, idata.lastGravTime);
//Corrector
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
correct(this->localTree);
- devContext.stopTiming("Correct", 8);
+ devContext.stopTiming("Correct", 8, execStream->s());
#ifdef USE_DUST
@@ -407,19 +407,19 @@ bool octree::iterate_once(IterationData &idata) {
if(nProcs > 1)
{
t1 = get_time();
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
mpiSync();
- devContext.stopTiming("Unbalance", 12);
+ devContext.stopTiming("Unbalance", 12, execStream->s());
idata.lastWaitTime += get_time() - t1;
idata.totalWaitTime += idata.lastWaitTime;
}
idata.Nact_since_last_tree_rebuild += this->localTree.n_active_particles;
//Compute energies
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
double de = compute_energies(this->localTree); de=de;
- devContext.stopTiming("Energy", 7);
+ devContext.stopTiming("Energy", 7, execStream->s());
if(snapshotIter > 0)
{
@@ -497,9 +497,9 @@ void octree::iterate_setup(IterationData &idata) {
t1 = get_time();
//Approximate gravity
- devContext.startTiming();
+ devContext.startTiming(gravStream->s());
approximate_gravity(this->localTree);
- devContext.stopTiming("Approximation", 4);
+ devContext.stopTiming("Approximation", 4, gravStream->s());
#ifdef USE_DUST
//Sort the dust
@@ -512,9 +512,9 @@ void octree::iterate_setup(IterationData &idata) {
//Set the group properties of dust
setDustGroupProperties(this->localTree);
- devContext.startTiming();
+ devContext.startTiming(gravStream->s());
approximate_dust(this->localTree);
- devContext.stopTiming("Approximatin_dust", 4);
+ devContext.stopTiming("Approximatin_dust", 4, gravStream->s());
//Correct
correctDustStep(this->localTree);
#endif
@@ -894,9 +894,9 @@ void octree::approximate_gravity_let(tree_structure &tree, tree_structure &remot
remoteTree.fullRemoteTree.h2d(bufferSize); //Only copy required data
tree.activePartlist.zeroMem();
-// devContext.startTiming();
+// devContext.startTiming(gravStream->s());
approxGravLET.execute(gravStream->s());
-// devContext.stopTiming("Approximation_let", 5);
+// devContext.stopTiming("Approximation_let", 5, gravStream->s());
letRunning = true;
@@ -99,7 +99,7 @@ void octree::getBoundariesGroups(tree_structure &tree, real4 &r_min, real4 &r_ma
void octree::sort_bodies(tree_structure &tree, bool doDomainUpdate) {
//We assume the bodies are already on the GPU
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
real4 r_min = {+1e10, +1e10, +1e10, +1e10};
real4 r_max = {-1e10, -1e10, -1e10, -1e10};
@@ -165,10 +165,10 @@ void octree::sort_bodies(tree_structure &tree, bool doDomainUpdate) {
// are preserved, if they are the same srcValues will be overwritten
gpuSort(devContext, srcValues, tree.bodies_key,srcValues, tree.n, 32, 3, tree);
- devContext.stopTiming("Sorting", 0);
+ devContext.stopTiming("Sorting", 0, execStream->s());
//Call the reorder data functions
- devContext.startTiming();
+ devContext.startTiming(execStream->s());
static int oneRunFull = 0;
@@ -309,7 +309,7 @@ void octree::sort_bodies(tree_structure &tree, bool doDomainUpdate) {
} //end if
- devContext.stopTiming("Data-reordering", 1);
+ devContext.stopTiming("Data-reordering", 1, execStream->s());
}

0 comments on commit afa07af

Please sign in to comment.