From 07fd8096da7ebbccf1a429441f9cdc7f0beca71a Mon Sep 17 00:00:00 2001
From: Chris Olivier
Date: Fri, 30 Mar 2018 17:57:39 -0700
Subject: [PATCH] fix crash when profiler not enabled (#10306)

* fix crash when profiler not enabled

* fix

* Update graph_executor.cc

* Update graph_executor.cc

* use nosetests to try and prevent the hang

* shutdown after GPU pass

* remove temp

* remove temp
---
 src/engine/threaded_engine.h          |  3 +-
 src/executor/graph_executor.cc        |  4 ++-
 tests/python/gpu/test_operator_gpu.py | 43 ++++++++++++++-------------
 tests/python/gpu/test_tvm_bridge.py   |  3 +-
 4 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h
index d72784d04987..673fa3bf9ebb 100644
--- a/src/engine/threaded_engine.h
+++ b/src/engine/threaded_engine.h
@@ -398,7 +398,8 @@ class ThreadedEngine : public Engine {
   }
 
   int bulk_size() const override {
-    return profiler::Profiler::Get()->AggregateRunning() ? 0 : BulkStatusStore::Get()->bulk_size;
+    const profiler::Profiler *prof = profiler::Profiler::Get();
+    return (prof && prof->AggregateRunning()) ? 0 : BulkStatusStore::Get()->bulk_size;
   }
 
   int set_bulk_size(int bulk_size) override {
diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc
index fa5931e5c849..32772f2108d1 100644
--- a/src/executor/graph_executor.cc
+++ b/src/executor/graph_executor.cc
@@ -1348,8 +1348,9 @@ void GraphExecutor::InitOpSegs() {
   // Generate segments based on the graph structure
   bool prefer_bulk_exec_inference = dmlc::GetEnv("MXNET_EXEC_BULK_EXEC_INFERENCE", true);
   // Whether to perform bulk exec for training
+  const profiler::Profiler *prof = profiler::Profiler::Get();
   bool prefer_bulk_exec = dmlc::GetEnv("MXNET_EXEC_BULK_EXEC_TRAIN", 1)
-                          && !profiler::Profiler::Get()->AggregateEnabled();
+                          && (!prof || !prof->AggregateEnabled());
 
   bool is_training = num_forward_nodes_ != total_num_nodes;
 
@@ -1362,6 +1363,7 @@ void GraphExecutor::InitOpSegs() {
   }
 }
 
+
 void GraphExecutor::BulkTrainingOpSegs(size_t total_num_nodes) {
   // The maximum number of node in a segment executed in bulk
   size_t num_nodes_threshold = dmlc::GetEnv("MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN", 15);
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index cb422e2263af..198704156be7 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -904,81 +904,81 @@ def test_1d_pooling(pool_type):
         kernel = (4,)
         pad = (2,)
         stride = (2,)
-        
+
         ctx_list = []
         sym_list = []
-        
+
         pooling_convention = 'valid'
-        
+
         ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                        pooling_convention=pooling_convention, global_pool=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type, pooling_convention=pooling_convention,
                                        global_pool=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                        pooling_convention=pooling_convention, global_pool=True, cudnn_off=False, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type,
                                        pooling_convention=pooling_convention,
                                       global_pool=True, cudnn_off=False, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                        pooling_convention=pooling_convention, global_pool=True, cudnn_off=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type, pooling_convention=pooling_convention,
                                        global_pool=True, cudnn_off=True, name='pool'))
-        
+
         check_consistency(sym_list, ctx_list)
-        
+
     def test_2d_pooling(pool_type):
         data = (2, 3, 20, 20)
         kernel = (4, 4)
         pad = (2, 2)
         stride = (2, 2)
-        
+
         ctx_list = []
         sym_list = []
-        
+
         pooling_convention = 'valid'
-        
+
         ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling_v1(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                           pooling_convention=pooling_convention, global_pool=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling_v1(kernel=kernel, pool_type=pool_type, pooling_convention=pooling_convention,
                                           global_pool=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                        pooling_convention=pooling_convention, global_pool=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type, pooling_convention=pooling_convention,
                                        global_pool=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                        pooling_convention=pooling_convention, global_pool=True, cudnn_off=False, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type, pooling_convention=pooling_convention,
                                        global_pool=True, cudnn_off=False, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                        pooling_convention=pooling_convention, global_pool=True, cudnn_off=True, name='pool'))
-        
+
         ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
         sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type, pooling_convention=pooling_convention,
                                        global_pool=True, cudnn_off=True, name='pool'))
-        
+
         check_consistency(sym_list, ctx_list)
     test_1d_pooling('max')
@@ -1784,3 +1784,4 @@ def test_kernel_error_checking():
 if __name__ == '__main__':
     import nose
     nose.runmodule()
+
diff --git a/tests/python/gpu/test_tvm_bridge.py b/tests/python/gpu/test_tvm_bridge.py
index 69a713d6a282..4b1105a0585c 100644
--- a/tests/python/gpu/test_tvm_bridge.py
+++ b/tests/python/gpu/test_tvm_bridge.py
@@ -62,4 +62,5 @@ def check(target, dtype):
 
 
 if __name__ == "__main__":
-    test_tvm_bridge()
+    import nose
+    nose.runmodule()
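
The change in threaded_engine.h and both changes in graph_executor.cc apply one
pattern: profiler::Profiler::Get() can return a null pointer when profiling was
never enabled, so the pointer is fetched once and checked before any member call.
Below is a minimal, self-contained C++ sketch of that pattern; the Profiler class
in it is a hypothetical stand-in, not MXNet's real profiler::Profiler.

// guard_sketch.cc: the null-guard pattern from the patch, in isolation.
// Build: g++ -std=c++11 guard_sketch.cc && ./a.out
#include <iostream>

class Profiler {
 public:
  // Returns nullptr when profiling was never enabled, the case that
  // crashed before the fix.
  static Profiler* Get() { return instance_; }
  bool AggregateRunning() const { return aggregate_running_; }

 private:
  static Profiler* instance_;
  bool aggregate_running_ = false;
};

Profiler* Profiler::instance_ = nullptr;  // profiler never enabled

int bulk_size(int stored_bulk_size) {
  // Before the fix: Profiler::Get()->AggregateRunning() dereferenced a null
  // pointer. After: && short-circuits, so AggregateRunning() is only called
  // on a live object.
  const Profiler* prof = Profiler::Get();
  return (prof && prof->AggregateRunning()) ? 0 : stored_bulk_size;
}

int main() {
  std::cout << bulk_size(15) << std::endl;  // prints 15 instead of crashing
  return 0;
}

The InitOpSegs() hunk uses the complementary form, (!prof || !prof->AggregateEnabled()),
which keeps bulk execution enabled both when no profiler exists and when aggregate
statistics are off.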