WARNING: Logging before flag parsing goes to stderr.
W0715 07:44:12.649469 140736213103488 deprecation_wrapper.py:119] From /Users/sam/dev/models/official/utils/logging/hooks.py:26: The name tf.train.SessionRunHook is deprecated. Please use tf.estimator.SessionRunHook instead.

W0715 07:44:12.650149 140736213103488 deprecation_wrapper.py:119] From /Users/sam/dev/models/official/utils/logging/metric_hook.py:26: The name tf.train.LoggingTensorHook is deprecated. Please use tf.estimator.LoggingTensorHook instead.

W0715 07:44:12.650352 140736213103488 deprecation_wrapper.py:119] From wide_deep_test.py:30: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.

W0715 07:44:12.650449 140736213103488 deprecation_wrapper.py:119] From wide_deep_test.py:30: The name tf.logging.ERROR is deprecated. Please use tf.compat.v1.logging.ERROR instead.

Running tests under Python 3.6.5: /Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/bin/python
2019-07-15 07:44:14.645804: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-07-15 07:44:14.707764: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:15.424249: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:15.499332: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:15.534204: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:15.574194: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:16.045289: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:16.175550: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:16.386038: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:16.476929: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_58.json

2019-07-15 07:44:18.329981: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:18.375422: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:18.471081: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:18.512578: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:18.563499: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:19.340853: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:19.523467: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:19.927389: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:20.033841: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_206.json

2019-07-15 07:44:20.083920: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_220.json

2019-07-15 07:44:20.095422: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_222.json

2019-07-15 07:44:20.105715: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_223.json

[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c9_sdk_4) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T3] = =(X_T4[])
// With Index Variables Made Integral:
// X_T5[d0 : _T3] = =(X_T4[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T3] = =(X_T4[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c9_sdk_4(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c9_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T3[od0 : _T2] = =(X_I_2[od0])
// With Index Variables Made Integral:
// X_T3[od0 : _T2] = =(X_I_2[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T3[od0 : _T2] = =(X_I_2[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range      X_T3     X_I_2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c9_sdk_2(
    device void* X_T3_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T3 = static_cast<device ptrdiff_t*>(X_T3_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T3 = agg[0];
  X_T3[0] = LX_T3;
}

[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c9_sdk_4) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T3] = =(X_T4[])
// With Index Variables Made Integral:
// X_T5[d0 : _T3] = =(X_T4[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T3] = =(X_T4[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c9_sdk_4(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c9_sdk_3) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[] = *(X_T3[d1])
// With Index Variables Made Integral:
// X_T4[] = *(X_T3[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T4[] = *(X_T3[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c9_sdk_3(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c9_sdk_5) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T6[d0 : _T4] = =(X_T5[d0])
// With Index Variables Made Integral:
// X_T6[d0 : _T4] = =(X_T5[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T6[d0 : _T4] = =(X_T5[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T6      X_T5  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c9_sdk_5(
    device void* X_T6_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T6 = static_cast<device ptrdiff_t*>(X_T6_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T6 = agg[0];
  X_T6[0] = LX_T6;
}

[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c9_sdk_7) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:16z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T8[1 + d0 : _T6] = =(X_T7[d0])
// With Index Variables Made Integral:
// X_T8[1 + d0 : _T6] = =(X_T7[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T8[1 + d0 : _T6] = =(X_T7[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T8      X_T7  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c9_sdk_7(
    device void* X_T8_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T8 = static_cast<device ptrdiff_t*>(X_T8_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T8 = (X_T8 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T8 = agg[0];
  X_T8[0] = LX_T8;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Compiler::Build> Compilation failure:
Compilation failed: 

program_source:32:7: warning: unused variable 'tid'
  int tid = _tid;
      ^
program_source:134:14: error: call to 'select' is ambiguous
    agg[0] = select((ptrdiff_t)agg[0], (ptrdiff_t)val1, (bool)o0_cond);
             ^~~~~~
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:56:21: note: candidate function
  METAL_FUNC char   select    (char a, char b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:57:21: note: candidate function
  METAL_FUNC uchar  select    (uchar a, uchar b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:58:21: note: candidate function
  METAL_FUNC short  select    (short a, short b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:59:21: note: candidate function
  METAL_FUNC ushort select    (ushort a, ushort b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:60:21: note: candidate function
  METAL_FUNC int    select    (int a, int b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:61:21: note: candidate function
  METAL_FUNC uint   select    (uint a, uint b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:62:21: note: candidate function
  METAL_FUNC half   select    (half a, half b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:63:21: note: candidate function
  METAL_FUNC float  select    (float a, float b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:151:29: note: candidate function
  METAL_FUNC vec<half,2>    select(vec<half,2> a, vec<half,2> b, vec<bool,2> c) { return vec<half,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:152:29: note: candidate function
  METAL_FUNC vec<float,2>   select(vec<float,2> a, vec<float,2> b, vec<bool,2> c) { return vec<float,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:156:29: note: candidate function
  METAL_FUNC vec<half,3>    select(vec<half,3> a, vec<half,3> b, vec<bool,3> c) { return vec<half,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:157:29: note: candidate function
  METAL_FUNC vec<float,3>   select(vec<float,3> a, vec<float,3> b, vec<bool,3> c) { return vec<float,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:161:29: note: candidate function
  METAL_FUNC vec<half,4>    select(vec<half,4> a, vec<half,4> b, vec<bool,4> c) { return vec<half,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:162:29: note: candidate function
  METAL_FUNC vec<float,4>   select(vec<float,4> a, vec<float,4> b, vec<bool,4> c) { return vec<float,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:167:29: note: candidate function
  METAL_FUNC vec<char,2>    select(vec<char,2> a, vec<char,2> b, vec<bool,2> c) { return vec<char,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:168:29: note: candidate function
  METAL_FUNC vec<uchar,2>   select(vec<uchar,2> a, vec<uchar,2> b, vec<bool,2> c) { return vec<uchar,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:169:29: note: candidate function
  METAL_FUNC vec<short,2>   select(vec<short,2> a, vec<short,2> b, vec<bool,2> c) { return vec<short,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:170:29: note: candidate function
  METAL_FUNC vec<ushort,2>  select(vec<ushort,2> a, vec<ushort,2> b, vec<bool,2> c) { return vec<ushort,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:171:29: note: candidate function
  METAL_FUNC vec<int,2>     select(vec<int,2> a, vec<int,2> b, vec<bool,2> c) { return vec<int,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:172:29: note: candidate function
  METAL_FUNC vec<uint,2>    select(vec<uint,2> a, vec<uint,2> b, vec<bool,2> c) { return vec<uint,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:173:29: note: candidate function
  METAL_FUNC vec<char,3>    select(vec<char,3> a, vec<char,3> b, vec<bool,3> c) { return vec<char,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:174:29: note: candidate function
  METAL_FUNC vec<uchar,3>   select(vec<uchar,3> a, vec<uchar,3> b, vec<bool,3> c) { return vec<uchar,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:175:29: note: candidate function
  METAL_FUNC vec<short,3>   select(vec<short,3> a, vec<short,3> b, vec<bool,3> c) { return vec<short,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:176:29: note: candidate function
  METAL_FUNC vec<ushort,3>  select(vec<ushort,3> a, vec<ushort,3> b, vec<bool,3> c) { return vec<ushort,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:177:29: note: candidate function
  METAL_FUNC vec<int,3>     select(vec<int,3> a, vec<int,3> b, vec<bool,3> c) { return vec<int,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:178:29: note: candidate function
  METAL_FUNC vec<uint,3>    select(vec<uint,3> a, vec<uint,3> b, vec<bool,3> c) { return vec<uint,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:179:29: note: candidate function
  METAL_FUNC vec<char,4>    select(vec<char,4> a, vec<char,4> b, vec<bool,4> c) { return vec<char,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:180:29: note: candidate function
  METAL_FUNC vec<uchar,4>   select(vec<uchar,4> a, vec<uchar,4> b, vec<bool,4> c) { return vec<uchar,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:181:29: note: candidate function
  METAL_FUNC vec<short,4>   select(vec<short,4> a, vec<short,4> b, vec<bool,4> c) { return vec<short,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:182:29: note: candidate function
  METAL_FUNC vec<ushort,4>  select(vec<ushort,4> a, vec<ushort,4> b, vec<bool,4> c) { return vec<ushort,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:183:29: note: candidate function
  METAL_FUNC vec<int,4>     select(vec<int,4> a, vec<int,4> b, vec<bool,4> c) { return vec<int,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:184:29: note: candidate function
  METAL_FUNC vec<uint,4>    select(vec<uint,4> a, vec<uint,4> b, vec<bool,4> c) { return vec<uint,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_1) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T3[] = *(X_T2[d1])
// With Index Variables Made Integral:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range      X_T3      X_T2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_1(
    device void* X_T3_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T3 = static_cast<device ptrdiff_t*>(X_T3_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T3 = agg[0];
  X_T3[0] = LX_T3;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_0) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T2[od0 : _T0] = =(X_I_0[od0])
// With Index Variables Made Integral:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range      X_T2     X_I_0  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_0(
    device void* X_T2_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T2 = static_cast<device ptrdiff_t*>(X_T2_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T2 = agg[0];
  X_T2[0] = LX_T2;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_3) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T2] = =(X_T4[d0])
// With Index Variables Made Integral:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_3(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_5) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T7[1 + d0 : _T4] = =(X_T6[d0])
// With Index Variables Made Integral:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T7      X_T6  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_5(
    device void* X_T7_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T7 = static_cast<device ptrdiff_t*>(X_T7_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T7 = (X_T7 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T7 = agg[0];
  X_T7[0] = LX_T7;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_0) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T2[od0 : _T0] = =(X_I_0[od0])
// With Index Variables Made Integral:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range      X_T2     X_I_0  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_0(
    device void* X_T2_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T2 = static_cast<device ptrdiff_t*>(X_T2_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T2 = agg[0];
  X_T2[0] = LX_T2;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_1) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T3[] = *(X_T2[d1])
// With Index Variables Made Integral:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range      X_T3      X_T2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_1(
    device void* X_T3_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T3 = static_cast<device ptrdiff_t*>(X_T3_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T3 = agg[0];
  X_T3[0] = LX_T3;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_3) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T2] = =(X_T4[d0])
// With Index Variables Made Integral:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_3(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_5) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T7[1 + d0 : _T4] = =(X_T6[d0])
// With Index Variables Made Integral:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T7      X_T6  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_5(
    device void* X_T7_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T7 = static_cast<device ptrdiff_t*>(X_T7_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T7 = (X_T7 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T7 = agg[0];
  X_T7[0] = LX_T7;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_0) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T2[od0 : _T0] = =(X_I_0[od0])
// With Index Variables Made Integral:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range      X_T2     X_I_0  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_0(
    device void* X_T2_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T2 = static_cast<device ptrdiff_t*>(X_T2_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T2 = agg[0];
  X_T2[0] = LX_T2;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_1) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T3[] = *(X_T2[d1])
// With Index Variables Made Integral:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range      X_T3      X_T2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_1(
    device void* X_T3_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T3 = static_cast<device ptrdiff_t*>(X_T3_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T3 = agg[0];
  X_T3[0] = LX_T3;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_3) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T2] = =(X_T4[d0])
// With Index Variables Made Integral:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_3(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_5) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T7[1 + d0 : _T4] = =(X_T6[d0])
// With Index Variables Made Integral:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T7      X_T6  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_5(
    device void* X_T7_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T7 = static_cast<device ptrdiff_t*>(X_T7_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T7 = (X_T7 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T7 = agg[0];
  X_T7[0] = LX_T7;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_0) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T2[od0 : _T0] = =(X_I_0[od0])
// With Index Variables Made Integral:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range      X_T2     X_I_0  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_0(
    device void* X_T2_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T2 = static_cast<device ptrdiff_t*>(X_T2_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T2 = agg[0];
  X_T2[0] = LX_T2;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_1) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T3[] = *(X_T2[d1])
// With Index Variables Made Integral:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range      X_T3      X_T2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_1(
    device void* X_T3_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T3 = static_cast<device ptrdiff_t*>(X_T3_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T3 = agg[0];
  X_T3[0] = LX_T3;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_3) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T2] = =(X_T4[d0])
// With Index Variables Made Integral:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                  2019-07-15 07:44:20.117543: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_219.json

2019-07-15 07:44:20.129694: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_221.json

2019-07-15 07:44:20.139838: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_218.json

2019-07-15 07:44:22.781917: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:22.897439: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:23.040835: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:23.104018: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:23.184973: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:24.352460: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:24.605921: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:25.195935: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:25.328545: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_311.json

2019-07-15 07:44:25.359006: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_307.json

2019-07-15 07:44:25.485162: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:28.054417: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:28.170137: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:28.315463: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:28.375814: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:28.458204: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:29.587785: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:29.841637: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:30.424721: I /Users/sam/dev/ngraph-bridge/src/ngraph_rewrite_pass.cc:247] NGraph using backend: PLAIDML
2019-07-15 07:44:30.551022: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_491.json

 1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_3(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_5) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T7[1 + d0 : _T4] = =(X_T6[d0])
// With Index Variables Made Integral:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T7      X_T6  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_5(
    device void* X_T7_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T7 = static_cast<device ptrdiff_t*>(X_T7_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T7 = (X_T7 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T7 = agg[0];
  X_T7[0] = LX_T7;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_0) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T2[od0 : _T0] = =(X_I_0[od0])
// With Index Variables Made Integral:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range      X_T2     X_I_0  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_0(
    device void* X_T2_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T2 = static_cast<device ptrdiff_t*>(X_T2_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T2 = agg[0];
  X_T2[0] = LX_T2;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_1) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T3[] = *(X_T2[d1])
// With Index Variables Made Integral:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range      X_T3      X_T2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_1(
    device void* X_T3_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T3 = static_cast<device ptrdiff_t*>(X_T3_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T3 = agg[0];
  X_T3[0] = LX_T3;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_3) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T2] = =(X_T4[d0])
// With Index Variables Made Integral:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_3(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_5) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T7[1 + d0 : _T4] = =(X_T6[d0])
// With Index Variables Made Integral:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T7      X_T6  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_5(
    device void* X_T7_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T7 = static_cast<device ptrdiff_t*>(X_T7_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T7 = (X_T7 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T7 = agg[0];
  X_T7[0] = LX_T7;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_0) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T2[od0 : _T0] = =(X_I_0[od0])
// With Index Variables Made Integral:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T2[od0 : _T0] = =(X_I_0[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range      X_T2     X_I_0  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_0(
    device void* X_T2_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T2 = static_cast<device ptrdiff_t*>(X_T2_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T2 = agg[0];
  X_T2[0] = LX_T2;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_1) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T3[] = *(X_T2[d1])
// With Index Variables Made Integral:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T3[] = *(X_T2[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range      X_T3      X_T2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_1(
    device void* X_T3_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T3 = static_cast<device ptrdiff_t*>(X_T3_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T3 = agg[0];
  X_T3[0] = LX_T3;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_3) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T5[d0 : _T2] = =(X_T4[d0])
// With Index Variables Made Integral:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T5[d0 : _T2] = =(X_T4[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T5      X_T4  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_3(
    device void* X_T5_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T5 = static_cast<device ptrdiff_t*>(X_T5_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T5 = agg[0];
  X_T5[0] = LX_T5;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_2) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T1] = =(X_T3[])
// With Index Variables Made Integral:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T1] = =(X_T3[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4      X_T3  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_2(
    device void* X_T4_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T4 = static_cast<device ptrdiff_t*>(X_T4_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  X_T4[0] = LX_T4;
}

[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c5_sdk_5) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:20z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T7[1 + d0 : _T4] = =(X_T6[d0])
// With Index Variables Made Integral:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T7[1 + d0 : _T4] = =(X_T6[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T7      X_T6  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c5_sdk_5(
    device void* X_T7_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T7 = static_cast<device ptrdiff_t*>(X_T7_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T7 = (X_T7 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T7 = agg[0];
  X_T7[0] = LX_T7;
}

[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Compiler::Build> Compilation failure:
Compilation failed: 

program_source:32:7: warning: unused variable 'tid'
  int tid = _tid;
      ^
program_source:134:14: error: call to 'select' is ambiguous
    agg[0] = select((ptrdiff_t)agg[0], (ptrdiff_t)val1, (bool)o0_cond);
             ^~~~~~
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:56:21: note: candidate function
  METAL_FUNC char   select    (char a, char b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:57:21: note: candidate function
  METAL_FUNC uchar  select    (uchar a, uchar b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:58:21: note: candidate function
  METAL_FUNC short  select    (short a, short b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:59:21: note: candidate function
  METAL_FUNC ushort select    (ushort a, ushort b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:60:21: note: candidate function
  METAL_FUNC int    select    (int a, int b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:61:21: note: candidate function
  METAL_FUNC uint   select    (uint a, uint b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:62:21: note: candidate function
  METAL_FUNC half   select    (half a, half b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:63:21: note: candidate function
  METAL_FUNC float  select    (float a, float b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:151:29: note: candidate function
  METAL_FUNC vec<half,2>    select(vec<half,2> a, vec<half,2> b, vec<bool,2> c) { return vec<half,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:152:29: note: candidate function
  METAL_FUNC vec<float,2>   select(vec<float,2> a, vec<float,2> b, vec<bool,2> c) { return vec<float,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:156:29: note: candidate function
  METAL_FUNC vec<half,3>    select(vec<half,3> a, vec<half,3> b, vec<bool,3> c) { return vec<half,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:157:29: note: candidate function
  METAL_FUNC vec<float,3>   select(vec<float,3> a, vec<float,3> b, vec<bool,3> c) { return vec<float,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:161:29: note: candidate function
  METAL_FUNC vec<half,4>    select(vec<half,4> a, vec<half,4> b, vec<bool,4> c) { return vec<half,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:162:29: note: candidate function
  METAL_FUNC vec<float,4>   select(vec<float,4> a, vec<float,4> b, vec<bool,4> c) { return vec<float,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:167:29: note: candidate function
  METAL_FUNC vec<char,2>    select(vec<char,2> a, vec<char,2> b, vec<bool,2> c) { return vec<char,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:168:29: note: candidate function
  METAL_FUNC vec<uchar,2>   select(vec<uchar,2> a, vec<uchar,2> b, vec<bool,2> c) { return vec<uchar,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:169:29: note: candidate function
  METAL_FUNC vec<short,2>   select(vec<short,2> a, vec<short,2> b, vec<bool,2> c) { return vec<short,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:170:29: note: candidate function
  METAL_FUNC vec<ushort,2>  select(vec<ushort,2> a, vec<ushort,2> b, vec<bool,2> c) { return vec<ushort,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:171:29: note: candidate function
  METAL_FUNC vec<int,2>     select(vec<int,2> a, vec<int,2> b, vec<bool,2> c) { return vec<int,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:172:29: note: candidate function
  METAL_FUNC vec<uint,2>    select(vec<uint,2> a, vec<uint,2> b, vec<bool,2> c) { return vec<uint,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:173:29: note: candidate function
  METAL_FUNC vec<char,3>    select(vec<char,3> a, vec<char,3> b, vec<bool,3> c) { return vec<char,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:174:29: note: candidate function
  METAL_FUNC vec<uchar,3>   select(vec<uchar,3> a, vec<uchar,3> b, vec<bool,3> c) { return vec<uchar,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:175:29: note: candidate function
  METAL_FUNC vec<short,3>   select(vec<short,3> a, vec<short,3> b, vec<bool,3> c) { return vec<short,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:176:29: note: candidate function
  METAL_FUNC vec<ushort,3>  select(vec<ushort,3> a, vec<ushort,3> b, vec<bool,3> c) { return vec<ushort,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:177:29: note: candidate function
  METAL_FUNC vec<int,3>     select(vec<int,3> a, vec<int,3> b, vec<bool,3> c) { return vec<int,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:178:29: note: candidate function
  METAL_FUNC vec<uint,3>    select(vec<uint,3> a, vec<uint,3> b, vec<bool,3> c) { return vec<uint,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:179:29: note: candidate function
  METAL_FUNC vec<char,4>    select(vec<char,4> a, vec<char,4> b, vec<bool,4> c) { return vec<char,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:180:29: note: candidate function
  METAL_FUNC vec<uchar,4>   select(vec<uchar,4> a, vec<uchar,4> b, vec<bool,4> c) { return vec<uchar,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:181:29: note: candidate function
  METAL_FUNC vec<short,4>   select(vec<short,4> a, vec<short,4> b, vec<bool,4> c) { return vec<short,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:182:29: note: candidate function
  METAL_FUNC vec<ushort,4>  select(vec<ushort,4> a, vec<ushort,4> b, vec<bool,4> c) { return vec<ushort,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:183:29: note: candidate function
  METAL_FUNC vec<int,4>     select(vec<int,4> a, vec<int,4> b, vec<bool,4> c) { return vec<int,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:184:29: note: candidate function
  METAL_FUNC vec<uint,4>    select(vec<uint,4> a, vec<uint,4> b, vec<bool,4> c) { return vec<uint,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^

[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_31) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T50[] = *(X_T49[d1])
// With Index Variables Made Integral:
// X_T50[] = *(X_T49[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T50[] = *(X_T49[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range     X_T50     X_T49  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_31(
    device void* X_T50_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T50 = static_cast<device ptrdiff_t*>(X_T50_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T50 = agg[0];
  X_T50[0] = LX_T50;
}

[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_30) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T49[od0 : _T48] = =(X_I_2[od0])
// With Index Variables Made Integral:
// X_T49[od0 : _T48] = =(X_I_2[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T49[od0 : _T48] = =(X_I_2[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range     X_T49     X_I_2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_30(
    device void* X_T49_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T49 = static_cast<device ptrdiff_t*>(X_T49_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T49 = agg[0];
  X_T49[0] = LX_T49;
}

[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_33) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T52[d0 : _T50] = =(X_T51[d0])
// With Index Variables Made Integral:
// X_T52[d0 : _T50] = =(X_T51[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T52[d0 : _T50] = =(X_T51[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T52     X_T51  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_33(
    device void* X_T52_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T52 = static_cast<device ptrdiff_t*>(X_T52_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T52 = agg[0];
  X_T52[0] = LX_T52;
}

[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_32) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T51[d0 : _T49] = =(X_T50[])
// With Index Variables Made Integral:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T51     X_T50  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_32(
    device void* X_T51_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T51 = static_cast<device ptrdiff_t*>(X_T51_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T51 = agg[0];
  X_T51[0] = LX_T51;
}

[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_32) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T51[d0 : _T49] = =(X_T50[])
// With Index Variables Made Integral:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T51     X_T50  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_32(
    device void* X_T51_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T51 = static_cast<device ptrdiff_t*>(X_T51_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T51 = agg[0];
  X_T51[0] = LX_T51;
}

[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_35) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:25z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T54[1 + d0 : _T52] = =(X_T53[d0])
// With Index Variables Made Integral:
// X_T54[1 + d0 : _T52] = =(X_T53[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T54[1 + d0 : _T52] = =(X_T53[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T54     X_T53  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_35(
    device void* X_T54_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T54 = static_cast<device ptrdiff_t*>(X_T54_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T54 = (X_T54 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T54 = agg[0];
  X_T54[0] = LX_T54;
}

[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c9_sdk_1) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T4[d0 : _T3] = =(X_I_0[])
// With Index Variables Made Integral:
// X_T4[d0 : _T3] = =(X_I_0[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T4[d0 : _T3] = =(X_I_0[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range      X_T4     X_I_0  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Elementwise input X_I_1 shape: i64(1):(1):8 bytes
// Elementwise input X_I_2 shape: i64(1):(1):8 bytes
// Elementwise input X_I_3 shape: i64(1):(1):8 bytes
// Elementwise input X_I_4 shape: i64(1):(1):8 bytes
// Elementwise input X_I_5 shape: i64(1):(1):8 bytes
// Elementwise input X_I_6 shape: i64(1):(1):8 bytes
// Elementwise op: X_T5 = cmp_ge(X_I_1, X_T4)
// Elementwise op: X_T8 = cmp_ge(X_I_2, X_T4)
// Elementwise op: X_T11 = cmp_ge(X_I_3, X_T4)
// Elementwise op: X_T14 = cmp_ge(X_I_4, X_T4)
// Elementwise op: X_T17 = cmp_ge(X_I_5, X_T4)
// Elementwise op: X_T18 = cond(X_T17, X_I_5_0, X_T2)
// Elementwise op: X_T20 = cmp_ge(X_I_6, X_T4)
// Elementwise op: X_T21 = cond(X_T20, X_I_6_0, X_T2)
// Elementwise op: X_T22 = as_int(X_T21, X_T1)
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 11
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 176
// Computed mem write: 768
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c9_sdk_1(
    device void* X_T11_arg_ [[ buffer(0) ]],
    device void* X_T14_arg_ [[ buffer(1) ]],
    device void* X_T18_arg_ [[ buffer(2) ]],
    device void* X_T22_arg_ [[ buffer(3) ]],
    device void* X_T5_arg_ [[ buffer(4) ]],
    device void* X_T8_arg_ [[ buffer(5) ]],
    device const void* in1_arg_ [[ buffer(6) ]],
    device const void* X_I_1_arg_ [[ buffer(7) ]],
    device const void* X_I_2_arg_ [[ buffer(8) ]],
    device const void* X_I_3_arg_ [[ buffer(9) ]],
    device const void* X_I_4_arg_ [[ buffer(10) ]],
    device const void* X_I_5_arg_ [[ buffer(11) ]],
    device const void* X_I_6_arg_ [[ buffer(12) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device bool* X_T11 = static_cast<device bool*>(X_T11_arg_);
  device bool* X_T14 = static_cast<device bool*>(X_T14_arg_);
  device int* X_T18 = static_cast<device int*>(X_T18_arg_);
  device char* X_T22 = static_cast<device char*>(X_T22_arg_);
  device bool* X_T5 = static_cast<device bool*>(X_T5_arg_);
  device bool* X_T8 = static_cast<device bool*>(X_T8_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  device const ptrdiff_t* X_I_1 = static_cast<device const ptrdiff_t*>(X_I_1_arg_);
  device const ptrdiff_t* X_I_2 = static_cast<device const ptrdiff_t*>(X_I_2_arg_);
  device const ptrdiff_t* X_I_3 = static_cast<device const ptrdiff_t*>(X_I_3_arg_);
  device const ptrdiff_t* X_I_4 = static_cast<device const ptrdiff_t*>(X_I_4_arg_);
  device const ptrdiff_t* X_I_5 = static_cast<device const ptrdiff_t*>(X_I_5_arg_);
  device const ptrdiff_t* X_I_6 = static_cast<device const ptrdiff_t*>(X_I_6_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T4 = agg[0];
  ptrdiff_t LX_I_1 = X_I_1[0];
  ptrdiff_t LX_I_2 = X_I_2[0];
  ptrdiff_t LX_I_3 = X_I_3[0];
  ptrdiff_t LX_I_4 = X_I_4[0];
  ptrdiff_t LX_I_5 = X_I_5[0];
  ptrdiff_t LX_I_6 = X_I_6[0];
  bool LX_T5 = (LX_I_1 >= LX_T4);
  bool LX_T8 = (LX_I_2 >= LX_T4);
  bool LX_T11 = (LX_I_3 >= LX_T4);
  bool LX_T14 = (LX_I_4 >= LX_T4);
  bool LX_T17 = (LX_I_5 >= LX_T4);
  int LX_T18 = select((char)0, (char)1, (bool)LX_T17);
  bool LX_T20 = (LX_I_6 >= LX_T4);
  int LX_T21 = select((char)0, (char)1, (bool)LX_T20);
  char LX_T22 = LX_T21;
  X_T11[0] = LX_T11;
  X_T14[0] = LX_T14;
  X_T18[0] = LX_T18;
  X_T22[0] = LX_T22;
  X_T5[0] = LX_T5;
  X_T8[0] = LX_T8;
}

[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_31) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T50[] = *(X_T49[d1])
// With Index Variables Made Integral:
// X_T50[] = *(X_T49[d1]), 500000000 + d1 < 1000000000
// Constraints:{ 0 <= d1 < 1, 0 <= 500000000 + d1 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d1 < 1 }
// Defracted:
// X_T50[] = *(X_T49[d1]), 500000000 + d1 < 1000000000
// Flattened:
//              Range     X_T50     X_T49  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64():():8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_31(
    device void* X_T50_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T50 = static_cast<device ptrdiff_t*>(X_T50_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {1, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    ptrdiff_t agg_rhs = (agg[0] * val1);
    agg[0] = agg_rhs;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T50 = agg[0];
  X_T50[0] = LX_T50;
}

[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_30) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T49[od0 : _T48] = =(X_I_2[od0])
// With Index Variables Made Integral:
// X_T49[od0 : _T48] = =(X_I_2[od0]), 500000000 + od0 < 1000000000
// Constraints:{ 0 <= od0 < 1, 0 <= od0 < 2, 0 <= 500000000 + od0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= od0 < 1 }
// Defracted:
// X_T49[od0 : _T48] = =(X_I_2[od0]), 500000000 + od0 < 1000000000
// Flattened:
//              Range     X_T49     X_I_2  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_30(
    device void* X_T49_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T49 = static_cast<device ptrdiff_t*>(X_T49_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 1)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T49 = agg[0];
  X_T49[0] = LX_T49;
}

[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_33) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T52[d0 : _T50] = =(X_T51[d0])
// With Index Variables Made Integral:
// X_T52[d0 : _T50] = =(X_T51[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T52[d0 : _T50] = =(X_T51[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T52     X_T51  
//      off                   0         0  
//      vec                   12019-07-15 07:44:30.577377: I /Users/sam/dev/ngraph-bridge/src/ngraph_utils.cc:258] Serializing graph to: tf_function_error_ngraph_cluster_490.json

[ RUN      ] BaseTest.test_end_to_end_deep
[  FAILED  ] BaseTest.test_end_to_end_deep
[ RUN      ] BaseTest.test_end_to_end_wide
[  FAILED  ] BaseTest.test_end_to_end_wide
[ RUN      ] BaseTest.test_end_to_end_wide_deep
[  FAILED  ] BaseTest.test_end_to_end_wide_deep
[ RUN      ] BaseTest.test_input_fn
[       OK ] BaseTest.test_input_fn
[ RUN      ] BaseTest.test_session
[       OK ] BaseTest.test_session
[ RUN      ] BaseTest.test_wide_deep_estimator_training
[  FAILED  ] BaseTest.test_wide_deep_estimator_training
======================================================================
ERROR: test_end_to_end_deep (__main__.BaseTest)
test_end_to_end_deep (__main__.BaseTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _do_call
    return fn(*args)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: SC compilation failure
Likely a problem in input program, however cannot be more specific

	 [[{{node ngraph_cluster_58}}]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "wide_deep_test.py", line 137, in test_end_to_end_deep
    synth=False, max_train=None)
  File "/Users/sam/dev/models/official/utils/testing/integration.py", line 58, in run_synthetic
    main(args)
  File "/Users/sam/dev/models/official/wide_deep/wide_deep.py", line 204, in main
    model.train(input_fn=train_input_fn, hooks=train_hooks)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 367, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1158, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1192, in _train_model_default
    saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1484, in _train_with_estimator_spec
    _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 754, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1252, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1353, in run
    raise six.reraise(*original_exc_info)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/six.py", line 693, in reraise
    raise value
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1338, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1411, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1169, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
    run_metadata_ptr)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1173, in _run
    feed_dict_tensor, options, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
    run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: SC compilation failure
Likely a problem in input program, however cannot be more specific

	 [[{{node ngraph_cluster_58}}]]

======================================================================
ERROR: test_end_to_end_wide (__main__.BaseTest)
test_end_to_end_wide (__main__.BaseTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _do_call
    return fn(*args)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: Compilation failed: 

program_source:32:7: warning: unused variable 'tid'
  int tid = _tid;
      ^
program_source:134:14: error: call to 'select' is ambiguous
    agg[0] = select((ptrdiff_t)agg[0], (ptrdiff_t)val1, (bool)o0_cond);
             ^~~~~~
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:56:21: note: candidate function
  METAL_FUNC char   select    (char a, char b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:57:21: note: candidate function
  METAL_FUNC uchar  select    (uchar a, uchar b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:58:21: note: candidate function
  METAL_FUNC short  select    (short a, short b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:59:21: note: candidate function
  METAL_FUNC ushort select    (ushort a, ushort b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:60:21: note: candidate function
  METAL_FUNC int    select    (int a, int b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:61:21: note: candidate function
  METAL_FUNC uint   select    (uint a, uint b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:62:21: note: candidate function
  METAL_FUNC half   select    (half a, half b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:63:21: note: candidate function
  METAL_FUNC float  select    (float a, float b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:151:29: note: candidate function
  METAL_FUNC vec<half,2>    select(vec<half,2> a, vec<half,2> b, vec<bool,2> c) { return vec<half,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:152:29: note: candidate function
  METAL_FUNC vec<float,2>   select(vec<float,2> a, vec<float,2> b, vec<bool,2> c) { return vec<float,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:156:29: note: candidate function
  METAL_FUNC vec<half,3>    select(vec<half,3> a, vec<half,3> b, vec<bool,3> c) { return vec<half,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:157:29: note: candidate function
  METAL_FUNC vec<float,3>   select(vec<float,3> a, vec<float,3> b, vec<bool,3> c) { return vec<float,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:161:29: note: candidate function
  METAL_FUNC vec<half,4>    select(vec<half,4> a, vec<half,4> b, vec<bool,4> c) { return vec<half,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:162:29: note: candidate function
  METAL_FUNC vec<float,4>   select(vec<float,4> a, vec<float,4> b, vec<bool,4> c) { return vec<float,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:167:29: note: candidate function
  METAL_FUNC vec<char,2>    select(vec<char,2> a, vec<char,2> b, vec<bool,2> c) { return vec<char,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:168:29: note: candidate function
  METAL_FUNC vec<uchar,2>   select(vec<uchar,2> a, vec<uchar,2> b, vec<bool,2> c) { return vec<uchar,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:169:29: note: candidate function
  METAL_FUNC vec<short,2>   select(vec<short,2> a, vec<short,2> b, vec<bool,2> c) { return vec<short,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:170:29: note: candidate function
  METAL_FUNC vec<ushort,2>  select(vec<ushort,2> a, vec<ushort,2> b, vec<bool,2> c) { return vec<ushort,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:171:29: note: candidate function
  METAL_FUNC vec<int,2>     select(vec<int,2> a, vec<int,2> b, vec<bool,2> c) { return vec<int,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:172:29: note: candidate function
  METAL_FUNC vec<uint,2>    select(vec<uint,2> a, vec<uint,2> b, vec<bool,2> c) { return vec<uint,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:173:29: note: candidate function
  METAL_FUNC vec<char,3>    select(vec<char,3> a, vec<char,3> b, vec<bool,3> c) { return vec<char,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:174:29: note: candidate function
  METAL_FUNC vec<uchar,3>   select(vec<uchar,3> a, vec<uchar,3> b, vec<bool,3> c) { return vec<uchar,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:175:29: note: candidate function
  METAL_FUNC vec<short,3>   select(vec<short,3> a, vec<short,3> b, vec<bool,3> c) { return vec<short,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:176:29: note: candidate function
  METAL_FUNC vec<ushort,3>  select(vec<ushort,3> a, vec<ushort,3> b, vec<bool,3> c) { return vec<ushort,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:177:29: note: candidate function
  METAL_FUNC vec<int,3>     select(vec<int,3> a, vec<int,3> b, vec<bool,3> c) { return vec<int,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:178:29: note: candidate function
  METAL_FUNC vec<uint,3>    select(vec<uint,3> a, vec<uint,3> b, vec<bool,3> c) { return vec<uint,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:179:29: note: candidate function
  METAL_FUNC vec<char,4>    select(vec<char,4> a, vec<char,4> b, vec<bool,4> c) { return vec<char,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:180:29: note: candidate function
  METAL_FUNC vec<uchar,4>   select(vec<uchar,4> a, vec<uchar,4> b, vec<bool,4> c) { return vec<uchar,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:181:29: note: candidate function
  METAL_FUNC vec<short,4>   select(vec<short,4> a, vec<short,4> b, vec<bool,4> c) { return vec<short,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:182:29: note: candidate function
  METAL_FUNC vec<ushort,4>  select(vec<ushort,4> a, vec<ushort,4> b, vec<bool,4> c) { return vec<ushort,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:183:29: note: candidate function
  METAL_FUNC vec<int,4>     select(vec<int,4> a, vec<int,4> b, vec<bool,4> c) { return vec<int,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:184:29: note: candidate function
  METAL_FUNC vec<uint,4>    select(vec<uint,4> a, vec<uint,4> b, vec<bool,4> c) { return vec<uint,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^


	 [[{{node ngraph_cluster_206}}]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "wide_deep_test.py", line 129, in test_end_to_end_wide
    synth=False, max_train=None)
  File "/Users/sam/dev/models/official/utils/testing/integration.py", line 58, in run_synthetic
    main(args)
  File "/Users/sam/dev/models/official/wide_deep/wide_deep.py", line 204, in main
    model.train(input_fn=train_input_fn, hooks=train_hooks)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 367, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1158, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1192, in _train_model_default
    saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1484, in _train_with_estimator_spec
    _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 754, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1252, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1353, in run
    raise six.reraise(*original_exc_info)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/six.py", line 693, in reraise
    raise value
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1338, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1411, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1169, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
    run_metadata_ptr)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1173, in _run
    feed_dict_tensor, options, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
    run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: Compilation failed: 

program_source:32:7: warning: unused variable 'tid'
  int tid = _tid;
      ^
program_source:134:14: error: call to 'select' is ambiguous
    agg[0] = select((ptrdiff_t)agg[0], (ptrdiff_t)val1, (bool)o0_cond);
             ^~~~~~
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:56:21: note: candidate function
  METAL_FUNC char   select    (char a, char b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:57:21: note: candidate function
  METAL_FUNC uchar  select    (uchar a, uchar b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:58:21: note: candidate function
  METAL_FUNC short  select    (short a, short b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:59:21: note: candidate function
  METAL_FUNC ushort select    (ushort a, ushort b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:60:21: note: candidate function
  METAL_FUNC int    select    (int a, int b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:61:21: note: candidate function
  METAL_FUNC uint   select    (uint a, uint b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:62:21: note: candidate function
  METAL_FUNC half   select    (half a, half b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:63:21: note: candidate function
  METAL_FUNC float  select    (float a, float b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:151:29: note: candidate function
  METAL_FUNC vec<half,2>    select(vec<half,2> a, vec<half,2> b, vec<bool,2> c) { return vec<half,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:152:29: note: candidate function
  METAL_FUNC vec<float,2>   select(vec<float,2> a, vec<float,2> b, vec<bool,2> c) { return vec<float,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:156:29: note: candidate function
  METAL_FUNC vec<half,3>    select(vec<half,3> a, vec<half,3> b, vec<bool,3> c) { return vec<half,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:157:29: note: candidate function
  METAL_FUNC vec<float,3>   select(vec<float,3> a, vec<float,3> b, vec<bool,3> c) { return vec<float,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:161:29: note: candidate function
  METAL_FUNC vec<half,4>    select(vec<half,4> a, vec<half,4> b, vec<bool,4> c) { return vec<half,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:162:29: note: candidate function
  METAL_FUNC vec<float,4>   select(vec<float,4> a, vec<float,4> b, vec<bool,4> c) { return vec<float,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:167:29: note: candidate function
  METAL_FUNC vec<char,2>    select(vec<char,2> a, vec<char,2> b, vec<bool,2> c) { return vec<char,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:168:29: note: candidate function
  METAL_FUNC vec<uchar,2>   select(vec<uchar,2> a, vec<uchar,2> b, vec<bool,2> c) { return vec<uchar,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:169:29: note: candidate function
  METAL_FUNC vec<short,2>   select(vec<short,2> a, vec<short,2> b, vec<bool,2> c) { return vec<short,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:170:29: note: candidate function
  METAL_FUNC vec<ushort,2>  select(vec<ushort,2> a, vec<ushort,2> b, vec<bool,2> c) { return vec<ushort,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:171:29: note: candidate function
  METAL_FUNC vec<int,2>     select(vec<int,2> a, vec<int,2> b, vec<bool,2> c) { return vec<int,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:172:29: note: candidate function
  METAL_FUNC vec<uint,2>    select(vec<uint,2> a, vec<uint,2> b, vec<bool,2> c) { return vec<uint,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:173:29: note: candidate function
  METAL_FUNC vec<char,3>    select(vec<char,3> a, vec<char,3> b, vec<bool,3> c) { return vec<char,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:174:29: note: candidate function
  METAL_FUNC vec<uchar,3>   select(vec<uchar,3> a, vec<uchar,3> b, vec<bool,3> c) { return vec<uchar,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:175:29: note: candidate function
  METAL_FUNC vec<short,3>   select(vec<short,3> a, vec<short,3> b, vec<bool,3> c) { return vec<short,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:176:29: note: candidate function
  METAL_FUNC vec<ushort,3>  select(vec<ushort,3> a, vec<ushort,3> b, vec<bool,3> c) { return vec<ushort,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:177:29: note: candidate function
  METAL_FUNC vec<int,3>     select(vec<int,3> a, vec<int,3> b, vec<bool,3> c) { return vec<int,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:178:29: note: candidate function
  METAL_FUNC vec<uint,3>    select(vec<uint,3> a, vec<uint,3> b, vec<bool,3> c) { return vec<uint,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:179:29: note: candidate function
  METAL_FUNC vec<char,4>    select(vec<char,4> a, vec<char,4> b, vec<bool,4> c) { return vec<char,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:180:29: note: candidate function
  METAL_FUNC vec<uchar,4>   select(vec<uchar,4> a, vec<uchar,4> b, vec<bool,4> c) { return vec<uchar,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:181:29: note: candidate function
  METAL_FUNC vec<short,4>   select(vec<short,4> a, vec<short,4> b, vec<bool,4> c) { return vec<short,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:182:29: note: candidate function
  METAL_FUNC vec<ushort,4>  select(vec<ushort,4> a, vec<ushort,4> b, vec<bool,4> c) { return vec<ushort,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:183:29: note: candidate function
  METAL_FUNC vec<int,4>     select(vec<int,4> a, vec<int,4> b, vec<bool,4> c) { return vec<int,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:184:29: note: candidate function
  METAL_FUNC vec<uint,4>    select(vec<uint,4> a, vec<uint,4> b, vec<bool,4> c) { return vec<uint,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^


	 [[{{node ngraph_cluster_206}}]]

======================================================================
ERROR: test_end_to_end_wide_deep (__main__.BaseTest)
test_end_to_end_wide_deep (__main__.BaseTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _do_call
    return fn(*args)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: Compilation failed: 

program_source:32:7: warning: unused variable 'tid'
  int tid = _tid;
      ^
program_source:134:14: error: call to 'select' is ambiguous
    agg[0] = select((ptrdiff_t)agg[0], (ptrdiff_t)val1, (bool)o0_cond);
             ^~~~~~
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:56:21: note: candidate function
  METAL_FUNC char   select    (char a, char b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:57:21: note: candidate function
  METAL_FUNC uchar  select    (uchar a, uchar b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:58:21: note: candidate function
  METAL_FUNC short  select    (short a, short b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:59:21: note: candidate function
  METAL_FUNC ushort select    (ushort a, ushort b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:60:21: note: candidate function
  METAL_FUNC int    select    (int a, int b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:61:21: note: candidate function
  METAL_FUNC uint   select    (uint a, uint b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:62:21: note: candidate function
  METAL_FUNC half   select    (half a, half b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:63:21: note: candidate function
  METAL_FUNC float  select    (float a, float b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:151:29: note: candidate function
  METAL_FUNC vec<half,2>    select(vec<half,2> a, vec<half,2> b, vec<bool,2> c) { return vec<half,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:152:29: note: candidate function
  METAL_FUNC vec<float,2>   select(vec<float,2> a, vec<float,2> b, vec<bool,2> c) { return vec<float,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:156:29: note: candidate function
  METAL_FUNC vec<half,3>    select(vec<half,3> a, vec<half,3> b, vec<bool,3> c) { return vec<half,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:157:29: note: candidate function
  METAL_FUNC vec<float,3>   select(vec<float,3> a, vec<float,3> b, vec<bool,3> c) { return vec<float,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:161:29: note: candidate function
  METAL_FUNC vec<half,4>    select(vec<half,4> a, vec<half,4> b, vec<bool,4> c) { return vec<half,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:162:29: note: candidate function
  METAL_FUNC vec<float,4>   select(vec<float,4> a, vec<float,4> b, vec<bool,4> c) { return vec<float,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:167:29: note: candidate function
  METAL_FUNC vec<char,2>    select(vec<char,2> a, vec<char,2> b, vec<bool,2> c) { return vec<char,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:168:29: note: candidate function
  METAL_FUNC vec<uchar,2>   select(vec<uchar,2> a, vec<uchar,2> b, vec<bool,2> c) { return vec<uchar,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:169:29: note: candidate function
  METAL_FUNC vec<short,2>   select(vec<short,2> a, vec<short,2> b, vec<bool,2> c) { return vec<short,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:170:29: note: candidate function
  METAL_FUNC vec<ushort,2>  select(vec<ushort,2> a, vec<ushort,2> b, vec<bool,2> c) { return vec<ushort,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:171:29: note: candidate function
  METAL_FUNC vec<int,2>     select(vec<int,2> a, vec<int,2> b, vec<bool,2> c) { return vec<int,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:172:29: note: candidate function
  METAL_FUNC vec<uint,2>    select(vec<uint,2> a, vec<uint,2> b, vec<bool,2> c) { return vec<uint,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:173:29: note: candidate function
  METAL_FUNC vec<char,3>    select(vec<char,3> a, vec<char,3> b, vec<bool,3> c) { return vec<char,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:174:29: note: candidate function
  METAL_FUNC vec<uchar,3>   select(vec<uchar,3> a, vec<uchar,3> b, vec<bool,3> c) { return vec<uchar,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:175:29: note: candidate function
  METAL_FUNC vec<short,3>   select(vec<short,3> a, vec<short,3> b, vec<bool,3> c) { return vec<short,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:176:29: note: candidate function
  METAL_FUNC vec<ushort,3>  select(vec<ushort,3> a, vec<ushort,3> b, vec<bool,3> c) { return vec<ushort,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:177:29: note: candidate function
  METAL_FUNC vec<int,3>     select(vec<int,3> a, vec<int,3> b, vec<bool,3> c) { return vec<int,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:178:29: note: candidate function
  METAL_FUNC vec<uint,3>    select(vec<uint,3> a, vec<uint,3> b, vec<bool,3> c) { return vec<uint,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:179:29: note: candidate function
  METAL_FUNC vec<char,4>    select(vec<char,4> a, vec<char,4> b, vec<bool,4> c) { return vec<char,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:180:29: note: candidate function
  METAL_FUNC vec<uchar,4>   select(vec<uchar,4> a, vec<uchar,4> b, vec<bool,4> c) { return vec<uchar,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:181:29: note: candidate function
  METAL_FUNC vec<short,4>   select(vec<short,4> a, vec<short,4> b, vec<bool,4> c) { return vec<short,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:182:29: note: candidate function
  METAL_FUNC vec<ushort,4>  select(vec<ushort,4> a, vec<ushort,4> b, vec<bool,4> c) { return vec<ushort,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:183:29: note: candidate function
  METAL_FUNC vec<int,4>     select(vec<int,4> a, vec<int,4> b, vec<bool,4> c) { return vec<int,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:184:29: note: candidate function
  METAL_FUNC vec<uint,4>    select(vec<uint,4> a, vec<uint,4> b, vec<bool,4> c) { return vec<uint,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^


	 [[{{node ngraph_cluster_311}}]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "wide_deep_test.py", line 145, in test_end_to_end_wide_deep
    synth=False, max_train=None)
  File "/Users/sam/dev/models/official/utils/testing/integration.py", line 58, in run_synthetic
    main(args)
  File "/Users/sam/dev/models/official/wide_deep/wide_deep.py", line 204, in main
    model.train(input_fn=train_input_fn, hooks=train_hooks)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 367, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1158, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1192, in _train_model_default
    saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1484, in _train_with_estimator_spec
    _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 754, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1252, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1353, in run
    raise six.reraise(*original_exc_info)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/six.py", line 693, in reraise
    raise value
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1338, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1411, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1169, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
    run_metadata_ptr)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1173, in _run
    feed_dict_tensor, options, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
    run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: Compilation failed: 

program_source:32:7: warning: unused variable 'tid'
  int tid = _tid;
      ^
program_source:134:14: error: call to 'select' is ambiguous
    agg[0] = select((ptrdiff_t)agg[0], (ptrdiff_t)val1, (bool)o0_cond);
             ^~~~~~
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:56:21: note: candidate function
  METAL_FUNC char   select    (char a, char b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:57:21: note: candidate function
  METAL_FUNC uchar  select    (uchar a, uchar b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:58:21: note: candidate function
  METAL_FUNC short  select    (short a, short b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:59:21: note: candidate function
  METAL_FUNC ushort select    (ushort a, ushort b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:60:21: note: candidate function
  METAL_FUNC int    select    (int a, int b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:61:21: note: candidate function
  METAL_FUNC uint   select    (uint a, uint b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:62:21: note: candidate function
  METAL_FUNC half   select    (half a, half b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:63:21: note: candidate function
  METAL_FUNC float  select    (float a, float b, bool c) { return c ? b : a; };
                    ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:151:29: note: candidate function
  METAL_FUNC vec<half,2>    select(vec<half,2> a, vec<half,2> b, vec<bool,2> c) { return vec<half,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:152:29: note: candidate function
  METAL_FUNC vec<float,2>   select(vec<float,2> a, vec<float,2> b, vec<bool,2> c) { return vec<float,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:156:29: note: candidate function
  METAL_FUNC vec<half,3>    select(vec<half,3> a, vec<half,3> b, vec<bool,3> c) { return vec<half,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:157:29: note: candidate function
  METAL_FUNC vec<float,3>   select(vec<float,3> a, vec<float,3> b, vec<bool,3> c) { return vec<float,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:161:29: note: candidate function
  METAL_FUNC vec<half,4>    select(vec<half,4> a, vec<half,4> b, vec<bool,4> c) { return vec<half,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:162:29: note: candidate function
  METAL_FUNC vec<float,4>   select(vec<float,4> a, vec<float,4> b, vec<bool,4> c) { return vec<float,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:167:29: note: candidate function
  METAL_FUNC vec<char,2>    select(vec<char,2> a, vec<char,2> b, vec<bool,2> c) { return vec<char,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:168:29: note: candidate function
  METAL_FUNC vec<uchar,2>   select(vec<uchar,2> a, vec<uchar,2> b, vec<bool,2> c) { return vec<uchar,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:169:29: note: candidate function
  METAL_FUNC vec<short,2>   select(vec<short,2> a, vec<short,2> b, vec<bool,2> c) { return vec<short,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:170:29: note: candidate function
  METAL_FUNC vec<ushort,2>  select(vec<ushort,2> a, vec<ushort,2> b, vec<bool,2> c) { return vec<ushort,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:171:29: note: candidate function
  METAL_FUNC vec<int,2>     select(vec<int,2> a, vec<int,2> b, vec<bool,2> c) { return vec<int,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:172:29: note: candidate function
  METAL_FUNC vec<uint,2>    select(vec<uint,2> a, vec<uint,2> b, vec<bool,2> c) { return vec<uint,2>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:173:29: note: candidate function
  METAL_FUNC vec<char,3>    select(vec<char,3> a, vec<char,3> b, vec<bool,3> c) { return vec<char,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:174:29: note: candidate function
  METAL_FUNC vec<uchar,3>   select(vec<uchar,3> a, vec<uchar,3> b, vec<bool,3> c) { return vec<uchar,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:175:29: note: candidate function
  METAL_FUNC vec<short,3>   select(vec<short,3> a, vec<short,3> b, vec<bool,3> c) { return vec<short,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:176:29: note: candidate function
  METAL_FUNC vec<ushort,3>  select(vec<ushort,3> a, vec<ushort,3> b, vec<bool,3> c) { return vec<ushort,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:177:29: note: candidate function
  METAL_FUNC vec<int,3>     select(vec<int,3> a, vec<int,3> b, vec<bool,3> c) { return vec<int,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:178:29: note: candidate function
  METAL_FUNC vec<uint,3>    select(vec<uint,3> a, vec<uint,3> b, vec<bool,3> c) { return vec<uint,3>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:179:29: note: candidate function
  METAL_FUNC vec<char,4>    select(vec<char,4> a, vec<char,4> b, vec<bool,4> c) { return vec<char,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:180:29: note: candidate function
  METAL_FUNC vec<uchar,4>   select(vec<uchar,4> a, vec<uchar,4> b, vec<bool,4> c) { return vec<uchar,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:181:29: note: candidate function
  METAL_FUNC vec<short,4>   select(vec<short,4> a, vec<short,4> b, vec<bool,4> c) { return vec<short,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:182:29: note: candidate function
  METAL_FUNC vec<ushort,4>  select(vec<ushort,4> a, vec<ushort,4> b, vec<bool,4> c) { return vec<ushort,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:183:29: note: candidate function
  METAL_FUNC vec<int,4>     select(vec<int,4> a, vec<int,4> b, vec<bool,4> c) { return vec<int,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^
/System/Library/PrivateFrameworks/GPUCompiler.framework/Versions/3802/Libraries/lib/clang/802.4/include/metal/metal_relational:184:29: note: candidate function
  METAL_FUNC vec<uint,4>    select(vec<uint,4> a, vec<uint,4> b, vec<bool,4> c) { return vec<uint,4>(select(a[0],b[0],c[0]),select(a[1],b[1],c[1]),select(a[2],b[2],c[2]),select(a[3],b[3],c[3]));}
                            ^


	 [[{{node ngraph_cluster_311}}]]

======================================================================
ERROR: test_wide_deep_estimator_training (__main__.BaseTest)
test_wide_deep_estimator_training (__main__.BaseTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _do_call
    return fn(*args)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: SC compilation failure
Likely a problem in input program, however cannot be more specific

	 [[{{node ngraph_cluster_491}}]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "wide_deep_test.py", line 121, in test_wide_deep_estimator_training
    self.build_and_test_estimator('wide_deep')
  File "wide_deep_test.py", line 103, in build_and_test_estimator
    model.train(input_fn=get_input_fn(1, True, 1), steps=1)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 367, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1158, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1192, in _train_model_default
    saving_listeners)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1484, in _train_with_estimator_spec
    _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 754, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1252, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1353, in run
    raise six.reraise(*original_exc_info)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/six.py", line 693, in reraise
    raise value
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1338, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1411, in run
    run_metadata=run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1169, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
    run_metadata_ptr)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1173, in _run
    feed_dict_tensor, options, run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
    run_metadata)
  File "/Users/sam/dev/ngraph-bridge/build_cmake/venv-tf-py3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Caught exception while executing nGraph computation: SC compilation failure
Likely a problem in input program, however cannot be more specific

	 [[{{node ngraph_cluster_491}}]]

----------------------------------------------------------------------
Ran 6 tests in 17.962s

FAILED (errors=4)
['CPU', 'INTERPRETER', 'PLAIDML', 'NOP']
Parsing /var/folders/wy/wxfdgdvn23zbw8q9mmv5q0br0000gq/T/wide_deep_testr39pru4w/tmpkjfmcrh7/adult.data
Parsing /var/folders/wy/wxfdgdvn23zbw8q9mmv5q0br0000gq/T/wide_deep_testr39pru4w/tmpcbzokitn/adult.data
Parsing /var/folders/wy/wxfdgdvn23zbw8q9mmv5q0br0000gq/T/wide_deep_testr39pru4w/tmp2zsxxxgr/adult.data
Parsing /var/folders/wy/wxfdgdvn23zbw8q9mmv5q0br0000gq/T/wide_deep_testr39pru4w/tmpri0kptec/test.csv
Parsing wide_deep_test.csv
         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_33(
    device void* X_T52_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T52 = static_cast<device ptrdiff_t*>(X_T52_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T52 = agg[0];
  X_T52[0] = LX_T52;
}

[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_32) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T51[d0 : _T49] = =(X_T50[])
// With Index Variables Made Integral:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T51     X_T50  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_32(
    device void* X_T51_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T51 = static_cast<device ptrdiff_t*>(X_T51_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T51 = agg[0];
  X_T51[0] = LX_T51;
}

[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_32) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T51[d0 : _T49] = =(X_T50[])
// With Index Variables Made Integral:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T51[d0 : _T49] = =(X_T50[]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T51     X_T50  
//      off                   0         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(1):(1):8 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_32(
    device void* X_T51_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T51 = static_cast<device ptrdiff_t*>(X_T51_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T51 = agg[0];
  X_T51[0] = LX_T51;
}

[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	NewComputePipelineState(kernel_c10_sdk_35) failed: SC compilation failure
Likely a problem in input program, however cannot be more specific
[ERR] 2019-07-15T05:44:30z src/ngraph/runtime/plaidml/plaidml_logger.cpp 46	Source code: 
// Original:
// X_T54[1 + d0 : _T52] = =(X_T53[d0])
// With Index Variables Made Integral:
// X_T54[1 + d0 : _T52] = =(X_T53[d0]), 500000000 + d0 < 1000000000
// Constraints:{ 0 <= d0 < 1, 0 <= 1 + d0 < 2, 0 <= 500000000 + d0 < 1000000000 }
// Merged Parallel Constraints:{ 0 <= d0 < 1 }
// Defracted:
// X_T54[1 + d0 : _T52] = =(X_T53[d0]), 500000000 + d0 < 1000000000
// Flattened:
//              Range     X_T54     X_T53  
//      off                   1         0  
//      vec                   1         1  
// 
// Names: {  }
// Ranges: {  }
// Out stride: {  }
// Input 1 offset: 0
// Input 1 stride: {  }
// Tile size: {  }
// Contraction output var shape: i64(2):(1):16 bytes
// Computed true ops: 2
// Computed work groups: 1
// Computed inner loops: 1
// Computed shared mem: 8
// Computed out regs: 2048
// Computed mem read: 128
// Computed mem write: 128
// Computed operations: 1
// Computed rollups: 0
// Computed threads used: 1
// lwork = 1, 1, 1
// gwork = 1, 1, 1

kernel void kernel_c10_sdk_35(
    device void* X_T54_arg_ [[ buffer(0) ]],
    device const void* in1_arg_ [[ buffer(1) ]],
    uint _tid [[ thread_index_in_threadgroup ]],
    uint3 _groupid [[ threadgroup_position_in_grid ]],
    uint3 _globalid [[ thread_position_in_grid ]]
)
{
  device ptrdiff_t* X_T54 = static_cast<device ptrdiff_t*>(X_T54_arg_);
  device const ptrdiff_t* in1 = static_cast<device const ptrdiff_t*>(in1_arg_);
  X_T54 = (X_T54 + 1);
  int tid = _tid;
  ptrdiff_t agg[1] = {0, };
  threadgroup ptrdiff_t in1_shared[1];
  {
    {
      in1_shared[0] = in1[clamp(0, 0, 0)];
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    ptrdiff_t val1 = in1_shared[0];
    agg[0] = val1;
    threadgroup_barrier(mem_flags::mem_threadgroup);
  }
  ptrdiff_t LX_T54 = agg[0];
  X_T54[0] = LX_T54;
}