From c5e3550521c350d021f1b1bf7bb91a1fe21a3027 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 18 Aug 2020 13:49:30 -0700 Subject: [PATCH] Fix `codegen/cuda` gcc-5.4 compilation issues Most of the fixes are the same old enum-is-not-hashable error In manager.cpp use std::unordered_map::emplace rather than `insert` to avoid an error triggered by missed copy elision This regression was introduced by https://github.com/pytorch/pytorch/pull/43129 --- .../jit/codegen/cuda/lower_thread_predicate.h | 6 ++++-- torch/csrc/jit/codegen/cuda/lower_utils.cpp | 16 ++++++++-------- torch/csrc/jit/codegen/cuda/lower_utils.h | 2 +- torch/csrc/jit/codegen/cuda/manager.cpp | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h b/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h index 236da4078bc77..184640280283f 100644 --- a/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h +++ b/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h @@ -22,8 +22,10 @@ namespace fuser { */ class TORCH_CUDA_API ThreadPredicateMap { public: - using SourceMapType = - std::unordered_map>; + using SourceMapType = std::unordered_map< + ParallelType, + std::unordered_set, + TypeHash>; using MapType = std::unordered_map< const TensorView*, std::pair>; diff --git a/torch/csrc/jit/codegen/cuda/lower_utils.cpp b/torch/csrc/jit/codegen/cuda/lower_utils.cpp index 1393d2ffb5bef..1e25cb7a758e3 100644 --- a/torch/csrc/jit/codegen/cuda/lower_utils.cpp +++ b/torch/csrc/jit/codegen/cuda/lower_utils.cpp @@ -479,13 +479,13 @@ bool isUnrolledFor(const Expr* expr) { ParallelType::Unroll; } -const std::unordered_map ParallelTypeBitmap::pt_to_offset_{ - {ParallelType::BIDx, 0}, - {ParallelType::BIDy, 1}, - {ParallelType::BIDz, 2}, - {ParallelType::TIDx, 3}, - {ParallelType::TIDy, 4}, - {ParallelType::TIDz, 5}}; +const std::unordered_map + ParallelTypeBitmap::pt_to_offset_{{ParallelType::BIDx, 0}, + {ParallelType::BIDy, 1}, + {ParallelType::BIDz, 2}, 
+ {ParallelType::TIDx, 3}, + {ParallelType::TIDy, 4}, + {ParallelType::TIDz, 5}}; const std::unordered_map ParallelTypeBitmap::offset_to_pt_ = {{0, ParallelType::BIDx}, @@ -554,7 +554,7 @@ bool ParallelTypeBitmap::operator[](size_t pos) const { std::map ParallelTypeBitmap::getMap() const { std::map map; for (const auto& pt_offset : pt_to_offset_) { - map.emplace(std::make_pair(pt_offset.first, bitset_[pt_offset.second])); + map.emplace(pt_offset.first, bitset_[pt_offset.second]); } return map; } diff --git a/torch/csrc/jit/codegen/cuda/lower_utils.h b/torch/csrc/jit/codegen/cuda/lower_utils.h index a9a01babd88d4..92c7c438b870f 100644 --- a/torch/csrc/jit/codegen/cuda/lower_utils.h +++ b/torch/csrc/jit/codegen/cuda/lower_utils.h @@ -125,7 +125,7 @@ class ParallelTypeBitmap { private: ParallelTypeBitmap(const std::bitset& bs) : bitset_(bs) {} std::bitset bitset_; - const static std::unordered_map pt_to_offset_; + const static std::unordered_map pt_to_offset_; const static std::unordered_map offset_to_pt_; }; diff --git a/torch/csrc/jit/codegen/cuda/manager.cpp b/torch/csrc/jit/codegen/cuda/manager.cpp index 076803dce2fa5..51407ea7fca98 100644 --- a/torch/csrc/jit/codegen/cuda/manager.cpp +++ b/torch/csrc/jit/codegen/cuda/manager.cpp @@ -91,7 +91,7 @@ class CudaFusionManager { int32_t kernel_id = getNextUniqueID(); graph_cache_ids_[repr] = kernel_id; TORCH_CHECK( - graph_cache_.insert({kernel_id, std::make_unique(graph)}) + graph_cache_.emplace(kernel_id, std::make_unique(graph)) .second); } return graph_cache_ids_[repr];