From c5e3550521c350d021f1b1bf7bb91a1fe21a3027 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 18 Aug 2020 13:49:30 -0700 Subject: [PATCH] Fix `codegen/cuda` gcc-5.4 compilation issues Most of the fixes are the same old enum-is-not-hashable error In manager.cpp use std::unordered_map::emplace rather than `insert` to avoid an error triggered by missed copy elision This regression was introduced by https://github.com/pytorch/pytorch/pull/43129 --- .../jit/codegen/cuda/lower_thread_predicate.h | 6 ++++-- torch/csrc/jit/codegen/cuda/lower_utils.cpp | 16 ++++++++-------- torch/csrc/jit/codegen/cuda/lower_utils.h | 2 +- torch/csrc/jit/codegen/cuda/manager.cpp | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h b/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h index 236da4078bc77..184640280283f 100644 --- a/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h +++ b/torch/csrc/jit/codegen/cuda/lower_thread_predicate.h @@ -22,8 +22,10 @@ namespace fuser { */ class TORCH_CUDA_API ThreadPredicateMap { public: - using SourceMapType = - std::unordered_map>; + using SourceMapType = std::unordered_map< + ParallelType, + std::unordered_set, + TypeHash>; using MapType = std::unordered_map< const TensorView*, std::pair>; diff --git a/torch/csrc/jit/codegen/cuda/lower_utils.cpp b/torch/csrc/jit/codegen/cuda/lower_utils.cpp index 1393d2ffb5bef..1e25cb7a758e3 100644 --- a/torch/csrc/jit/codegen/cuda/lower_utils.cpp +++ b/torch/csrc/jit/codegen/cuda/lower_utils.cpp @@ -479,13 +479,13 @@ bool isUnrolledFor(const Expr* expr) { ParallelType::Unroll; } -const std::unordered_map ParallelTypeBitmap::pt_to_offset_{ - {ParallelType::BIDx, 0}, - {ParallelType::BIDy, 1}, - {ParallelType::BIDz, 2}, - {ParallelType::TIDx, 3}, - {ParallelType::TIDy, 4}, - {ParallelType::TIDz, 5}}; +const std::unordered_map + ParallelTypeBitmap::pt_to_offset_{{ParallelType::BIDx, 0}, + {ParallelType::BIDy, 1}, + {ParallelType::BIDz, 2}, 
+ {ParallelType::TIDx, 3}, + {ParallelType::TIDy, 4}, + {ParallelType::TIDz, 5}}; const std::unordered_map ParallelTypeBitmap::offset_to_pt_ = {{0, ParallelType::BIDx}, @@ -554,7 +554,7 @@ bool ParallelTypeBitmap::operator[](size_t pos) const { std::map ParallelTypeBitmap::getMap() const { std::map map; for (const auto& pt_offset : pt_to_offset_) { - map.emplace(std::make_pair(pt_offset.first, bitset_[pt_offset.second])); + map.emplace(pt_offset.first, bitset_[pt_offset.second]); } return map; } diff --git a/torch/csrc/jit/codegen/cuda/lower_utils.h b/torch/csrc/jit/codegen/cuda/lower_utils.h index a9a01babd88d4..92c7c438b870f 100644 --- a/torch/csrc/jit/codegen/cuda/lower_utils.h +++ b/torch/csrc/jit/codegen/cuda/lower_utils.h @@ -125,7 +125,7 @@ class ParallelTypeBitmap { private: ParallelTypeBitmap(const std::bitset& bs) : bitset_(bs) {} std::bitset bitset_; - const static std::unordered_map pt_to_offset_; + const static std::unordered_map pt_to_offset_; const static std::unordered_map offset_to_pt_; }; diff --git a/torch/csrc/jit/codegen/cuda/manager.cpp b/torch/csrc/jit/codegen/cuda/manager.cpp index 076803dce2fa5..51407ea7fca98 100644 --- a/torch/csrc/jit/codegen/cuda/manager.cpp +++ b/torch/csrc/jit/codegen/cuda/manager.cpp @@ -91,7 +91,7 @@ class CudaFusionManager { int32_t kernel_id = getNextUniqueID(); graph_cache_ids_[repr] = kernel_id; TORCH_CHECK( - graph_cache_.insert({kernel_id, std::make_unique(graph)}) + graph_cache_.emplace(kernel_id, std::make_unique(graph)) .second); } return graph_cache_ids_[repr];