Update on "Add support for labels to ttir analysis"

cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx chenyang78 aakhundov kadeng [ghstack-poisoned]
pytorch · Feb 14, 2024 · ed6bd38 · ed6bd38
2 parents dd3672b + 8c2b814
commit ed6bd38
Show file tree

Hide file tree

Showing 9 changed files with 65 additions and 67 deletions.
diff --git a/functorch/op_analysis/gen_data.py b/functorch/op_analysis/gen_data.py
@@ -148,19 +148,19 @@ def remove_prefix(input_string, prefix):
 
 if True:
     with open("run_ops.txt") as f:
-        opinfo_ops = [remove_suffix(i.strip(), ".default") for i in f.readlines()]
+        opinfo_ops = [remove_suffix(i.strip(), ".default") for i in f]
     with open("count_ops.txt") as f:
-        opinfo_counts = [i.strip() for i in f.readlines()]
+        opinfo_counts = [i.strip() for i in f]
         opinfo_counts = defaultdict(int, dict(zip(opinfo_ops, opinfo_counts)))
 
     def count_fn(x):
         return opinfo_counts[x["full_name"]]
 
     with open("run_decompositions.txt") as f:
-        decomposed_ops = [remove_suffix(i.strip(), ".default") for i in f.readlines()]
+        decomposed_ops = [remove_suffix(i.strip(), ".default") for i in f]
 
     with open("public_api") as f:
-        ref_api = [i.strip() for i in f.readlines()]
+        ref_api = [i.strip() for i in f]
 
     def has_ref_impl(x):
         name = x["name"]

diff --git a/setup.py b/setup.py
@@ -365,7 +365,7 @@ def get_submodule_folders():
     with open(git_modules_path) as f:
         return [
             os.path.join(cwd, line.split("=", 1)[1].strip())
-            for line in f.readlines()
+            for line in f
             if line.strip().startswith("path")
         ]
 

diff --git a/test/distributed/elastic/multiprocessing/redirects_test.py b/test/distributed/elastic/multiprocessing/redirects_test.py
@@ -123,7 +123,7 @@ def _redirect_large_buffer(self, print_fn, num_lines=500_000):
                 print_fn(i)
 
         with open(stdout_log) as fp:
-            actual = {int(line.split(":")[1]) for line in fp.readlines()}
+            actual = {int(line.split(":")[1]) for line in fp}
             expected = set(range(num_lines))
             self.assertSetEqual(expected, actual)
 

diff --git a/test/mobile/test_upgrader_codegen.py b/test/mobile/test_upgrader_codegen.py
@@ -22,9 +22,9 @@ def test_generate_bytecode(self):
         with tempfile.TemporaryDirectory() as tmpdirname:
             write_cpp(tmpdirname, sorted_upgrader_list)
             with open(os.path.join(tmpdirname, 'upgrader_mobile.cpp')) as file_name:
-                actual_output = [line.strip() for line in file_name.readlines() if line]
+                actual_output = [line.strip() for line in file_name if line]
             with open(str(upgrader_mobile_cpp_path)) as file_name:
-                expect_output = [line.strip() for line in file_name.readlines() if line]
+                expect_output = [line.strip() for line in file_name if line]
             actual_output_filtered = list(filter(lambda token: len(token) != 0, actual_output))
             expect_output_filtered = list(filter(lambda token: len(token) != 0, expect_output))
 

diff --git a/test/test_hub.py b/test/test_hub.py
@@ -43,7 +43,7 @@ def _assert_trusted_list_is_empty(self):
 
     def _assert_in_trusted_list(self, line):
         with open(self.trusted_list_path) as f:
-            assert line in (l.strip() for l in f.readlines())
+            assert line in (l.strip() for l in f)
 
     @retry(Exception, tries=3)
     def test_load_from_github(self):

diff --git a/tools/nvcc_fix_deps.py b/tools/nvcc_fix_deps.py
@@ -39,7 +39,7 @@ def resolve_include(path: Path, include_dirs: List[Path]) -> Path:
 def repair_depfile(depfile: TextIO, include_dirs: List[Path]) -> None:
     changes_made = False
     out = ""
-    for line in depfile.readlines():
+    for line in depfile:
         if ":" in line:
             colon_pos = line.rfind(":")
             out += line[: colon_pos + 1]

diff --git a/tools/setup_helpers/gen_version_header.py b/tools/setup_helpers/gen_version_header.py
@@ -62,7 +62,7 @@ def main(args: argparse.Namespace) -> None:
 
     with open(args.template_path) as input:
         with open(args.output_path, "w") as output:
-            for line in input.readlines():
+            for line in input:
                 output.write(apply_replacements(replacements, line))
 
 

diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp
@@ -2793,61 +2793,59 @@ c10::intrusive_ptr<Work> ProcessGroupNCCL::allreduce_sparse(
   tensor = tensor.coalesce();
   at::Tensor outputTensor =
       torch::zeros(tensor.sizes(), tensor.options().layout(torch::kStrided));
-}
-int dev_in_group = 0;
-auto work = collective(
-    tensor,
-    outputTensor,
-    [&](at::Tensor& input,
-        at::Tensor& output,
-        ncclComm_t comm,
-        at::cuda::CUDAStream& stream) {
-      auto ncclDataType = getNcclDataType(input.scalar_type());
-      auto ncclReduceOp =
-          getNcclReduceOp(opts.reduceOp, input, ncclDataType, comm);
-
-      size_t num_elements = output.numel();
-      auto indices = input.indices();
-      auto sizes = input.sizes();
-      int colSize = sizes[1];
-      auto rows = indices[0];
-      size_t blockCount = rows.sizes()[0];
-      auto recvIndices = indices[0] * colSize;
-
-      // prevent output and recvIndices from being freed
-      c10::cuda::CUDACachingAllocator::recordStream(
-          output.storage().data_ptr(), stream);
-      c10::cuda::CUDACachingAllocator::recordStream(
-          recvIndices.storage().data_ptr(), stream);
-      auto result = ncclAllReduceSparseBlock(
-          input._values().data_ptr(), // sendbuff
-          recvIndices.data_ptr<int64_t>(), // recv_indices
-          blockCount, // block_count
-          colSize, // block_length
-          output.data_ptr(), // recvbuff
-          output.numel(), // recv_count
-          ncclDataType,
-          ncclReduceOp,
-          comm,
-          stream.stream());
-      return result;
-    },
-    [](at::cuda::CUDAStream& ncclStream,
-       c10::intrusive_ptr<ProcessGroupNCCL::WorkNCCL>& work) {},
-    [&](at::cuda::CUDAStream& ncclStream,
-        c10::intrusive_ptr<ProcessGroupNCCL::WorkNCCL>& work) {
-      // Convert output tensors to sparse and back into tensors.
-      at::cuda::CUDAStreamGuard guard(ncclStream);
-      if (opts.sparseIndices.has_value()) {
-        tensor = at::sparse_coo_tensor(
-            opts.sparseIndices.value(), outputTensor, tensor.sizes());
-      } else {
-        tensor = outputTensor.to_sparse();
-      }
-    },
-    OpType::_ALLREDUCE_SPARSE,
-    "nccl:all_reduce_sparse");
-return work;
+  auto work = collective(
+      tensor,
+      outputTensor,
+      [&](at::Tensor& input,
+          at::Tensor& output,
+          ncclComm_t comm,
+          at::cuda::CUDAStream& stream) {
+        auto ncclDataType = getNcclDataType(input.scalar_type());
+        auto ncclReduceOp =
+            getNcclReduceOp(opts.reduceOp, input, ncclDataType, comm);
+
+        size_t num_elements = output.numel();
+        auto indices = input.indices();
+        auto sizes = input.sizes();
+        int colSize = sizes[1];
+        auto rows = indices[0];
+        size_t blockCount = rows.sizes()[0];
+        auto recvIndices = indices[0] * colSize;
+
+        // prevent output and recvIndices from being freed
+        c10::cuda::CUDACachingAllocator::recordStream(
+            output.storage().data_ptr(), stream);
+        c10::cuda::CUDACachingAllocator::recordStream(
+            recvIndices.storage().data_ptr(), stream);
+        auto result = ncclAllReduceSparseBlock(
+            input._values().data_ptr(), // sendbuff
+            recvIndices.data_ptr<int64_t>(), // recv_indices
+            blockCount, // block_count
+            colSize, // block_length
+            output.data_ptr(), // recvbuff
+            output.numel(), // recv_count
+            ncclDataType,
+            ncclReduceOp,
+            comm,
+            stream.stream());
+        return result;
+      },
+      [](at::cuda::CUDAStream& ncclStream,
+         c10::intrusive_ptr<ProcessGroupNCCL::WorkNCCL>& work) {},
+      [&](at::cuda::CUDAStream& ncclStream,
+          c10::intrusive_ptr<ProcessGroupNCCL::WorkNCCL>& work) {
+        // Convert output tensors to sparse and back into tensors.
+        at::cuda::CUDAStreamGuard guard(ncclStream);
+        if (opts.sparseIndices.has_value()) {
+          tensor = at::sparse_coo_tensor(
+              opts.sparseIndices.value(), outputTensor, tensor.sizes());
+        } else {
+          tensor = outputTensor.to_sparse();
+        }
+      },
+      OpType::_ALLREDUCE_SPARSE,
+      "nccl:all_reduce_sparse");
+  return work;
 #else
   // If the nccl branch is not "exp" then we just error
   C10_THROW_ERROR(

diff --git a/torch/utils/data/datapipes/gen_pyi.py b/torch/utils/data/datapipes/gen_pyi.py
@@ -70,7 +70,7 @@ def parse_datapipe_file(file_path: str) -> Tuple[Dict[str, str], Dict[str, str],
         open_paren_count = 0
         method_name, class_name, signature = "", "", ""
         skip = False
-        for line in f.readlines():
+        for line in f:
             if line.count("\"\"\"") % 2 == 1:
                 skip = not skip
             if skip or "\"\"\"" in line:  # Saving docstrings