From 52033faaffe768c6f6e58f7d111bfc1a55880ff3 Mon Sep 17 00:00:00 2001 From: Ayham Tannous Date: Mon, 6 Nov 2023 15:03:11 -0800 Subject: [PATCH] Add file name and size to the serialization metadata logging (#113077) Summary: To be able to get more info on serialization/deserialization events, adding these two fields to the metadata logging. - file_name - file_size Test Plan: buck2 test mode/dev caffe2/caffe2/serialize:inline_container_test Reviewed By: davidberard98 Differential Revision: D51040426 --- caffe2/serialize/inline_container.cc | 19 ++++++++++++------- caffe2/serialize/inline_container_test.cc | 9 +++++++-- torch/package/package_importer.py | 5 ++++- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/caffe2/serialize/inline_container.cc b/caffe2/serialize/inline_container.cc index fbf96a1db2356..e836d24c3e045 100644 --- a/caffe2/serialize/inline_container.cc +++ b/caffe2/serialize/inline_container.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -88,7 +89,7 @@ static std::string basename(const std::string& name) { static std::string parentdir(const std::string& name) { size_t end = name.find_last_of('/'); - if(end == std::string::npos) { + if (end == std::string::npos) { end = name.find_last_of('\\'); } @@ -179,7 +180,9 @@ void PyTorchStreamReader::init() { } c10::LogAPIUsageMetadata( "pytorch.stream.reader.metadata", - {{"serialization_id", serialization_id_}}); + {{"serialization_id", serialization_id_}, + {"file_name", archive_name_}, + {"file_size", str(mz_zip_get_archive_size(ar_.get()))}}); // version check at::DataPtr version_ptr; @@ -700,8 +703,8 @@ void PyTorchStreamWriter::writeEndOfFile() { ~Finalizer() { var_ = true; } - private: - bool& var_; + private: + bool& var_; } f(finalized_); auto allRecords = getAllWrittenRecords(); @@ -736,6 +739,11 @@ void PyTorchStreamWriter::writeEndOfFile() { mz_zip_writer_finalize_archive(ar_.get()); mz_zip_writer_end(ar_.get()); valid("writing central directory for 
archive ", archive_name_.c_str()); + c10::LogAPIUsageMetadata( + "pytorch.stream.writer.metadata", + {{"serialization_id", serialization_id_}, + {"file_name", archive_name_}, + {"file_size", str(mz_zip_get_archive_size(ar_.get()))}}); if (file_stream_.is_open()) { file_stream_.close(); } @@ -779,9 +787,6 @@ void PyTorchStreamWriter::writeSerializationId() { kSerializationIdRecordName, serialization_id_.c_str(), serialization_id_.size()); - c10::LogAPIUsageMetadata( - "pytorch.stream.writer.metadata", - {{"serialization_id", serialization_id_}}); } } diff --git a/caffe2/serialize/inline_container_test.cc b/caffe2/serialize/inline_container_test.cc index b2313d39e6a9c..4e027f681961d 100644 --- a/caffe2/serialize/inline_container_test.cc +++ b/caffe2/serialize/inline_container_test.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -408,9 +409,13 @@ TEST(PytorchStreamWriterAndReader, LogAPIUsageMetadata) { ASSERT_EQ(logs.size(), 2); std::map> expected_logs = { {"pytorch.stream.writer.metadata", - {{"serialization_id", writer.serializationId()}}}, + {{"serialization_id", writer.serializationId()}, + {"file_name", "archive"}, + {"file_size", str(oss.str().length())}}}, {"pytorch.stream.reader.metadata", - {{"serialization_id", writer.serializationId()}}} + {{"serialization_id", writer.serializationId()}, + {"file_name", "archive"}, + {"file_size", str(iss.str().length())}}} }; ASSERT_EQ(expected_logs, logs); diff --git a/torch/package/package_importer.py b/torch/package/package_importer.py index 13b96f13d8775..e32bc61444ce4 100644 --- a/torch/package/package_importer.py +++ b/torch/package/package_importer.py @@ -101,7 +101,10 @@ def __init__( torch._C._log_api_usage_metadata( "torch.package.PackageImporter.metadata", - {"serialization_id": self.zip_reader.serialization_id()}, + { + "serialization_id": self.zip_reader.serialization_id(), + "file_name": self.filename + }, ) self.root = _PackageNode(None)