diff --git a/io/io/inc/TBufferFile.h b/io/io/inc/TBufferFile.h index 3d2e37141fcc4..2c36d15b6335e 100644 --- a/io/io/inc/TBufferFile.h +++ b/io/io/inc/TBufferFile.h @@ -52,6 +52,16 @@ class TBufferFile : public TBufferIO { TStreamerInfo *fInfo{nullptr}; ///< Pointer to TStreamerInfo object writing/reading the buffer InfoList_t fInfoStack; ///< Stack of pointers to the TStreamerInfos + using ByteCountLocator_t = std::size_t; // This might become a pair if we implement chunked keys + using ByteCountStack_t = std::vector; + ByteCountStack_t fByteCountStack; ///; + // fByteCounts will be stored either in the header/summary tkey or at the end + // of the last segment/chunk for a large TKey. + ByteCountFinder_t fByteCounts; ///< Map to find the byte count value for a given position + // Default ctor TBufferFile() {} // NOLINT: not allowed to use = default because of TObject::kIsOnHeap detection, see ROOT-10300 @@ -62,6 +72,7 @@ class TBufferFile : public TBufferIO { Long64_t CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const TClass *clss, const char* classname); void CheckCount(UInt_t offset) override; UInt_t CheckObject(UInt_t offset, const TClass *cl, Bool_t readClass = kFALSE); + UInt_t ReserveByteCount(); void WriteObjectClass(const void *actualObjStart, const TClass *actualClass, Bool_t cacheReuse) override; @@ -106,6 +117,9 @@ class TBufferFile : public TBufferIO { TObject *ReadObject(const TClass *cl) override; + const ByteCountFinder_t& GetByteCounts() const { return fByteCounts; } + void SetByteCounts(ByteCountFinder_t &&byteCounts) { fByteCounts = std::move(byteCounts); } + using TBufferIO::CheckObject; // basic types and arrays of basic types diff --git a/io/io/src/TBufferFile.cxx b/io/io/src/TBufferFile.cxx index d169d2348b347..4cbf1f321097d 100644 --- a/io/io/src/TBufferFile.cxx +++ b/io/io/src/TBufferFile.cxx @@ -323,7 +323,26 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) && (fBufCur >= fBuffer) && static_cast(fBufCur - fBuffer) <= std::numeric_limits::max() && "Byte count position is after the end of the buffer"); - const UInt_t cnt = UInt_t(fBufCur - fBuffer) - UInt_t(cntpos) - sizeof(UInt_t); + + // We can either make this unconditional or we could split the routine + // in two, one with a new signature and guarantee to get the 64bit position + // (which may be chunk number + local offset) and one with the old signature + // which uses the stack to get the position and call the new one. + // This (of course) also requires that we do the 'same' to the WriteVersion + // routines. + R__ASSERT( !fByteCountStack.empty() ); + if (cntpos == kMaxInt) { + cntpos = fByteCountStack.back(); + } + fByteCountStack.pop_back(); + // if we are not in the same TKey chunk or if the cntpos is too large to fit in UInt_t + // let's postpone the writing of the byte count + const ULong64_t full_cnt = ULong64_t(fBufCur - fBuffer) - cntpos - sizeof(UInt_t); + if (full_cnt >= kMaxMapCount) { + fByteCounts[cntpos] = full_cnt; + return; + } + UInt_t cnt = static_cast(full_cnt); char *buf = (char *)(fBuffer + cntpos); // if true, pack byte count in two consecutive shorts, so it can @@ -360,10 +379,25 @@ void TBufferFile::SetByteCount(ULong64_t cntpos, Bool_t packInVersion) Long64_t TBufferFile::CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const TClass *clss, const char *classname) { - if (!bcnt) return 0; - R__ASSERT(startpos <= kMaxUInt && bcnt <= kMaxUInt); - Long64_t offset = 0; + R__ASSERT(!fByteCountStack.empty()); + if (startpos == kMaxInt) { + // The position is above 1GB and was cached using a 32 bit variable. + startpos = fByteCountStack.back(); + } + if (bcnt == kMaxInt) { + // in case we are checking a byte count for which we postponed + // the writing because it was too large, we retrieve it now + auto it = fByteCounts.find(startpos); + if (it != fByteCounts.end()) { + bcnt = it->second; + } + } + fByteCountStack.pop_back(); + + if (!bcnt || startpos == kMaxInt) + return 0; + Long64_t offset = 0; Longptr_t endpos = Longptr_t(fBuffer) + startpos + bcnt + sizeof(UInt_t); if (Longptr_t(fBufCur) != endpos) { @@ -390,7 +424,7 @@ Long64_t TBufferFile::CheckByteCount(ULong64_t startpos, ULong64_t bcnt, const T if ( ((char *)endpos) > fBufMax ) { offset = fBufMax-fBufCur; Error("CheckByteCount", - "Byte count probably corrupted around buffer position %llu:\n\t%llu for a possible maximum of %lld", + "Byte count probably corrupted around buffer position %llu:\n\tByte count is %llu while the buffer size is %lld", startpos, bcnt, offset); fBufCur = fBufMax; @@ -2694,8 +2728,7 @@ void TBufferFile::WriteObjectClass(const void *actualObjectStart, const TClass * } // reserve space for leading byte count - UInt_t cntpos = UInt_t(fBufCur-fBuffer); - fBufCur += sizeof(UInt_t); + UInt_t cntpos = ReserveByteCount(); // write class of object first Int_t mapsize = fMap->Capacity(); // The slot depends on the capacity and WriteClass might induce an increase. @@ -2749,6 +2782,8 @@ TClass *TBufferFile::ReadClass(const TClass *clReq, UInt_t *objTag) bcnt = 0; } else { fVersion = 1; + if (objTag) + fByteCountStack.push_back(fBufCur - fBuffer); startpos = UInt_t(fBufCur-fBuffer); *this >> tag; } @@ -2932,7 +2967,10 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass if (startpos) { // before reading object save start position - *startpos = UInt_t(fBufCur-fBuffer); + auto full_startpos = fBufCur - fBuffer; + *startpos = full_startpos < kMaxInt ? UInt_t(full_startpos) : kMaxInt; + if (bcnt) + fByteCountStack.push_back(full_startpos); } // read byte count (older files don't have byte count) @@ -2956,9 +2994,29 @@ Version_t TBufferFile::ReadVersion(UInt_t *startpos, UInt_t *bcnt, const TClass fBufCur -= sizeof(UInt_t); v.cnt = 0; } - if (bcnt) *bcnt = (v.cnt & ~kByteCountMask); + if (bcnt) { + if (!v.cnt) { + // no byte count stored + *bcnt = 0; + fByteCountStack.pop_back(); + } else { + *bcnt = (v.cnt & ~kByteCountMask); + if (*bcnt == 0) { + // The byte count was stored but is zero, this means the data + // did not fit and thus we stored it in 'fByteCounts' instead. + // Mark this case by setting startpos to kMaxInt. + *bcnt = kMaxInt; + if (startpos) + *startpos = kMaxInt; + } + } + } frombuf(this->fBufCur,&version); + // NOTE: The code above is not straightforward to refactor by a call + // to ReadVersionNoCheckSum because of the following code need the + // 'raw' value in `v`. + if (version<=1) { if (version <= 0) { if (cl) { @@ -3038,7 +3096,10 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) if (startpos) { // before reading object save start position - *startpos = UInt_t(fBufCur-fBuffer); + auto full_startpos = fBufCur - fBuffer; + *startpos = full_startpos < kMaxInt ? UInt_t(full_startpos) : kMaxInt; + if (bcnt) + fByteCountStack.push_back(full_startpos); } // read byte count (older files don't have byte count) @@ -3062,7 +3123,23 @@ Version_t TBufferFile::ReadVersionNoCheckSum(UInt_t *startpos, UInt_t *bcnt) fBufCur -= sizeof(UInt_t); v.cnt = 0; } - if (bcnt) *bcnt = (v.cnt & ~kByteCountMask); + if (bcnt) { + if (!v.cnt) { + // no byte count stored + *bcnt = 0; + fByteCountStack.pop_back(); + } else { + *bcnt = (v.cnt & ~kByteCountMask); + if (*bcnt == 0) { + // The byte count was stored but is zero, this means the data + // did not fit and thus we stored it in 'fByteCounts' instead. + // Mark this case by setting startpos to kMaxInt. + *bcnt = kMaxInt; + if (startpos) + *startpos = kMaxInt; + } + } + } frombuf(this->fBufCur,&version); return version; @@ -3148,8 +3225,7 @@ UInt_t TBufferFile::WriteVersion(const TClass *cl, Bool_t useBcnt) UInt_t cntpos = 0; if (useBcnt) { // reserve space for leading byte count - cntpos = UInt_t(fBufCur-fBuffer); - fBufCur += sizeof(UInt_t); + cntpos = ReserveByteCount(); } Version_t version = cl->GetClassVersion(); @@ -3178,8 +3254,7 @@ UInt_t TBufferFile::WriteVersionMemberWise(const TClass *cl, Bool_t useBcnt) UInt_t cntpos = 0; if (useBcnt) { // reserve space for leading byte count - cntpos = UInt_t(fBufCur-fBuffer); - fBufCur += sizeof(UInt_t); + cntpos = ReserveByteCount(); } Version_t version = cl->GetClassVersion(); @@ -3327,6 +3402,22 @@ UInt_t TBufferFile::CheckObject(UInt_t offset, const TClass *cl, Bool_t readClas return offset; } +//////////////////////////////////////////////////////////////////////////////// +/// Reserve space for a leading byte count and return the position where to +/// store the byte count value. +/// +/// \param[in] mask The mask to apply to the placeholder value (default kByteCountMask) +/// \return The position (cntpos) where the byte count should be stored later, +/// or 0 if the position exceeds kMaxInt + +UInt_t TBufferFile::ReserveByteCount() +{ + // reserve space for leading byte count + auto full_cntpos = fBufCur - fBuffer; + fByteCountStack.push_back(full_cntpos); + *this << (UInt_t)kByteCountMask; // placeholder for byte count + return full_cntpos < kMaxInt ? full_cntpos : kMaxInt; +} //////////////////////////////////////////////////////////////////////////////// /// Read max bytes from the I/O buffer into buf. The function returns diff --git a/tree/tree/src/TBranch.cxx b/tree/tree/src/TBranch.cxx index 6b9d1102625a9..768434017ea86 100644 --- a/tree/tree/src/TBranch.cxx +++ b/tree/tree/src/TBranch.cxx @@ -979,6 +979,7 @@ Int_t TBranch::FillEntryBuffer(TBasket* basket, TBuffer* buf, Int_t& lnew) fEntryBuffer->SetBufferOffset(objectStart); *fEntryBuffer >> tag; if (tag & kByteCountMask) { + // Ignore byte count. *fEntryBuffer >> tag; } if (tag == kNewClassTag) { @@ -1457,7 +1458,7 @@ bool TBranch::SupportsBulkRead() const { /// the number of elements corresponding to each entries. /// /// For each entry the number of elements is the multiplication of -/// +/// /// ~~~{.cpp} /// TLeaf *leaf = static_cast(branch->GetListOfLeaves()->At(0)); /// auto len = leaf->GetLen();