From d2cf15fff0ae7e154a8c8785a625749802c64eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Gonz=C3=A1lez?= Date: Sat, 4 Jan 2025 17:32:53 +0100 Subject: [PATCH] Make server resource file checksumming code memory-efficient At the moment, the server resource file checksum calculation code reads the entire file to a memory buffer, which is then handed over to a MD5 hasher. Then, the file is copied to the HTTP cache directory by writing the buffer contents to a file in that directory. This algorithm is simple and works well for smaller resource files, but has O(n) space complexity, meaning that it may cause heap or address space exhaustion problems for bigger files, such as IMG archives, textures and sounds, and for resources with many files, which is a realistic concern especially since #3203. In fact, Discord user _Ben_ reported an out of memory server crash stemming from this code when trying to add big IMG archives to a resource. These changes make the algorithm have constant space complexity by not reading the file to a buffer and using the `CChecksum::GenerateChecksumFromFile` method instead. Because no buffer with the file contents is available any longer, `FileSize` is used to compute the resource file size hint, and `FileCopy` is used to copy the file to the HTTP cache directory. Thus, the new code should be much more scalable, but has a higher constant overhead due to the additional file operations for fetching its size and copying it. Luckily, opening a file to get its size is a fairly cheap operation in most sane environments, and reading it again for making a copy should hit the OS disk cache in RAM, making it also fairly cheap. All in all, I couldn't measure a negative performance impact for the common case of small resource files on a local filesystem. (Of course, I could indeed measure a favorable difference in memory usage during resource load.) In my view, a bigger potential concern would be the posibility of TOCTOU bugs that the repeated operations over the same file introduce: an external thread could race against the server thread to swap the contents of the file being checksummed after such checksum is computed, but before it's copied to the HTTP cache directory. After giving this concern some thought, I concluded that, fortunately, it's not a problem in this case because clients would detect a checksum mismatch after downloading the file, refusing to deal with it and thus sidestepping any security concerns. (Conceptually, this is similar to what would already happen if the HTTP cache directory gets corrupted.) --- Client/mods/deathmatch/logic/CResource.cpp | 5 +---- Server/mods/deathmatch/logic/CResource.cpp | 17 +++++++++-------- Server/mods/deathmatch/logic/CResourceFile.h | 6 +++--- .../logic/packets/CResourceStartPacket.cpp | 2 +- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Client/mods/deathmatch/logic/CResource.cpp b/Client/mods/deathmatch/logic/CResource.cpp index dcf119ce468..3fd85ced898 100644 --- a/Client/mods/deathmatch/logic/CResource.cpp +++ b/Client/mods/deathmatch/logic/CResource.cpp @@ -317,10 +317,7 @@ void CResource::Load() else if (pResourceFile->IsAutoDownload()) { // Check the file contents - if (CChecksum::GenerateChecksumFromFileUnsafe(pResourceFile->GetName()) == pResourceFile->GetServerChecksum()) - { - } - else + if (CChecksum::GenerateChecksumFromFileUnsafe(pResourceFile->GetName()) != pResourceFile->GetServerChecksum()) { HandleDownloadedFileTrouble(pResourceFile, false); } diff --git a/Server/mods/deathmatch/logic/CResource.cpp b/Server/mods/deathmatch/logic/CResource.cpp index 279f1937a76..e22d2bdbaab 100644 --- a/Server/mods/deathmatch/logic/CResource.cpp +++ b/Server/mods/deathmatch/logic/CResource.cpp @@ -510,13 +510,14 @@ std::future CResource::GenerateChecksumForFile(CResourceFile* pResource if (!GetFilePath(pResourceFile->GetName(), strPath)) return SString(); - std::vector buffer; - FileLoad(strPath, buffer); - uint uiFileSize = buffer.size(); - const char* pFileContents = uiFileSize ? buffer.data() : ""; - CChecksum Checksum = CChecksum::GenerateChecksumFromBuffer(pFileContents, uiFileSize); - pResourceFile->SetLastChecksum(Checksum); - pResourceFile->SetLastFileSize(uiFileSize); + auto checksumOrError = CChecksum::GenerateChecksumFromFile(strPath); + if (std::holds_alternative(checksumOrError)) + { + return SString(std::get(checksumOrError)); + } + + pResourceFile->SetLastChecksum(std::get(checksumOrError)); + pResourceFile->SetLastFileSizeHint(FileSize(strPath)); // Check if file is blocked char szHashResult[33]; @@ -547,7 +548,7 @@ std::future CResource::GenerateChecksumForFile(CResourceFile* pResource if (pResourceFile->GetLastChecksum() != cachedChecksum) { - if (!FileSave(strCachedFilePath, pFileContents, uiFileSize)) + if (!FileCopy(strPath, strCachedFilePath)) { return SString("Could not copy '%s' to '%s'\n", *strPath, *strCachedFilePath); } diff --git a/Server/mods/deathmatch/logic/CResourceFile.h b/Server/mods/deathmatch/logic/CResourceFile.h index b61f332f36d..2a4a6a2f2c0 100644 --- a/Server/mods/deathmatch/logic/CResourceFile.h +++ b/Server/mods/deathmatch/logic/CResourceFile.h @@ -45,7 +45,7 @@ class CResourceFile eResourceType m_type; class CLuaMain* m_pVM; CChecksum m_checksum; // Checksum last time this was loaded, generated by GenerateChecksum() - uint m_uiFileSize; + uint m_uiFileSizeHint; map m_attributeMap; // Map of attributes from the meta.xml file public: @@ -65,9 +65,9 @@ class CResourceFile CChecksum GetLastChecksum() { return m_checksum; } void SetLastChecksum(CChecksum checksum) { m_checksum = checksum; } - void SetLastFileSize(uint uiFileSize) { m_uiFileSize = uiFileSize; } + void SetLastFileSizeHint(uint uiFileSizeHint) { m_uiFileSizeHint = uiFileSizeHint; } - double GetApproxSize() { return m_uiFileSize; } // Only used by download counters + uint GetSizeHint() { return m_uiFileSizeHint; } // Only used by download counters string GetMetaFileAttribute(const string& key) { return m_attributeMap[key]; } SString GetCachedPathFilename(bool bForceClientCachePath = false); }; diff --git a/Server/mods/deathmatch/logic/packets/CResourceStartPacket.cpp b/Server/mods/deathmatch/logic/packets/CResourceStartPacket.cpp index 2b6062ae9ad..c27dbeb3c40 100644 --- a/Server/mods/deathmatch/logic/packets/CResourceStartPacket.cpp +++ b/Server/mods/deathmatch/logic/packets/CResourceStartPacket.cpp @@ -115,7 +115,7 @@ bool CResourceStartPacket::Write(NetBitStreamInterface& BitStream) const CChecksum checksum = (*iter)->GetLastChecksum(); BitStream.Write(checksum.ulCRC); BitStream.Write((const char*)checksum.md5.data, sizeof(checksum.md5.data)); - BitStream.Write((*iter)->GetApproxSize()); + BitStream.Write((double)(*iter)->GetSizeHint()); // Has to be double for bitstream format compatibility if ((*iter)->GetType() == CResourceScriptItem::RESOURCE_FILE_TYPE_CLIENT_FILE) { CResourceClientFileItem* pRCFItem = reinterpret_cast(*iter);