From 2c7f8190b832a11a5eb2fdf8b32386db8b44d051 Mon Sep 17 00:00:00 2001 From: mimimi085181 Date: Sun, 1 Feb 2015 15:36:19 +0100 Subject: [PATCH] Allow multiple texture cache entries for textures at the same address This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop. The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again. Since this would break efb copies, don't do it for efb copies. Castlevania 3 goes from 80 fps to 115 fps for me. There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR. Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. 
(similar situation as PR https://github.com/dolphin-emu/dolphin/pull/1916) --- Source/Core/VideoCommon/TextureCacheBase.cpp | 69 ++++++++++++++------ Source/Core/VideoCommon/TextureCacheBase.h | 2 +- 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index b40f9d0882e0..0263cd99383f 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -329,31 +329,47 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) palette_size = TexDecoder_GetPaletteSize(texformat); u64 tlut_hash = GetHash64(&texMem[tlutaddr], palette_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); - // Mix the tlut hash into the texture hash. So we only have to compare it one. + // Mix the tlut hash into the texture hash. So we only have to compare it once. tex_hash ^= tlut_hash; - // NOTE: For non-paletted textures, texID is equal to the texture address. - // A paletted texture, however, may have multiple texIDs assigned though depending on the currently used tlut. - // This (changing texID depending on the tlut_hash) is a trick to get around - // an issue with Metroid Prime's fonts (it has multiple sets of fonts on each other - // stored in a single texture and uses the palette to make different characters - // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, - // we must make sure that a paletted texture gets assigned multiple IDs for each tlut used. - // - // EFB copys however didn't know anything about the tlut, so don't change the texID if there - // already is an efb copy at this source. This makes those textures less broken when using efb to texture. - // Examples are the mini map in Twilight Princess and objects on the targetting computer in Rogue Squadron 2(RS2). 
- // TODO: Convert those textures using the right palette, so they display correctly - auto iter = textures.find(texID); - if (iter == textures.end() || !iter->second->IsEfbCopy()) - texID ^= ((u32)tlut_hash) ^(u32)(tlut_hash >> 32); + // TODO: Convert paletted textures, which are efb copies, using the right palette, so they display correctly } // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain // e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we limit the mipmap count to 6 there tex_levels = std::min(IntLog2(std::max(width, height)) + 1, tex_levels); - TCacheEntryBase*& entry = textures[texID]; + TCacheEntryBase* entry = nullptr; + + // Use multiple cache entries for textures at the same address, if they are loaded during the same frame. If this is not done, there is + // only one texture cache entry for those textures, which gets overwritten and recreated all the time, so effectively none of those + // textures is cached. Multiple cache entries prevent slowdowns in Metroid Prime and Castlevania 3. Metroid Prime has multiple sets of + // fonts on each other stored in a single texture and uses the palette to make different characters visible or invisible. In Castlevania 3 + // all textures use the same address. If there were multiple cache entries for all textures at the same address, no texture cache entries + // would get overwritten at all. Overwriting a cache entry is faster than creating a new one, if it's unlikely that the old entry is used + // again. The restriction to textures loaded during the same frame, seems to be a good heuristic to determine when to use multiple cache + // entries and when not. + // For efb copies, the entry from CopyRenderTargetToTexture has to be used, or else it was done in vain. 
+ std::pair iter_range = textures.equal_range(texID); + TexCache::iterator iter = iter_range.first; + if (iter != iter_range.second) + { + if (iter->second->IsEfbCopy() || (iter->second->frameCount != FRAMECOUNT_INVALID && std::next(iter, 1) == iter_range.second)) + { + entry = iter->second; + } + else + { + while (!entry && iter != iter_range.second) + { + if (tex_hash == iter->second->hash) + entry = iter->second; + else + ++iter; + } + } + } + if (entry) { // 1. Calculate reference hash: @@ -379,6 +395,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) // pool this texture and make a new one later FreeTexture(entry); + textures.erase(iter); } std::unique_ptr hires_tex; @@ -431,9 +448,12 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) config.width = width; config.height = height; config.levels = texLevels; + entry = AllocateTexture(config); GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); + textures.insert(TexCache::value_type(texID, entry)); + entry->SetGeneralParameters(address, texture_size, full_format); entry->SetDimensions(nativeW, nativeH, tex_levels); entry->hash = tex_hash; @@ -791,9 +811,14 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat unsigned int scaled_tex_w = g_ActiveConfig.bCopyEFBScaled ? Renderer::EFBToScaledX(tex_w) : tex_w; unsigned int scaled_tex_h = g_ActiveConfig.bCopyEFBScaled ? 
Renderer::EFBToScaledY(tex_h) : tex_h; - TCacheEntryBase*& entry = textures[dstAddr]; - if (entry) - FreeTexture(entry); + // remove all texture cache entries at dstAddr + std::pair iter_range = textures.equal_range(dstAddr); + TexCache::iterator iter = iter_range.first; + while (iter != iter_range.second) + { + FreeTexture(iter->second); + iter = textures.erase(iter); + } // create the texture TCacheEntryConfig config; @@ -802,7 +827,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat config.height = scaled_tex_h; config.layers = FramebufferManagerBase::GetEFBLayers(); - entry = AllocateTexture(config); + TCacheEntryBase* entry = AllocateTexture(config); // TODO: Using the wrong dstFormat, dumb... entry->SetGeneralParameters(dstAddr, 0, dstFormat); @@ -812,6 +837,8 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat entry->frameCount = FRAMECOUNT_INVALID; entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat); + + textures.insert(TexCache::value_type(dstAddr, entry)); } TextureCache::TCacheEntryBase* TextureCache::AllocateTexture(const TCacheEntryConfig& config) diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 1d4ebe9ba7e4..0ef6a1ff5436 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -135,7 +135,7 @@ class TextureCache static TCacheEntryBase* AllocateTexture(const TCacheEntryConfig& config); static void FreeTexture(TCacheEntryBase* entry); - typedef std::map TexCache; + typedef std::multimap TexCache; typedef std::unordered_multimap TexPool; static TexCache textures;