Skip to content

Commit

Permalink
[linux-kernel] Backport PR facebook#2863
Browse files Browse the repository at this point in the history
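Improve the codegen for zstd fast & double fast by shrinking the
function size: each dispatcher now calls a small per-value wrapper
function generated by a macro instead of calling the generic function
directly in every switch case. This also improves the stack usage on
some compilers.
See PR facebook#2863 [0] for details.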

[0] facebook#2863
  • Loading branch information
terrelln committed Nov 16, 2021
1 parent 40e5bad commit 93ec0d5
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 26 deletions.
61 changes: 48 additions & 13 deletions lib/compress/zstd_double_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,26 @@ size_t ZSTD_compressBlock_doubleFast_generic(
return (size_t)(iend - anchor);
}

/*
 * ZSTD_GEN_FN(dictKind, mlsValue) emits one static function named
 * ZSTD_compressBlock_doubleFast_<dictKind>_<mlsValue>. The emitted function
 * forwards its five arguments unchanged to
 * ZSTD_compressBlock_doubleFast_generic(), appending mlsValue and
 * ZSTD_<dictKind> as the two trailing arguments.
 *
 * Invocations take no trailing semicolon: the expansion is a complete
 * function definition.
 */
#define ZSTD_GEN_FN(dictKind, mlsValue) \
    static size_t ZSTD_compressBlock_doubleFast_##dictKind##_##mlsValue( \
            ZSTD_matchState_t* ms, \
            seqStore_t* seqStore, \
            U32 rep[ZSTD_REP_NUM], \
            void const* src, \
            size_t srcSize) \
    { \
        return ZSTD_compressBlock_doubleFast_generic( \
                ms, seqStore, rep, src, srcSize, \
                mlsValue, ZSTD_##dictKind); \
    }

/* ZSTD_noDict wrappers for the values 4 through 7. */
ZSTD_GEN_FN(noDict, 4)
ZSTD_GEN_FN(noDict, 5)
ZSTD_GEN_FN(noDict, 6)
ZSTD_GEN_FN(noDict, 7)

/* ZSTD_dictMatchState wrappers for the values 4 through 7. */
ZSTD_GEN_FN(dictMatchState, 4)
ZSTD_GEN_FN(dictMatchState, 5)
ZSTD_GEN_FN(dictMatchState, 6)
ZSTD_GEN_FN(dictMatchState, 7)

/* The generator is only needed for the definitions above. */
#undef ZSTD_GEN_FN


size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Expand All @@ -323,13 +343,13 @@ size_t ZSTD_compressBlock_doubleFast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
}
}

Expand All @@ -343,13 +363,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}

Expand Down Expand Up @@ -385,7 +405,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(

/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
Expand Down Expand Up @@ -499,6 +519,21 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}


/*
 * ZSTD_GEN_FN(mlsValue) emits one static function named
 * ZSTD_compressBlock_doubleFast_extDict_<mlsValue>. The emitted function
 * forwards its five arguments unchanged to
 * ZSTD_compressBlock_doubleFast_extDict_generic(), appending mlsValue as the
 * trailing argument.
 *
 * Invocations take no trailing semicolon: the expansion is a complete
 * function definition.
 */
#define ZSTD_GEN_FN(mlsValue) \
    static size_t ZSTD_compressBlock_doubleFast_extDict_##mlsValue( \
            ZSTD_matchState_t* ms, \
            seqStore_t* seqStore, \
            U32 rep[ZSTD_REP_NUM], \
            void const* src, \
            size_t srcSize) \
    { \
        return ZSTD_compressBlock_doubleFast_extDict_generic( \
                ms, seqStore, rep, src, srcSize, \
                mlsValue); \
    }

/* One wrapper for each of the values 4 through 7. */
ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

/* The generator is only needed for the definitions above. */
#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
Expand All @@ -508,12 +543,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}
69 changes: 56 additions & 13 deletions lib/compress/zstd_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,20 @@ ZSTD_compressBlock_fast_generic(
return (size_t)(iend - anchor);
}

/*
 * ZSTD_GEN_FN(mlsValue) emits one static function named
 * ZSTD_compressBlock_fast_<mlsValue>. The emitted function forwards its five
 * arguments unchanged to ZSTD_compressBlock_fast_generic(), appending
 * mlsValue as the trailing argument.
 *
 * Invocations take no trailing semicolon: the expansion is a complete
 * function definition.
 */
#define ZSTD_GEN_FN(mlsValue) \
    static size_t ZSTD_compressBlock_fast_##mlsValue( \
            ZSTD_matchState_t* ms, \
            seqStore_t* seqStore, \
            U32 rep[ZSTD_REP_NUM], \
            void const* src, \
            size_t srcSize) \
    { \
        return ZSTD_compressBlock_fast_generic( \
                ms, seqStore, rep, src, srcSize, \
                mlsValue); \
    }

/* One wrapper for each of the values 4 through 7. */
ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

/* The generator is only needed for the definitions above. */
#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Expand All @@ -193,13 +207,13 @@ size_t ZSTD_compressBlock_fast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_7(ms, seqStore, rep, src, srcSize);
}
}

Expand Down Expand Up @@ -351,6 +365,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
return (size_t)(iend - anchor);
}

/*
 * ZSTD_GEN_FN(mlsValue) emits one static function named
 * ZSTD_compressBlock_fast_dictMatchState_<mlsValue>. The emitted function
 * forwards its five arguments unchanged to
 * ZSTD_compressBlock_fast_dictMatchState_generic(), appending mlsValue as the
 * trailing argument.
 *
 * Invocations take no trailing semicolon: the expansion is a complete
 * function definition.
 */
#define ZSTD_GEN_FN(mlsValue) \
    static size_t ZSTD_compressBlock_fast_dictMatchState_##mlsValue( \
            ZSTD_matchState_t* ms, \
            seqStore_t* seqStore, \
            U32 rep[ZSTD_REP_NUM], \
            void const* src, \
            size_t srcSize) \
    { \
        return ZSTD_compressBlock_fast_dictMatchState_generic( \
                ms, seqStore, rep, src, srcSize, \
                mlsValue); \
    }

/* One wrapper for each of the values 4 through 7. */
ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

/* The generator is only needed for the definitions above. */
#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
Expand All @@ -361,13 +390,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}

Expand Down Expand Up @@ -402,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(

/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
Expand Down Expand Up @@ -475,6 +504,20 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
return (size_t)(iend - anchor);
}

/*
 * ZSTD_GEN_FN(mlsValue) emits one static function named
 * ZSTD_compressBlock_fast_extDict_<mlsValue>. The emitted function forwards
 * its five arguments unchanged to ZSTD_compressBlock_fast_extDict_generic(),
 * appending mlsValue as the trailing argument.
 *
 * Invocations take no trailing semicolon: the expansion is a complete
 * function definition.
 */
#define ZSTD_GEN_FN(mlsValue) \
    static size_t ZSTD_compressBlock_fast_extDict_##mlsValue( \
            ZSTD_matchState_t* ms, \
            seqStore_t* seqStore, \
            U32 rep[ZSTD_REP_NUM], \
            void const* src, \
            size_t srcSize) \
    { \
        return ZSTD_compressBlock_fast_extDict_generic( \
                ms, seqStore, rep, src, srcSize, \
                mlsValue); \
    }

/* One wrapper for each of the values 4 through 7. */
ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

/* The generator is only needed for the definitions above. */
#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Expand All @@ -485,12 +528,12 @@ size_t ZSTD_compressBlock_fast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}

0 comments on commit 93ec0d5

Please sign in to comment.