From de5a0adca1a7d08b1233b317ec092dbf19263d2f Mon Sep 17 00:00:00 2001 From: Jason Garrett-Glaser Date: Mon, 13 Feb 2012 18:31:51 -0800 Subject: [PATCH] Remove explicit run calculation from coeff_level_run Not necessary with the CAVLC lookup table for zero run codes. --- common/bitstream.h | 1 - common/quant.c | 7 ++----- common/vlc.c | 4 +++- common/x86/quant-a.asm | 9 +++++---- tools/checkasm.c | 3 +-- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/common/bitstream.h b/common/bitstream.h index d7289615..3beb1ea9 100644 --- a/common/bitstream.h +++ b/common/bitstream.h @@ -58,7 +58,6 @@ typedef struct int last; int mask; dctcoef level[16]; - uint8_t run[16]; } x264_run_level_t; extern const vlc_t x264_coeff0_token[6]; diff --git a/common/quant.c b/common/quant.c index cc085269..17a6e1c6 100644 --- a/common/quant.c +++ b/common/quant.c @@ -376,12 +376,9 @@ static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel ) int mask = 0;\ do\ {\ - int r = 0;\ - runlevel->level[i_total] = dct[i_last];\ + runlevel->level[i_total++] = dct[i_last];\ mask |= 1 << (i_last);\ - while( --i_last >= 0 && dct[i_last] == 0 )\ - r++;\ - runlevel->run[i_total++] = r;\ + while( --i_last >= 0 && dct[i_last] == 0 );\ } while( i_last >= 0 );\ runlevel->mask = mask;\ return i_total;\ diff --git a/common/vlc.c b/common/vlc.c index 12bdad00..8af36de1 100644 --- a/common/vlc.c +++ b/common/vlc.c @@ -852,15 +852,17 @@ void x264_cavlc_init( x264_t *h ) dct[j] = i&(1<<j); int total = h->quantf.coeff_level_run[DCT_LUMA_4x4]( dct, &runlevel ); int zeros = runlevel.last + 1 - total; + uint32_t mask = i << (x264_clz( i ) + 1); for( int j = 0; j < total-1 && zeros > 0; j++ ) { int idx = X264_MIN(zeros, 7) - 1; - int run = runlevel.run[j]; + int run = x264_clz( mask ); int len = run_before[idx][run].i_size; size += len; bits <<= len; bits |= run_before[idx][run].i_bits; zeros -= run; + mask <<= run + 1; } x264_run_before[i] = (bits << 5) + size; } diff --git a/common/x86/quant-a.asm 
b/common/x86/quant-a.asm index 456cce6a..970811f8 100644 --- a/common/x86/quant-a.asm +++ b/common/x86/quant-a.asm @@ -1368,15 +1368,16 @@ cglobal coeff_level_run%1,0,7 LZCOUNT t3d, t5d, 0x1f %if HIGH_BIT_DEPTH mov t2d, [t0+t4*4] - mov [t1+t6+8+16*4], t3b - mov [t1+t6*4+ 8], t2d %else mov t2w, [t0+t4*2] - mov [t1+t6+8+16*2], t3b - mov [t1+t6*2+ 8], t2w %endif inc t3d shl t5d, t3b +%if HIGH_BIT_DEPTH + mov [t1+t6*4+ 8], t2d +%else + mov [t1+t6*2+ 8], t2w +%endif inc t6d sub t4d, t3d jge .loop diff --git a/tools/checkasm.c b/tools/checkasm.c index a340fffe..630a01d4 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -2052,8 +2052,7 @@ static int check_quant( int cpu_ref, int cpu_new ) int result_a = call_a( qf_a.lastname, dct1+ac, &runlevel_a ); \ if( result_c != result_a || runlevel_c.last != runlevel_a.last || \ runlevel_c.mask != runlevel_a.mask || \ - memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c) || \ - memcmp(runlevel_c.run, runlevel_a.run, sizeof(uint8_t)*(result_c-1)) ) \ + memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c)) \ { \ ok = 0; \ fprintf( stderr, #name ": [FAILED]\n" ); \