Skip to content
Browse files

Add --subme 11, which disables all early terminations in analysis

Necessary for a future trellis mode decision/motion estimation patch.
Also add the slowest presets to the regression test.
  • Loading branch information...
1 parent 207ca3e commit 9977b595fb7591e3616fa98677baf6e84e0f7029 @danielhlarkin danielhlarkin committed with Jason Garrett-Glaser Jul 13, 2011
Showing with 47 additions and 38 deletions.
  1. +1 −1 common/common.c
  2. +36 −31 encoder/analyse.c
  3. +2 −2 encoder/encoder.c
  4. +1 −0 encoder/me.c
  5. +3 −1 tools/test_x264.py
  6. +4 −3 x264.c
View
2 common/common.c
@@ -275,7 +275,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
else if( !strcasecmp( preset, "placebo" ) )
{
param->analyse.i_me_method = X264_ME_TESA;
- param->analyse.i_subpel_refine = 10;
+ param->analyse.i_subpel_refine = 11;
param->analyse.i_me_range = 24;
param->i_frame_reference = 16;
param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
View
67 encoder/analyse.c
@@ -132,6 +132,7 @@ typedef struct
int i_mb_type8x16;
int b_direct_available;
+ int b_early_terminate;
} x264_mb_analysis_t;
@@ -416,6 +417,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
/* mbrd == 3 -> QPRD */
a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
+ a->b_early_terminate = h->param.analyse.i_subpel_refine < 11;
x264_mb_analyse_init_qp( h, a, qp );
@@ -560,7 +562,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
}
/* Fast intra decision */
- if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
+ if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
{
/* Always run in fast-intra mode for subme < 3 */
if( h->mb.i_subpel_refine > 2 &&
@@ -927,18 +929,18 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
}
/* Not heavily tuned */
static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
- if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
+ if( a->b_early_terminate && X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
return;
}
/* 4x4 prediction selection */
if( flags & X264_ANALYSE_I4x4 )
{
int i_cost = lambda * (24+16); /* 24from JVT (SATD0), 16 from base predmode costs */
- int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 );
+ int i_satd_thresh = a->b_early_terminate ? X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 ) : COST_MAX;
h->mb.i_cbp_luma = 0;
- if( a->i_mbrd )
+ if( a->b_early_terminate && a->i_mbrd )
i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8;
if( h->sh.i_type == SLICE_TYPE_B )
@@ -1033,6 +1035,9 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
{
+ if( !a->b_early_terminate )
+ i_satd_thresh = COST_MAX;
+
if( a->i_satd_i16x16 < i_satd_thresh )
{
h->mb.i_type = I_16x16;
@@ -1072,7 +1077,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
{
int old_pred_mode = a->i_predict16x16;
const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
- int i_thresh = a->i_satd_i16x16_dir[old_pred_mode] * 9/8;
+ int i_thresh = a->b_early_terminate ? a->i_satd_i16x16_dir[old_pred_mode] * 9/8 : COST_MAX;
i_best = a->i_satd_i16x16;
for( ; *predict_mode >= 0; predict_mode++ )
{
@@ -1093,7 +1098,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
{
int8_t predict_mode_sorted[4];
int i_max;
- int i_thresh = a->i_satd_i8x8chroma * 5/4;
+ int i_thresh = a->b_early_terminate ? a->i_satd_i8x8chroma * 5/4 : COST_MAX;
for( i_max = 0; *predict_mode >= 0; predict_mode++ )
{
@@ -1197,7 +1202,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
h->mb.pic.p_fdec[1] + 8*x + 8*y*FDEC_STRIDE,
h->mb.pic.p_fdec[2] + 8*x + 8*y*FDEC_STRIDE};
int cbp_luma_new = 0;
- int i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8;
+ int i_thresh = a->b_early_terminate ? a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8 : COST_MAX;
i_best = COST_MAX64;
@@ -1298,7 +1303,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
int i_mvc;
ALIGNED_4( int16_t mvc[8][2] );
int i_halfpel_thresh = INT_MAX;
- int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
+ int *p_halfpel_thresh = (a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh : NULL;
/* 16x16 Search on all ref frame */
m.i_pixel = PIXEL_16x16;
@@ -1388,8 +1393,8 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
/* early termination: if 16x16 chose ref 0, then evaluate no refs older
* than those used by the neighbors */
- if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
- h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
+ if( a->b_early_terminate && (i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
+ h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0) )
{
i_maxref = 0;
CHECK_NEIGHBOUR( -8 - 1 );
@@ -1572,7 +1577,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i
/* Early termination based on the current SATD score of partition[0]
plus the estimated SATD score of partition[1] */
- if( !i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
+ if( a->b_early_terminate && (!i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4) )
{
a->l0.i_cost16x8 = COST_MAX;
return;
@@ -1637,7 +1642,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i
/* Early termination based on the current SATD score of partition[0]
plus the estimated SATD score of partition[1] */
- if( !i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
+ if( a->b_early_terminate && (!i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4) )
{
a->l0.i_cost8x16 = COST_MAX;
return;
@@ -1923,8 +1928,8 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
int try_skip = a->b_try_skip;
int list1_skipped = 0;
int i_halfpel_thresh[2] = {INT_MAX, INT_MAX};
- int *p_halfpel_thresh[2] = {h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh[0] : NULL,
- h->mb.pic.i_fref[1]>1 ? &i_halfpel_thresh[1] : NULL};
+ int *p_halfpel_thresh[2] = {(a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh[0] : NULL,
+ (a->b_early_terminate && h->mb.pic.i_fref[1]>1) ? &i_halfpel_thresh[1] : NULL};
x264_me_t m;
m.i_pixel = PIXEL_16x16;
@@ -2454,8 +2459,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
/* Early termination based on the current SATD score of partition[0]
plus the estimated SATD score of partition[1] */
- if( !i && i_part_cost + a->i_cost_est16x8[1] > i_best_satd
- * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16 )
+ if( a->b_early_terminate && (!i && i_part_cost + a->i_cost_est16x8[1] > i_best_satd
+ * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16) )
{
a->i_cost16x8bi = COST_MAX;
return;
@@ -2547,8 +2552,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
/* Early termination based on the current SATD score of partition[0]
plus the estimated SATD score of partition[1] */
- if( !i && i_part_cost + a->i_cost_est8x16[1] > i_best_satd
- * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16 )
+ if( a->b_early_terminate && (!i && i_part_cost + a->i_cost_est8x16[1] > i_best_satd
+ * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16) )
{
a->i_cost8x16bi = COST_MAX;
return;
@@ -2566,10 +2571,10 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
{
- int thresh = i_satd * 5/4 + 1;
+ int thresh = a->b_early_terminate ? i_satd * 5/4 + 1 : COST_MAX;
h->mb.i_type = P_L0;
- if( a->l0.i_rd16x16 == COST_MAX && a->l0.me16x16.cost <= i_satd * 3/2 )
+ if( a->l0.i_rd16x16 == COST_MAX && (!a->b_early_terminate || a->l0.me16x16.cost <= i_satd * 3/2) )
{
h->mb.i_partition = D_16x16;
x264_analyse_update_cache( h, a );
@@ -2609,7 +2614,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
for( int i = 0; i < 4; i++ )
{
int costs[4] = {a->l0.i_cost4x4[i], a->l0.i_cost8x4[i], a->l0.i_cost4x8[i], a->l0.me8x8[i].cost};
- int sub8x8_thresh = X264_MIN4( costs[0], costs[1], costs[2], costs[3] ) * 5 / 4;
+ int sub8x8_thresh = a->b_early_terminate ? X264_MIN4( costs[0], costs[1], costs[2], costs[3] ) * 5 / 4 : COST_MAX;
int subtype, btype = D_L0_8x8;
uint64_t bcost = COST_MAX64;
for( subtype = D_L0_4x4; subtype <= D_L0_8x8; subtype++ )
@@ -2639,7 +2644,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
{
- int thresh = i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1;
+ int thresh = a->b_early_terminate ? i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1 : COST_MAX;
if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )
{
@@ -3020,8 +3025,8 @@ void x264_macroblock_analyse( x264_t *h )
i_partition = D_16x16;
i_cost = analysis.l0.me16x16.cost;
- if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
- analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
+ if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate ||
+ analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost) )
{
i_type = P_8x8;
i_partition = D_8x8;
@@ -3033,7 +3038,7 @@ void x264_macroblock_analyse( x264_t *h )
for( int i = 0; i < 4; i++ )
{
x264_mb_analyse_inter_p4x4( h, &analysis, i );
- if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
+ if( !analysis.b_early_terminate || analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
{
int i_cost8x8 = analysis.l0.i_cost4x4[i];
h->mb.i_sub_partition[i] = D_L0_4x4;
@@ -3056,8 +3061,8 @@ void x264_macroblock_analyse( x264_t *h )
/* Now do 16x8/8x16 */
i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv;
- if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
- analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 )
+ if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate ||
+ analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8) )
{
int i_avg_mv_ref_cost = (analysis.l0.me8x8[2].cost_mv + analysis.l0.me8x8[2].i_ref_cost
+ analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
@@ -3364,7 +3369,7 @@ void x264_macroblock_analyse( x264_t *h )
COPY2_IF_LT( i_cost, analysis.i_cost16x16bi, i_type, B_BI_BI );
COPY2_IF_LT( i_cost, analysis.i_cost16x16direct, i_type, B_DIRECT );
- if( analysis.i_mbrd && analysis.i_cost16x16direct <= i_cost * 33/32 )
+ if( analysis.i_mbrd && analysis.b_early_terminate && analysis.i_cost16x16direct <= i_cost * 33/32 )
{
x264_mb_analyse_b_rd( h, &analysis, i_cost );
if( i_bskip_cost < analysis.i_rd16x16direct &&
@@ -3431,17 +3436,17 @@ void x264_macroblock_analyse( x264_t *h )
/* We can gain a little speed by checking the mode with the lowest estimated cost first */
int try_16x8_first = i_cost_est16x8bi_total < i_cost_est8x16bi_total;
- if( try_16x8_first && i_cost_est16x8bi_total < i_cost )
+ if( try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
{
x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
}
- if( i_cost_est8x16bi_total < i_cost )
+ if( !analysis.b_early_terminate || i_cost_est8x16bi_total < i_cost )
{
x264_mb_analyse_inter_b8x16( h, &analysis, i_cost );
COPY3_IF_LT( i_cost, analysis.i_cost8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 );
}
- if( !try_16x8_first && i_cost_est16x8bi_total < i_cost )
+ if( !try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
{
x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
View
4 encoder/encoder.c
@@ -503,7 +503,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
- h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 10 );
+ h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 11 );
h->param.rc.f_ip_factor = X264_MAX( h->param.rc.f_ip_factor, 0.01f );
h->param.rc.f_pb_factor = X264_MAX( h->param.rc.f_pb_factor, 0.01f );
if( h->param.rc.i_rc_method == X264_RC_CRF )
@@ -784,7 +784,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
h->param.rc.f_aq_strength = 0;
}
h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
- if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
+ if( h->param.analyse.i_subpel_refine >= 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
h->param.analyse.i_subpel_refine = 9;
{
View
1 encoder/me.c
@@ -46,6 +46,7 @@ static const uint8_t subpel_iterations[][4] =
{0,0,2,2},
{0,0,4,10},
{0,0,4,10},
+ {0,0,4,10},
{0,0,4,10}};
/* (x-1)%6 */
View
4 tools/test_x264.py
@@ -44,7 +44,9 @@
"fast",
"medium",
"slow",
- "slower") ]
+ "slower",
+ "veryslow",
+ "placebo") ]
]
# end options
View
7 x264.c
@@ -479,7 +479,7 @@ static void help( x264_param_t *defaults, int longhelp )
" --bframes 16 --b-adapt 2 --direct auto\n"
" --slow-firstpass --no-fast-pskip\n"
" --me tesa --merange 24 --partitions all\n"
- " --rc-lookahead 60 --ref 16 --subme 10\n"
+ " --rc-lookahead 60 --ref 16 --subme 11\n"
" --trellis 2\n" );
else H0( " - ultrafast,superfast,veryfast,faster,fast\n"
" - medium,slow,slower,veryslow,placebo\n" );
@@ -650,8 +650,9 @@ static void help( x264_param_t *defaults, int longhelp )
" - 7: RD mode decision for all frames\n"
" - 8: RD refinement for I/P-frames\n"
" - 9: RD refinement for all frames\n"
- " - 10: QP-RD - requires trellis=2, aq-mode>0\n" );
- else H1( " decision quality: 1=fast, 10=best.\n" );
+ " - 10: QP-RD - requires trellis=2, aq-mode>0\n"
+ " - 11: Full RD: disable all early terminations\n" );
+ else H1( " decision quality: 1=fast, 11=best\n" );
H1( " --psy-rd <float:float> Strength of psychovisual optimization [\"%.1f:%.1f\"]\n"
" #1: RD (requires subme>=6)\n"
" #2: Trellis (requires trellis, experimental)\n",

0 comments on commit 9977b59

Please sign in to comment.
Something went wrong with that request. Please try again.