Add --subme 11, which disables all early terminations in analysis

Necessary for a future trellis mode decision/motion estimation patch. Also add the slowest presets to the regression test.
simonhorlick · Jul 22, 2011 · 9977b59 · 9977b59
1 parent 207ca3e
commit 9977b59
Show file tree

Hide file tree

Showing 6 changed files with 47 additions and 38 deletions.
diff --git a/common/common.c b/common/common.c
@@ -275,7 +275,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
     else if( !strcasecmp( preset, "placebo" ) )
     {
         param->analyse.i_me_method = X264_ME_TESA;
-        param->analyse.i_subpel_refine = 10;
+        param->analyse.i_subpel_refine = 11;
         param->analyse.i_me_range = 24;
         param->i_frame_reference = 16;
         param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;

diff --git a/encoder/analyse.c b/encoder/analyse.c
@@ -132,6 +132,7 @@ typedef struct
     int i_mb_type8x16;
 
     int b_direct_available;
+    int b_early_terminate;
 
 } x264_mb_analysis_t;
 
@@ -416,6 +417,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
     /* mbrd == 3 -> QPRD */
     a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
     h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
+    a->b_early_terminate = h->param.analyse.i_subpel_refine < 11;
 
     x264_mb_analyse_init_qp( h, a, qp );
 
@@ -560,7 +562,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
             }
 
         /* Fast intra decision */
-        if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
+        if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
         {
             /* Always run in fast-intra mode for subme < 3 */
             if( h->mb.i_subpel_refine > 2 &&
@@ -927,18 +929,18 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
         }
         /* Not heavily tuned */
         static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
-        if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
+        if( a->b_early_terminate && X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
             return;
     }
 
     /* 4x4 prediction selection */
     if( flags & X264_ANALYSE_I4x4 )
     {
         int i_cost = lambda * (24+16); /* 24from JVT (SATD0), 16 from base predmode costs */
-        int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 );
+        int i_satd_thresh = a->b_early_terminate ? X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 ) : COST_MAX;
         h->mb.i_cbp_luma = 0;
 
-        if( a->i_mbrd )
+        if( a->b_early_terminate && a->i_mbrd )
             i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8;
 
         if( h->sh.i_type == SLICE_TYPE_B )
@@ -1033,6 +1035,9 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
 
 static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
 {
+    if( !a->b_early_terminate )
+        i_satd_thresh = COST_MAX;
+
     if( a->i_satd_i16x16 < i_satd_thresh )
     {
         h->mb.i_type = I_16x16;
@@ -1072,7 +1077,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
     {
         int old_pred_mode = a->i_predict16x16;
         const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
-        int i_thresh = a->i_satd_i16x16_dir[old_pred_mode] * 9/8;
+        int i_thresh = a->b_early_terminate ? a->i_satd_i16x16_dir[old_pred_mode] * 9/8 : COST_MAX;
         i_best = a->i_satd_i16x16;
         for( ; *predict_mode >= 0; predict_mode++ )
         {
@@ -1093,7 +1098,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
         {
             int8_t predict_mode_sorted[4];
             int i_max;
-            int i_thresh = a->i_satd_i8x8chroma * 5/4;
+            int i_thresh = a->b_early_terminate ? a->i_satd_i8x8chroma * 5/4 : COST_MAX;
 
             for( i_max = 0; *predict_mode >= 0; predict_mode++ )
             {
@@ -1197,7 +1202,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
                              h->mb.pic.p_fdec[1] + 8*x + 8*y*FDEC_STRIDE,
                              h->mb.pic.p_fdec[2] + 8*x + 8*y*FDEC_STRIDE};
             int cbp_luma_new = 0;
-            int i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8;
+            int i_thresh = a->b_early_terminate ? a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8 : COST_MAX;
 
             i_best = COST_MAX64;
 
@@ -1298,7 +1303,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
     int i_mvc;
     ALIGNED_4( int16_t mvc[8][2] );
     int i_halfpel_thresh = INT_MAX;
-    int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
+    int *p_halfpel_thresh = (a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh : NULL;
 
     /* 16x16 Search on all ref frame */
     m.i_pixel = PIXEL_16x16;
@@ -1388,8 +1393,8 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
 
     /* early termination: if 16x16 chose ref 0, then evalute no refs older
      * than those used by the neighbors */
-    if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
-        h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
+    if( a->b_early_terminate && (i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
+        h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0) )
     {
         i_maxref = 0;
         CHECK_NEIGHBOUR(  -8 - 1 );
@@ -1572,7 +1577,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i
 
         /* Early termination based on the current SATD score of partition[0]
            plus the estimated SATD score of partition[1] */
-        if( !i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
+        if( a->b_early_terminate && (!i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4) )
         {
             a->l0.i_cost16x8 = COST_MAX;
             return;
@@ -1637,7 +1642,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i
 
         /* Early termination based on the current SATD score of partition[0]
            plus the estimated SATD score of partition[1] */
-        if( !i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
+        if( a->b_early_terminate && (!i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4) )
         {
             a->l0.i_cost8x16 = COST_MAX;
             return;
@@ -1923,8 +1928,8 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
     int try_skip = a->b_try_skip;
     int list1_skipped = 0;
     int i_halfpel_thresh[2] = {INT_MAX, INT_MAX};
-    int *p_halfpel_thresh[2] = {h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh[0] : NULL,
-                                h->mb.pic.i_fref[1]>1 ? &i_halfpel_thresh[1] : NULL};
+    int *p_halfpel_thresh[2] = {(a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh[0] : NULL,
+                                (a->b_early_terminate && h->mb.pic.i_fref[1]>1) ? &i_halfpel_thresh[1] : NULL};
 
     x264_me_t m;
     m.i_pixel = PIXEL_16x16;
@@ -2454,8 +2459,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
 
         /* Early termination based on the current SATD score of partition[0]
            plus the estimated SATD score of partition[1] */
-        if( !i && i_part_cost + a->i_cost_est16x8[1] > i_best_satd
-            * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16 )
+        if( a->b_early_terminate && (!i && i_part_cost + a->i_cost_est16x8[1] > i_best_satd
+            * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16) )
         {
             a->i_cost16x8bi = COST_MAX;
             return;
@@ -2547,8 +2552,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
 
         /* Early termination based on the current SATD score of partition[0]
            plus the estimated SATD score of partition[1] */
-        if( !i && i_part_cost + a->i_cost_est8x16[1] > i_best_satd
-            * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16 )
+        if( a->b_early_terminate && (!i && i_part_cost + a->i_cost_est8x16[1] > i_best_satd
+            * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16) )
         {
             a->i_cost8x16bi = COST_MAX;
             return;
@@ -2566,10 +2571,10 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
 
 static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
 {
-    int thresh = i_satd * 5/4 + 1;
+    int thresh = a->b_early_terminate ? i_satd * 5/4 + 1 : COST_MAX;
 
     h->mb.i_type = P_L0;
-    if( a->l0.i_rd16x16 == COST_MAX && a->l0.me16x16.cost <= i_satd * 3/2 )
+    if( a->l0.i_rd16x16 == COST_MAX && (!a->b_early_terminate || a->l0.me16x16.cost <= i_satd * 3/2) )
     {
         h->mb.i_partition = D_16x16;
         x264_analyse_update_cache( h, a );
@@ -2609,7 +2614,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
             for( int i = 0; i < 4; i++ )
             {
                 int costs[4] = {a->l0.i_cost4x4[i], a->l0.i_cost8x4[i], a->l0.i_cost4x8[i], a->l0.me8x8[i].cost};
-                int sub8x8_thresh = X264_MIN4( costs[0], costs[1], costs[2], costs[3] ) * 5 / 4;
+                int sub8x8_thresh = a->b_early_terminate ? X264_MIN4( costs[0], costs[1], costs[2], costs[3] ) * 5 / 4 : COST_MAX;
                 int subtype, btype = D_L0_8x8;
                 uint64_t bcost = COST_MAX64;
                 for( subtype = D_L0_4x4; subtype <= D_L0_8x8; subtype++ )
@@ -2639,7 +2644,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
 
 static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
 {
-    int thresh = i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1;
+    int thresh = a->b_early_terminate ? i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1 : COST_MAX;
 
     if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )
     {
@@ -3020,8 +3025,8 @@ void x264_macroblock_analyse( x264_t *h )
             i_partition = D_16x16;
             i_cost = analysis.l0.me16x16.cost;
 
-            if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
-                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
+            if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate ||
+                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost) )
             {
                 i_type = P_8x8;
                 i_partition = D_8x8;
@@ -3033,7 +3038,7 @@ void x264_macroblock_analyse( x264_t *h )
                     for( int i = 0; i < 4; i++ )
                     {
                         x264_mb_analyse_inter_p4x4( h, &analysis, i );
-                        if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
+                        if( !analysis.b_early_terminate || analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
                         {
                             int i_cost8x8 = analysis.l0.i_cost4x4[i];
                             h->mb.i_sub_partition[i] = D_L0_4x4;
@@ -3056,8 +3061,8 @@ void x264_macroblock_analyse( x264_t *h )
 
             /* Now do 16x8/8x16 */
             i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv;
-            if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
-                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 )
+            if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate ||
+                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8) )
             {
                 int i_avg_mv_ref_cost = (analysis.l0.me8x8[2].cost_mv + analysis.l0.me8x8[2].i_ref_cost
                                       + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
@@ -3364,7 +3369,7 @@ void x264_macroblock_analyse( x264_t *h )
             COPY2_IF_LT( i_cost, analysis.i_cost16x16bi, i_type, B_BI_BI );
             COPY2_IF_LT( i_cost, analysis.i_cost16x16direct, i_type, B_DIRECT );
 
-            if( analysis.i_mbrd && analysis.i_cost16x16direct <= i_cost * 33/32 )
+            if( analysis.i_mbrd && analysis.b_early_terminate && analysis.i_cost16x16direct <= i_cost * 33/32 )
             {
                 x264_mb_analyse_b_rd( h, &analysis, i_cost );
                 if( i_bskip_cost < analysis.i_rd16x16direct &&
@@ -3431,17 +3436,17 @@ void x264_macroblock_analyse( x264_t *h )
 
                 /* We can gain a little speed by checking the mode with the lowest estimated cost first */
                 int try_16x8_first = i_cost_est16x8bi_total < i_cost_est8x16bi_total;
-                if( try_16x8_first && i_cost_est16x8bi_total < i_cost )
+                if( try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
                 {
                     x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
                     COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
                 }
-                if( i_cost_est8x16bi_total < i_cost )
+                if( !analysis.b_early_terminate || i_cost_est8x16bi_total < i_cost )
                 {
                     x264_mb_analyse_inter_b8x16( h, &analysis, i_cost );
                     COPY3_IF_LT( i_cost, analysis.i_cost8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 );
                 }
-                if( !try_16x8_first && i_cost_est16x8bi_total < i_cost )
+                if( !try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
                 {
                     x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
                     COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );

diff --git a/encoder/encoder.c b/encoder/encoder.c
@@ -503,7 +503,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
     h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
     h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
     h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
-    h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 10 );
+    h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 11 );
     h->param.rc.f_ip_factor = X264_MAX( h->param.rc.f_ip_factor, 0.01f );
     h->param.rc.f_pb_factor = X264_MAX( h->param.rc.f_pb_factor, 0.01f );
     if( h->param.rc.i_rc_method == X264_RC_CRF )
@@ -784,7 +784,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
         h->param.rc.f_aq_strength = 0;
     }
     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
-    if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
+    if( h->param.analyse.i_subpel_refine >= 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
         h->param.analyse.i_subpel_refine = 9;
 
     {

diff --git a/encoder/me.c b/encoder/me.c
@@ -46,6 +46,7 @@ static const uint8_t subpel_iterations[][4] =
     {0,0,2,2},
     {0,0,4,10},
     {0,0,4,10},
+    {0,0,4,10},
     {0,0,4,10}};
 
 /* (x-1)%6 */

diff --git a/tools/test_x264.py b/tools/test_x264.py
@@ -44,7 +44,9 @@
                                   "fast",
                                   "medium",
                                   "slow",
-                                  "slower") ]
+                                  "slower",
+                                  "veryslow",
+                                  "placebo") ]
 ]
 
 # end options

diff --git a/x264.c b/x264.c
@@ -479,7 +479,7 @@ static void help( x264_param_t *defaults, int longhelp )
         "                                    --bframes 16 --b-adapt 2 --direct auto\n"
         "                                    --slow-firstpass --no-fast-pskip\n"
         "                                    --me tesa --merange 24 --partitions all\n"
-        "                                    --rc-lookahead 60 --ref 16 --subme 10\n"
+        "                                    --rc-lookahead 60 --ref 16 --subme 11\n"
         "                                    --trellis 2\n" );
     else H0( "                                  - ultrafast,superfast,veryfast,faster,fast\n"
              "                                  - medium,slow,slower,veryslow,placebo\n" );
@@ -650,8 +650,9 @@ static void help( x264_param_t *defaults, int longhelp )
         "                                  - 7: RD mode decision for all frames\n"
         "                                  - 8: RD refinement for I/P-frames\n"
         "                                  - 9: RD refinement for all frames\n"
-        "                                  - 10: QP-RD - requires trellis=2, aq-mode>0\n" );
-    else H1( "                                  decision quality: 1=fast, 10=best.\n"  );
+        "                                  - 10: QP-RD - requires trellis=2, aq-mode>0\n"
+        "                                  - 11: Full RD: disable all early terminations\n" );
+    else H1( "                                  decision quality: 1=fast, 11=best\n" );
     H1( "      --psy-rd <float:float>  Strength of psychovisual optimization [\"%.1f:%.1f\"]\n"
         "                                  #1: RD (requires subme>=6)\n"
         "                                  #2: Trellis (requires trellis, experimental)\n",