Skip to content

Commit

Permalink
4:0:0 (monochrome) encoding support
Browse files (browse the repository at this point in the history)
Virtually zero increase in compression efficiency compared to 4:2:0 with empty
chroma planes. Performance is better though, especially with fast settings.
  • Loading branch information
eruffaldi authored and Gramner committed Aug 6, 2018
1 parent 814e61e commit 698c5a3
Show file tree
Hide file tree
Showing 27 changed files with 325 additions and 194 deletions.
5 changes: 5 additions & 0 deletions common/base.c
Expand Up @@ -749,6 +749,11 @@ static int param_apply_profile( x264_param_t *param, const char *profile )
x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d\n", profile, param->i_bitdepth );
return -1;
}
if( p < PROFILE_HIGH && (param->i_csp & X264_CSP_MASK) == X264_CSP_I400 )
{
x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support 4:0:0\n", profile );
return -1;
}

if( p == PROFILE_BASELINE )
{
Expand Down
2 changes: 1 addition & 1 deletion common/common.h
Expand Up @@ -86,7 +86,7 @@
# define CHROMA_V_SHIFT h->mb.chroma_v_shift
#endif

/* NOTE(review): this hunk is shown without +/- markers. The header for
 * common/common.h reports 1 addition and 1 deletion, so the first CHROMA_SIZE
 * line is presumably the removed definition and the second its replacement --
 * confirm against the repository. */
/* Shifts s right by the sum of the horizontal and vertical chroma shifts. */
#define CHROMA_SIZE(s) ((s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT))
/* Same shift as above, except that it evaluates to 0 when CHROMA_FORMAT is zero. */
#define CHROMA_SIZE(s) (CHROMA_FORMAT ? (s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT) : 0)
/* s plus two times CHROMA_SIZE(s). */
#define FRAME_SIZE(s) ((s)+2*CHROMA_SIZE(s))
/* Non-zero when CHROMA_FORMAT equals CHROMA_444. */
#define CHROMA444 (CHROMA_FORMAT == CHROMA_444)

Expand Down
45 changes: 29 additions & 16 deletions common/deblock.c
Expand Up @@ -383,6 +383,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
int qp_thresh = 15 - X264_MIN( a, b ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset );
int stridey = h->fdec->i_stride[0];
int strideuv = h->fdec->i_stride[1];
int chroma_format = CHROMA_FORMAT;
int chroma444 = CHROMA444;
int chroma_height = 16 >> CHROMA_V_SHIFT;
intptr_t uvdiff = chroma444 ? h->fdec->plane[2] - h->fdec->plane[1] : 1;
Expand Down Expand Up @@ -420,7 +421,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
deblock_edge##intra( h, pixy + 4*edge*(dir?stride2y:1),\
stride2y, bs[dir][edge], qp, a, b, 0,\
h->loopf.deblock_luma##intra[dir] );\
if( CHROMA_FORMAT == CHROMA_444 )\
if( chroma_format == CHROMA_444 )\
{\
deblock_edge##intra( h, pixuv + 4*edge*(dir?stride2uv:1),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
Expand All @@ -429,14 +430,14 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
h->loopf.deblock_luma##intra[dir] );\
}\
else if( CHROMA_FORMAT == CHROMA_420 && !(edge & 1) )\
else if( chroma_format == CHROMA_420 && !(edge & 1) )\
{\
deblock_edge##intra( h, pixuv + edge*(dir?2*stride2uv:4),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
h->loopf.deblock_chroma##intra[dir] );\
}\
}\
if( CHROMA_FORMAT == CHROMA_422 && (dir || !(edge & 1)) )\
if( chroma_format == CHROMA_422 && (dir || !(edge & 1)) )\
{\
deblock_edge##intra( h, pixuv + edge*(dir?4*stride2uv:4),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
Expand All @@ -463,16 +464,22 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] ) )
{
deblock_edge_intra( h, pixy, 2*stridey, bs[0][0], luma_qp[0], a, b, 0, luma_intra_deblock );
deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
if( chroma444 )
deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
if( chroma_format )
{
deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
if( chroma444 )
deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
}
}
else
{
deblock_edge( h, pixy, 2*stridey, bs[0][0], luma_qp[0], a, b, 0, luma_deblock );
deblock_edge( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
if( chroma444 )
deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
if( chroma_format )
{
deblock_edge( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
if( chroma444 )
deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
}
}

int offy = MB_INTERLACED ? 4 : 0;
Expand All @@ -483,16 +490,22 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[1]] ) )
{
deblock_edge_intra( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], a, b, 0, luma_intra_deblock );
deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
if( chroma444 )
deblock_edge_intra( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
if( chroma_format )
{
deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
if( chroma444 )
deblock_edge_intra( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
}
}
else
{
deblock_edge( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], a, b, 0, luma_deblock );
deblock_edge( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
if( chroma444 )
deblock_edge( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
if( chroma_format )
{
deblock_edge( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
if( chroma444 )
deblock_edge( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
}
}
}
else
Expand Down Expand Up @@ -548,7 +561,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 0, h->loopf.deblock_luma[1] );
deblock_edge( h, pixuv + uvdiff + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 0, h->loopf.deblock_luma[1] );
}
else
else if( chroma_format )
deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 1, h->loopf.deblock_chroma[1] );
}
}
Expand Down
43 changes: 19 additions & 24 deletions common/frame.c
Expand Up @@ -44,29 +44,16 @@ static int align_plane_size( int x, int disalign )

/* Maps an external colorspace value, masked with X264_CSP_MASK, to the
 * X264_CSP_* value used internally, or X264_CSP_NONE when nothing matches.
 *
 * NOTE(review): this is a diff rendering without +/- markers. The header for
 * common/frame.c reports 19 additions and 24 deletions, which is consistent
 * with the switch statement being the removed body and the range checks after
 * it being the replacement -- confirm against the repository. Read literally
 * as a single function, every path through the switch returns, so the range
 * checks below it would never execute. */
static int frame_internal_csp( int external_csp )
{
/* Enumerated mapping: each listed external format returns NV12, NV16 or I444;
 * anything else (X264_CSP_I400 is not listed) returns X264_CSP_NONE. */
switch( external_csp & X264_CSP_MASK )
{
case X264_CSP_NV12:
case X264_CSP_NV21:
case X264_CSP_I420:
case X264_CSP_YV12:
return X264_CSP_NV12;
case X264_CSP_NV16:
case X264_CSP_I422:
case X264_CSP_YV16:
case X264_CSP_YUYV:
case X264_CSP_UYVY:
case X264_CSP_V210:
return X264_CSP_NV16;
case X264_CSP_I444:
case X264_CSP_YV24:
case X264_CSP_BGR:
case X264_CSP_BGRA:
case X264_CSP_RGB:
return X264_CSP_I444;
default:
return X264_CSP_NONE;
}
/* Range-based mapping: X264_CSP_I400 maps to itself, and the remaining formats
 * are classified by comparing against I420, I422, I444 and RGB.
 * NOTE(review): this relies on the numeric ordering of the X264_CSP_*
 * constants, whose definitions are not visible here -- verify that every
 * format in each family lies inside the corresponding range. */
int csp = external_csp & X264_CSP_MASK;
if( csp == X264_CSP_I400 )
return X264_CSP_I400;
if( csp >= X264_CSP_I420 && csp < X264_CSP_I422 )
return X264_CSP_NV12;
if( csp >= X264_CSP_I422 && csp < X264_CSP_I444 )
return X264_CSP_NV16;
if( csp >= X264_CSP_I444 && csp <= X264_CSP_RGB )
return X264_CSP_I444;
return X264_CSP_NONE;
}

static x264_frame_t *frame_new( x264_t *h, int b_fdec )
Expand Down Expand Up @@ -119,6 +106,14 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
frame->i_stride[i] = i_stride;
}
}
else if( i_csp == X264_CSP_I400 )
{
luma_plane_count = 1;
frame->i_plane = 1;
frame->i_width[0] = i_width;
frame->i_lines[0] = i_lines;
frame->i_stride[0] = i_stride;
}
else
goto fail;

Expand Down Expand Up @@ -470,7 +465,7 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
(pixel*)pix[2], stride[2]/sizeof(pixel),
h->param.i_width>>1, h->param.i_height>>v_shift );
}
else //if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 )
else if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 )
{
get_plane_ptr( h, src, &pix[1], &stride[1], i_csp==X264_CSP_I444 ? 1 : 2, 0, 0 );
get_plane_ptr( h, src, &pix[2], &stride[2], i_csp==X264_CSP_I444 ? 2 : 1, 0, 0 );
Expand Down
41 changes: 22 additions & 19 deletions common/macroblock.c
Expand Up @@ -48,7 +48,7 @@ static NOINLINE void mb_mc_0xywh( x264_t *h, int x, int y, int width, int height
MC_LUMA( 0, 1 );
MC_LUMA( 0, 2 );
}
else
else if( CHROMA_FORMAT )
{
int v_shift = CHROMA_V_SHIFT;
// Chroma in 4:2:0 is offset if MCing from a field of opposite parity
Expand Down Expand Up @@ -87,7 +87,7 @@ static NOINLINE void mb_mc_1xywh( x264_t *h, int x, int y, int width, int height
MC_LUMA( 1, 1 );
MC_LUMA( 1, 2 );
}
else
else if( CHROMA_FORMAT )
{
int v_shift = CHROMA_V_SHIFT;
if( v_shift & MB_INTERLACED & i_ref )
Expand Down Expand Up @@ -132,7 +132,7 @@ static NOINLINE void mb_mc_01xywh( x264_t *h, int x, int y, int width, int heigh
MC_LUMA_BI( 1 );
MC_LUMA_BI( 2 );
}
else
else if( CHROMA_FORMAT )
{
int v_shift = CHROMA_V_SHIFT;
if( v_shift & MB_INTERLACED & i_ref0 )
Expand Down Expand Up @@ -531,17 +531,20 @@ void x264_macroblock_thread_init( x264_t *h )
*/
h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE;
if( CHROMA444 )
{
h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE;
h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 38*FDEC_STRIDE;
}
else
if( CHROMA_FORMAT )
{
h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE + 16;
h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE;
if( CHROMA444 )
{
h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE;
h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 38*FDEC_STRIDE;
}
else
{
h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE + 16;
}
}
}

Expand Down Expand Up @@ -1006,7 +1009,7 @@ static void ALWAYS_INLINE macroblock_cache_load( x264_t *h, int mb_x, int mb_y,
macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 0 );
macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 0 );
}
else
else if( CHROMA_FORMAT )
{
x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+ 4*FDEC_STRIDE );
x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+ 4*FDEC_STRIDE );
Expand All @@ -1026,7 +1029,7 @@ static void ALWAYS_INLINE macroblock_cache_load( x264_t *h, int mb_x, int mb_y,
macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 1 );
macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 1 );
}
else
else if( CHROMA_FORMAT )
macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 1 );
}

Expand Down Expand Up @@ -1643,7 +1646,7 @@ static void ALWAYS_INLINE macroblock_backup_intra( x264_t *h, int mb_x, int mb_y
memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*15, 16*sizeof(pixel) );
memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+FDEC_STRIDE*15, 16*sizeof(pixel) );
}
else
else if( CHROMA_FORMAT )
{
int backup_src = (15>>CHROMA_V_SHIFT) * FDEC_STRIDE;
memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) );
Expand All @@ -1661,7 +1664,7 @@ static void ALWAYS_INLINE macroblock_backup_intra( x264_t *h, int mb_x, int mb_y
memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 16*sizeof(pixel) );
memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+backup_src, 16*sizeof(pixel) );
}
else
else if( CHROMA_FORMAT )
{
if( CHROMA_FORMAT == CHROMA_420 )
backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE;
Expand Down Expand Up @@ -1695,7 +1698,7 @@ void x264_macroblock_cache_save( x264_t *h )
macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 1 );
macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 1 );
}
else
else if( CHROMA_FORMAT )
macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 1 );
}
else
Expand All @@ -1707,7 +1710,7 @@ void x264_macroblock_cache_save( x264_t *h )
macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 0 );
macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 0 );
}
else
else if( CHROMA_FORMAT )
macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 0 );
}

Expand Down
1 change: 1 addition & 0 deletions common/mc.c
Expand Up @@ -652,6 +652,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )

pf->hpel_filter = hpel_filter;

pf->prefetch_fenc_400 = prefetch_fenc_null;
pf->prefetch_fenc_420 = prefetch_fenc_null;
pf->prefetch_fenc_422 = prefetch_fenc_null;
pf->prefetch_ref = prefetch_ref_null;
Expand Down
1 change: 1 addition & 0 deletions common/mc.h
Expand Up @@ -308,6 +308,7 @@ typedef struct

/* prefetch the next few macroblocks of fenc or fdec */
void (*prefetch_fenc) ( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
void (*prefetch_fenc_400)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
void (*prefetch_fenc_420)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
void (*prefetch_fenc_422)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
/* prefetch the next few macroblocks of a hpel reference frame */
Expand Down
19 changes: 19 additions & 0 deletions common/x86/mc-a.asm
Expand Up @@ -1515,6 +1515,25 @@ INIT_MMX mmx2
PREFETCH_FENC 420
PREFETCH_FENC 422

; Select which register the temporary t0 refers to: index 4 on x86-64 builds,
; index 2 otherwise.
; NOTE(review): DECLARE_REG_TMP is defined outside this hunk; presumably it
; aliases t0 to r4 / r2 -- confirm.
%if ARCH_X86_64
DECLARE_REG_TMP 4
%else
DECLARE_REG_TMP 2
%endif

;-----------------------------------------------------------------------------
; void prefetch_fenc_400( pixel *pix_y, intptr_t stride_y,
;                         pixel *pix_uv, intptr_t stride_uv, int mb_x )
;-----------------------------------------------------------------------------
; Luma-only prefetch: the body only touches r0, r1 and the value read from
; r4m; it never references the pix_uv / stride_uv arguments.
cglobal prefetch_fenc_400, 2,3
; t0d = value at r4m (presumably the fifth argument, mb_x -- confirm), loaded
; only if it is not already in that register.
movifnidn t0d, r4m
; NOTE(review): FIX_STRIDES is defined elsewhere; presumably it adjusts the
; stride in r1 for the configured pixel size -- confirm.
FIX_STRIDES r1
; t0 = (t0 & 3) * r1
and t0d, 3
imul t0d, r1d
; r0 = r0 + t0*4 + 64*SIZEOF_PIXEL
lea r0, [r0+t0*4+64*SIZEOF_PIXEL]
; Prefetch four addresses: r0, r0+r1, r0+2*r1 and r0+3*r1.
prefetcht0 [r0]
prefetcht0 [r0+r1]
lea r0, [r0+r1*2]
prefetcht0 [r0]
prefetcht0 [r0+r1]
RET

;-----------------------------------------------------------------------------
; void prefetch_ref( pixel *pix, intptr_t stride, int parity )
;-----------------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions common/x86/mc-c.c
Expand Up @@ -159,6 +159,8 @@ void x264_mc_copy_w16_aligned_sse( pixel *, intptr_t, pixel *, intptr_t, int );
void x264_mc_copy_w16_avx( uint16_t *, intptr_t, uint16_t *, intptr_t, int );
#define x264_mc_copy_w16_aligned_avx x264_template(mc_copy_w16_aligned_avx)
void x264_mc_copy_w16_aligned_avx( uint16_t *, intptr_t, uint16_t *, intptr_t, int );
#define x264_prefetch_fenc_400_mmx2 x264_template(prefetch_fenc_400_mmx2)
void x264_prefetch_fenc_400_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
#define x264_prefetch_fenc_420_mmx2 x264_template(prefetch_fenc_420_mmx2)
void x264_prefetch_fenc_420_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
#define x264_prefetch_fenc_422_mmx2 x264_template(prefetch_fenc_422_mmx2)
Expand Down Expand Up @@ -796,6 +798,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
if( !(cpu&X264_CPU_MMX2) )
return;

pf->prefetch_fenc_400 = x264_prefetch_fenc_400_mmx2;
pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2;
pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2;
pf->prefetch_ref = x264_prefetch_ref_mmx2;
Expand Down
6 changes: 3 additions & 3 deletions configure
Expand Up @@ -31,7 +31,7 @@ Configuration options:
--disable-win32thread disable win32threads (windows only)
--disable-interlaced disable interlaced encoding support
--bit-depth=BIT_DEPTH set output bit depth (8, 10, all) [all]
--chroma-format=FORMAT output chroma format (420, 422, 444, all) [all]
--chroma-format=FORMAT output chroma format (400, 420, 422, 444, all) [all]
Advanced options:
--disable-asm disable platform-specific assembly optimizations
Expand Down Expand Up @@ -517,8 +517,8 @@ for opt do
;;
--chroma-format=*)
chroma_format="$optarg"
if [ $chroma_format != "420" -a $chroma_format != "422" -a $chroma_format != "444" -a $chroma_format != "all" ]; then
echo "Supplied chroma format must be 420, 422, 444 or all."
if [ $chroma_format != "400" -a $chroma_format != "420" -a $chroma_format != "422" -a $chroma_format != "444" -a $chroma_format != "all" ]; then
echo "Supplied chroma format must be 400, 420, 422, 444 or all."
exit 1
fi
;;
Expand Down
6 changes: 3 additions & 3 deletions encoder/analyse.c
Expand Up @@ -309,8 +309,8 @@ static void mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
/* I: Intra part */
a->i_satd_i16x16 =
a->i_satd_i8x8 =
a->i_satd_i4x4 =
a->i_satd_chroma = COST_MAX;
a->i_satd_i4x4 = COST_MAX;
a->i_satd_chroma = CHROMA_FORMAT ? COST_MAX : 0;

/* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
* PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
Expand Down Expand Up @@ -1035,7 +1035,7 @@ static void intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
}

/* RD selection for chroma prediction */
if( !CHROMA444 )
if( CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422 )
{
const int8_t *predict_mode = predict_chroma_mode_available( h->mb.i_neighbour_intra );
if( predict_mode[1] >= 0 )
Expand Down

0 comments on commit 698c5a3

Please sign in to comment.