Permalink
Browse files

Merge branch 'kaudio' into lsmash

Conflicts:
	configure
  • Loading branch information...
golgol7777 committed Oct 24, 2011
2 parents 9c04bd5 + c96ef7a commit 8ba1e9f990a688e9e5ef0ba9a9bcf6095e9ab7d0
View
@@ -30,7 +30,7 @@
.macro h264_loop_filter_start
ldr ip, [sp]
ldr ip, [ip]
vmov.32 d24[0], ip
vdup.32 d24, ip
and ip, ip, ip, lsl #16
ands ip, ip, ip, lsl #8
bxlt lr
@@ -197,90 +197,110 @@ function x264_deblock_h_luma_neon
.endfunc
// h264_loop_filter_chroma: core arithmetic of the chroma deblocking filter.
//
// Register roles (from the inline comments on the vabd lines):
//   p1 = d18 / q9,  p0 = d16 / q8,  q0 = d0 / q0,  q1 = d2 / q1
//   r2 = alpha, r3 = beta
// d24 / q12 is consumed as the per-lane clamp limit; it is not set in this
// macro (presumably the tc0 values prepared by h264_loop_filter_start;
// TODO confirm).
// The filtered p0 ends up in d16, d17 and the filtered q0 in d0, d1.
//
// NOTE(review): this listing looks like a diff rendered without its +/-
// markers: most d-register (64-bit) instructions are immediately followed by
// a q-register (128-bit) counterpart doing the same step. Treat the body as
// two interleaved versions and confirm against the real file before
// assembling it.
.macro h264_loop_filter_chroma
vdup.8 d22, r2 // alpha
vdup.8 q11, r2 // alpha
vmovl.u8 q12, d24 // widen the limit bytes to 16-bit lanes
vabd.u8 d26, d16, d0 // abs(p0 - q0)
vmovl.u8 q2, d0 // q0 widened to 16 bits
vabd.u8 d28, d18, d16 // abs(p1 - p0)
vsubw.u8 q2, q2, d16 // q0 - p0 (16-bit)
vsli.16 d24, d24, #8 // insert (d24 << 8) into each 16-bit lane of d24
vabd.u8 q13, q8, q0 // abs(p0 - q0)
vabd.u8 q14, q9, q8 // abs(p1 - p0)
vsubl.u8 q2, d0, d16 // q0 - p0, low half, widened to 16 bits
vsubl.u8 q3, d1, d17 // q0 - p0, high half, widened to 16 bits
vsli.16 q12, q12, #8 // copy each limit byte into the upper byte of its 16-bit lane
vshl.i16 q2, q2, #2 // (q0 - p0) * 4
vabd.u8 d30, d2, d0 // abs(q1 - q0)
vshl.i16 q3, q3, #2 // (q0 - p0) * 4, high half
vabd.u8 q15, q1, q0 // abs(q1 - q0)
vaddw.u8 q2, q2, d18 // + p1
vclt.u8 d26, d26, d22 // < alpha
vaddw.u8 q3, q3, d19 // + p1, high half
vclt.u8 q13, q13, q11 // < alpha
vsubw.u8 q2, q2, d2 // - q1
vdup.8 d22, r3 // beta
vclt.s8 d25, d24, #0 // lanes whose limit is negative
vsubw.u8 q3, q3, d3 // - q1, high half
vdup.8 q11, r3 // beta
vclt.s8 q10, q12, #0 // lanes whose limit is negative
vrshrn.i16 d4, q2, #3 // delta = rounding >> 3, narrowed to 8 bits
vclt.u8 d28, d28, d22 // < beta
vbic d26, d26, d25 // drop lanes with a negative limit from the mask
vclt.u8 d30, d30, d22 // < beta
vand d26, d26, d28 // mask &= abs(p1 - p0) < beta
vneg.s8 d25, d24 // -limit
vand d26, d26, d30 // mask &= abs(q1 - q0) < beta
vmin.s8 d4, d4, d24 // delta = min(delta, limit)
vrshrn.i16 d5, q3, #3 // delta (high half) = rounding >> 3, narrowed
vclt.u8 q14, q14, q11 // < beta
vbic q13, q13, q10 // drop lanes with a negative limit from the mask
vclt.u8 q15, q15, q11 // < beta
vand q13, q13, q14 // mask &= abs(p1 - p0) < beta
vneg.s8 q10, q12 // -limit
vand q13, q13, q15 // mask &= abs(q1 - q0) < beta
vmin.s8 q2, q2, q12 // delta = min(delta, limit)
vmovl.u8 q14, d16 // p0 widened to 16 bits
vand d4, d4, d26 // zero delta where the mask is clear
vmax.s8 d4, d4, d25 // delta = max(delta, -limit)
vand q2, q2, q13 // zero delta where the mask is clear
vmovl.u8 q15, d17 // p0 (high half) widened to 16 bits
vmax.s8 q2, q2, q10 // delta = max(delta, -limit)
vmovl.u8 q11, d0 // q0 widened to 16 bits
vmovl.u8 q12, d1 // q0 (high half) widened to 16 bits
vaddw.s8 q14, q14, d4 // p0 + delta
vaddw.s8 q15, q15, d5 // p0 + delta, high half
vsubw.s8 q11, q11, d4 // q0 - delta
vsubw.s8 q12, q12, d5 // q0 - delta, high half
vqmovun.s16 d16, q14 // saturate to unsigned 8-bit: new p0
vqmovun.s16 d17, q15 // saturate to unsigned 8-bit: new p0, high half
vqmovun.s16 d0, q11 // saturate to unsigned 8-bit: new q0
vqmovun.s16 d1, q12 // saturate to unsigned 8-bit: new q0, high half
.endm
// x264_deblock_v_chroma_neon: loads four rows around r0 (two above, two from
// r0 onward), runs h264_loop_filter_chroma on them, and stores the two middle
// rows back.
// r0 is used as the pixel pointer and r1 as the row step (presumably the
// `pix` and `stride` arguments of the C deblock prototypes; TODO confirm).
//
// NOTE(review): this listing looks like a diff rendered without its +/-
// markers. The vld1.64/vst1.64 lines (8 bytes per row, one d register) and
// the vld2.8/vst2.8 lines (16 bytes per row, de-interleaved into a d-register
// pair) appear to be the old and new versions of the same loads/stores, not
// one instruction stream. Confirm against the real file.
function x264_deblock_v_chroma_neon
h264_loop_filter_start
sub r0, r0, r1, lsl #1 // r0 -= 2 * r1: step back two rows to p1
vld1.64 {d18}, [r0,:64], r1 // p1 row
vld1.64 {d16}, [r0,:64], r1 // p0 row
vld1.64 {d0}, [r0,:64], r1 // q0 row
vld1.64 {d2}, [r0,:64] // q1 row (no post-increment)
vld2.8 {d18,d19}, [r0,:128], r1 // p1 row, even bytes -> d18, odd bytes -> d19
vld2.8 {d16,d17}, [r0,:128], r1 // p0 row
vld2.8 {d0, d1}, [r0,:128], r1 // q0 row
vld2.8 {d2, d3}, [r0,:128] // q1 row (no post-increment)
h264_loop_filter_chroma
sub r0, r0, r1, lsl #1 // r0 -= 2 * r1: back from the q1 row to the p0 row
vst1.64 {d16}, [r0,:64], r1 // store filtered p0
vst1.64 {d0}, [r0,:64], r1 // store filtered q0
vst2.8 {d16,d17}, [r0,:128], r1 // store filtered p0, re-interleaving the pair
vst2.8 {d0, d1}, [r0,:128], r1 // store filtered q0, re-interleaving the pair
bx lr
.endfunc
// x264_deblock_h_chroma_neon: reads a few bytes to the left and right of r0
// from eight consecutive rows, transposes them so that each register holds
// one column position, runs h264_loop_filter_chroma, transposes back and
// writes the eight rows out again.
// r0 is used as the pixel pointer and r1 as the row step (presumably the
// `pix` and `stride` arguments of the C deblock prototypes; TODO confirm).
//
// NOTE(review): this listing looks like a diff rendered without its +/-
// markers. Two variants are interleaved:
//   - a 4-bytes-per-row variant (sub #2, vld1.32/vst1.32 lanes, vtrn on
//     d registers), and
//   - an 8-bytes-per-row variant (sub #4, vld1.8/vst1.8, vuzp/vzip plus vtrn
//     on q registers).
// They are not meant to execute back to back; confirm against the real file.
function x264_deblock_h_chroma_neon
h264_loop_filter_start
sub r0, r0, #2 // 4-byte variant: start 2 bytes left of r0
vld1.32 {d18[]}, [r0], r1 // rows 0-3: 4 bytes each, replicated to both lanes
vld1.32 {d16[]}, [r0], r1
vld1.32 {d0[]}, [r0], r1
vld1.32 {d2[]}, [r0], r1
vld1.32 {d18[1]}, [r0], r1 // rows 4-7: 4 bytes each into lane 1
vld1.32 {d16[1]}, [r0], r1
vld1.32 {d0[1]}, [r0], r1
vld1.32 {d2[1]}, [r0], r1
vtrn.16 d18, d0 // transpose (16-bit step, then 8-bit step)
vtrn.16 d16, d2
vtrn.8 d18, d16
vtrn.8 d0, d2
sub r0, r0, #4 // 8-byte variant: start 4 bytes left of r0
vld1.8 {d18}, [r0], r1 // rows 0-3: 8 bytes each
vld1.8 {d16}, [r0], r1
vld1.8 {d0}, [r0], r1
vld1.8 {d2}, [r0], r1
vld1.8 {d19}, [r0], r1 // rows 4-7: 8 bytes each
vld1.8 {d17}, [r0], r1
vld1.8 {d1}, [r0], r1
vld1.8 {d3}, [r0], r1
vuzp.8 d18, d19 // split even/odd bytes between the two halves of each q register
vuzp.8 d16, d17
vuzp.8 d0, d1
vuzp.8 d2, d3
vtrn.16 q9, q0 // transpose (16-bit step, then 8-bit step)
vtrn.16 q8, q1
vtrn.8 q9, q8
vtrn.8 q0, q1
h264_loop_filter_chroma
vtrn.16 d18, d0 // 4-byte variant: transpose back
vtrn.16 d16, d2
vtrn.8 d18, d16
vtrn.8 d0, d2
vtrn.16 q9, q0 // 8-byte variant: transpose back
vtrn.16 q8, q1
vtrn.8 q9, q8
vtrn.8 q0, q1
vzip.8 d18, d19 // re-interleave the even/odd bytes (inverse of the vuzp above)
vzip.8 d16, d17
vzip.8 d0, d1
vzip.8 d2, d3
sub r0, r0, r1, lsl #3 // r0 -= 8 * r1: rewind to the first row
vst1.32 {d18[0]}, [r0], r1 // 4-byte variant: rows 0-3 from lane 0
vst1.32 {d16[0]}, [r0], r1
vst1.32 {d0[0]}, [r0], r1
vst1.32 {d2[0]}, [r0], r1
vst1.32 {d18[1]}, [r0], r1 // 4-byte variant: rows 4-7 from lane 1
vst1.32 {d16[1]}, [r0], r1
vst1.32 {d0[1]}, [r0], r1
vst1.32 {d2[1]}, [r0], r1
vst1.8 {d18}, [r0], r1 // 8-byte variant: rows 0-3
vst1.8 {d16}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d2}, [r0], r1
vst1.8 {d19}, [r0], r1 // 8-byte variant: rows 4-7
vst1.8 {d17}, [r0], r1
vst1.8 {d1}, [r0], r1
vst1.8 {d3}, [r0], r1
bx lr
.endfunc
View
@@ -210,7 +210,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
return;
#if !HIGH_BIT_DEPTH
pf->prefetch_fenc = x264_prefetch_fenc_arm;
pf->prefetch_fenc_420 = x264_prefetch_fenc_arm;
pf->prefetch_fenc_422 = x264_prefetch_fenc_arm; /* FIXME */
pf->prefetch_ref = x264_prefetch_ref_arm;
#endif // !HIGH_BIT_DEPTH
View
@@ -48,7 +48,7 @@ typedef struct bs_s
uint8_t *p;
uint8_t *p_end;
intptr_t cur_bits;
uintptr_t cur_bits;
int i_left; /* i_count number of available bits */
int i_bits_encoded; /* RD only */
} bs_t;
View
@@ -35,6 +35,8 @@
const int x264_bit_depth = BIT_DEPTH;
const int x264_chroma_format = X264_CHROMA_FORMAT;
static void x264_log_default( void *, int, const char *, va_list );
/****************************************************************************
@@ -52,7 +54,7 @@ void x264_param_default( x264_param_t *param )
param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
/* Video properties */
param->i_csp = X264_CSP_I420;
param->i_csp = X264_CHROMA_FORMAT ? X264_CHROMA_FORMAT : X264_CSP_I420;
param->i_width = 0;
param->i_height = 0;
param->vui.i_sar_width = 0;
View
@@ -40,9 +40,6 @@
#define IS_DISPOSABLE(type) ( type == X264_TYPE_B )
#define FIX8(f) ((int)(f*(1<<8)+.5))
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
#define CHROMA_FORMAT h->sps->i_chroma_format_idc
#define CHROMA_SIZE(s) ((s)>>(h->mb.chroma_h_shift+h->mb.chroma_v_shift))
#define FRAME_SIZE(s) ((s)+2*CHROMA_SIZE(s))
#define CHECKED_MALLOC( var, size )\
do {\
@@ -105,6 +102,17 @@ do {\
# define PARAM_INTERLACED 0
#endif
/* Chroma subsampling helpers.
 *
 * If CHROMA_FORMAT is already defined when this point is reached, the
 * horizontal and vertical shifts are derived from it directly:
 *   CHROMA_H_SHIFT is 1 for CHROMA_420 and CHROMA_422, otherwise 0;
 *   CHROMA_V_SHIFT is 1 for CHROMA_420 only, otherwise 0.
 * Otherwise all three macros read fields through a variable named `h`, which
 * must be in scope wherever they are expanded. */
#ifdef CHROMA_FORMAT
# define CHROMA_H_SHIFT (CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422)
# define CHROMA_V_SHIFT (CHROMA_FORMAT == CHROMA_420)
#else
# define CHROMA_FORMAT h->sps->i_chroma_format_idc
# define CHROMA_H_SHIFT h->mb.chroma_h_shift
# define CHROMA_V_SHIFT h->mb.chroma_v_shift
#endif
/* s shifted right by the combined horizontal + vertical chroma shift. */
#define CHROMA_SIZE(s) ((s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT))
/* s plus two chroma-sized parts (presumably one luma plane plus two chroma
 * planes; confirm against the callers). */
#define FRAME_SIZE(s) ((s)+2*CHROMA_SIZE(s))
/* Non-zero when the chroma format is CHROMA_444. */
#define CHROMA444 (CHROMA_FORMAT == CHROMA_444)
/* Unions for type-punning.
View
@@ -63,6 +63,8 @@ const x264_cpu_name_t x264_cpu_names[] =
{"SSE4", SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4},
{"SSE4.2", SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4|X264_CPU_SSE42},
{"AVX", SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4|X264_CPU_SSE42|X264_CPU_AVX},
{"XOP", SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4|X264_CPU_SSE42|X264_CPU_AVX|X264_CPU_XOP},
{"FMA4", SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4|X264_CPU_SSE42|X264_CPU_AVX|X264_CPU_FMA4},
#undef SSE2
{"Cache32", X264_CPU_CACHELINE_32},
{"Cache64", X264_CPU_CACHELINE_64},
@@ -175,6 +177,14 @@ uint32_t x264_cpu_detect( void )
cpu |= X264_CPU_SSE_MISALIGN;
x264_cpu_mask_misalign_sse();
}
if( cpu & X264_CPU_AVX )
{
if( ecx&0x00000800 ) /* XOP */
cpu |= X264_CPU_XOP;
if( ecx&0x00010000 ) /* FMA4 */
cpu |= X264_CPU_FMA4;
}
}
}
View
@@ -887,6 +887,8 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
if( cpu&X264_CPU_SHUFFLE_IS_FAST )
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
}
if( cpu&X264_CPU_XOP )
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_xop;
#endif // HAVE_MMX
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
View
@@ -394,7 +394,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
int stridey = h->fdec->i_stride[0];
int strideuv = h->fdec->i_stride[1];
int chroma444 = CHROMA444;
int chroma_height = 16 >> h->mb.chroma_v_shift;
int chroma_height = 16 >> CHROMA_V_SHIFT;
intptr_t uvdiff = chroma444 ? h->fdec->plane[2] - h->fdec->plane[1] : 1;
for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
@@ -484,7 +484,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
}
int offy = MB_INTERLACED ? 4 : 0;
int offuv = MB_INTERLACED ? 4-h->mb.chroma_v_shift : 0;
int offuv = MB_INTERLACED ? 4-CHROMA_V_SHIFT : 0;
left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
luma_qp[1] = (qp + left_qp[1] + 1) >> 1;
chroma_qp[1] = (qpc + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
@@ -647,6 +647,9 @@ void x264_deblock_v_chroma_sse2( pixel *pix, int stride, int alpha, int beta, in
/* Prototypes for deblocking routines defined outside this view. The name
 * suffix (mmx2 / sse2 / avx) matches the CPU flag under which
 * x264_deblock_init installs each one. The chroma variants take a tc0
 * pointer; the luma-intra variants do not. */
void x264_deblock_v_chroma_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* 4:2:2 horizontal-edge chroma variants; x264_deblock_init assigns these to
 * deblock_h_chroma_422 only when HIGH_BIT_DEPTH is off. */
void x264_deblock_h_chroma_422_mmx2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_422_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_422_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_v_luma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
@@ -736,6 +739,9 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_luma[0] = x264_deblock_h_luma_mmx2;
pf->deblock_chroma[1] = x264_deblock_v_chroma_mmx2;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_mmx2;
#if !HIGH_BIT_DEPTH
pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_mmx2;
#endif
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmx2;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmx2;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmx2;
@@ -745,12 +751,15 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
if( cpu&X264_CPU_SSE2 )
{
pf->deblock_strength = x264_deblock_strength_sse2;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_sse2;
#if !HIGH_BIT_DEPTH
pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_sse2;
#endif
if( !(cpu&X264_CPU_STACK_MOD4) )
{
pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
pf->deblock_luma[0] = x264_deblock_h_luma_sse2;
pf->deblock_chroma[1] = x264_deblock_v_chroma_sse2;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_sse2;
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_sse2;
@@ -762,12 +771,15 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
if( cpu&X264_CPU_AVX )
{
pf->deblock_strength = x264_deblock_strength_avx;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_avx;
#if !HIGH_BIT_DEPTH
pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_avx;
#endif
if( !(cpu&X264_CPU_STACK_MOD4) )
{
pf->deblock_luma[1] = x264_deblock_v_luma_avx;
pf->deblock_luma[0] = x264_deblock_h_luma_avx;
pf->deblock_chroma[1] = x264_deblock_v_chroma_avx;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_avx;
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_avx;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_avx;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_avx;
@@ -791,8 +803,8 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
{
pf->deblock_luma[1] = x264_deblock_v_luma_neon;
pf->deblock_luma[0] = x264_deblock_h_luma_neon;
// pf->deblock_chroma[1] = x264_deblock_v_chroma_neon;
// pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon;
pf->deblock_chroma[1] = x264_deblock_v_chroma_neon;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon;
}
#endif
#endif // !HIGH_BIT_DEPTH
Oops, something went wrong.

0 comments on commit 8ba1e9f

Please sign in to comment.