Permalink
Browse files
x264: use compatible PPC assembly
also use gmake on Tiger to avoid errors from Tiger's old make
- Loading branch information
Showing
with
193 additions
and 0 deletions.
| @@ -0,0 +1,186 @@ | ||
| diff --git common/ppc/quant.c common/ppc/quant.c | ||
| index dfb8a80..6a54aa9 100644 | ||
| --- common/ppc/quant.c | ||
| +++ common/ppc/quant.c | ||
| @@ -39,8 +39,8 @@ | ||
| biasvB = vec_ld((idx1), bias); \ | ||
| mskA = vec_cmplt(temp1v, zero_s16v); \ | ||
| mskB = vec_cmplt(temp2v, zero_s16v); \ | ||
| - coefvA = (vec_u16_t)vec_abs( temp1v ); \ | ||
| - coefvB = (vec_u16_t)vec_abs( temp2v ); \ | ||
| + coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);\ | ||
| + coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);\ | ||
| coefvA = vec_adds(coefvA, biasvA); \ | ||
| coefvB = vec_adds(coefvB, biasvB); \ | ||
| multEvenvA = vec_mule(coefvA, mfvA); \ | ||
| @@ -51,12 +51,8 @@ | ||
| multOddvA = vec_sr(multOddvA, i_qbitsv); \ | ||
| multEvenvB = vec_sr(multEvenvB, i_qbitsv); \ | ||
| multOddvB = vec_sr(multOddvB, i_qbitsv); \ | ||
| - temp1v = (vec_s16_t) vec_packs( multEvenvA, multOddvA ); \ | ||
| - tmpv = xxpermdi( temp1v, temp1v, 2 ); \ | ||
| - temp1v = vec_mergeh( temp1v, tmpv ); \ | ||
| - temp2v = (vec_s16_t) vec_packs( multEvenvB, multOddvB ); \ | ||
| - tmpv = xxpermdi( temp2v, temp2v, 2 ); \ | ||
| - temp2v = vec_mergeh( temp2v, tmpv ); \ | ||
| + temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), vec_mergel(multEvenvA, multOddvA)); \ | ||
| + temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB), vec_mergel(multEvenvB, multOddvB)); \ | ||
| temp1v = vec_xor(temp1v, mskA); \ | ||
| temp2v = vec_xor(temp2v, mskB); \ | ||
| temp1v = vec_adds(temp1v, vec_and(mskA, one)); \ | ||
| @@ -84,7 +80,7 @@ int x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] | ||
| vec_u16_t mfvB; | ||
| vec_u16_t biasvB; | ||
|
|
||
| - vec_s16_t temp1v, temp2v, tmpv; | ||
| + vec_s16_t temp1v, temp2v; | ||
|
|
||
| vec_u32_u qbits_u; | ||
| qbits_u.s[0]=16; | ||
| @@ -143,9 +139,17 @@ int x264_quant_4x4_dc_altivec( int16_t dct[16], int mf, int bias ) | ||
| vec_u16_t mfv; | ||
| vec_u16_t biasv; | ||
|
|
||
| - mfv = vec_splats( (uint16_t)mf ); | ||
| - i_qbitsv = vec_splats( (uint32_t) 16 ); | ||
| - biasv = vec_splats( (uint16_t)bias ); | ||
| + vec_u16_u mf_u; | ||
| + mf_u.s[0]=mf; | ||
| + mfv = vec_splat( mf_u.v, 0 ); | ||
| + | ||
| + vec_u32_u qbits_u; | ||
| + qbits_u.s[0]=16; | ||
| + i_qbitsv = vec_splat(qbits_u.v, 0); | ||
| + | ||
| + vec_u16_u bias_u; | ||
| + bias_u.s[0]=bias; | ||
| + biasv = vec_splat(bias_u.v, 0); | ||
|
|
||
| QUANT_16_U_DC( 0, 16 ); | ||
| return vec_any_ne(nz, zero_s16v); | ||
| @@ -186,9 +190,17 @@ int x264_quant_2x2_dc_altivec( int16_t dct[4], int mf, int bias ) | ||
| vec_u16_t mfv; | ||
| vec_u16_t biasv; | ||
|
|
||
| - mfv = vec_splats( (uint16_t)mf ); | ||
| - i_qbitsv = vec_splats( (uint32_t) 16 ); | ||
| - biasv = vec_splats( (uint16_t)bias ); | ||
| + vec_u16_u mf_u; | ||
| + mf_u.s[0]=mf; | ||
| + mfv = vec_splat( mf_u.v, 0 ); | ||
| + | ||
| + vec_u32_u qbits_u; | ||
| + qbits_u.s[0]=16; | ||
| + i_qbitsv = vec_splat(qbits_u.v, 0); | ||
| + | ||
| + vec_u16_u bias_u; | ||
| + bias_u.s[0]=bias; | ||
| + biasv = vec_splat(bias_u.v, 0); | ||
|
|
||
| static const vec_s16_t mask2 = CV(-1, -1, -1, -1, 0, 0, 0, 0); | ||
| QUANT_4_U_DC(0); | ||
| @@ -213,7 +225,7 @@ int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] | ||
| vec_u16_t mfvB; | ||
| vec_u16_t biasvB; | ||
|
|
||
| - vec_s16_t temp1v, temp2v, tmpv; | ||
| + vec_s16_t temp1v, temp2v; | ||
|
|
||
| vec_u32_u qbits_u; | ||
| qbits_u.s[0]=16; | ||
| @@ -235,9 +247,6 @@ int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] | ||
| multOddvA = vec_mulo(dctv, mfv); \ | ||
| dctv = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), \ | ||
| vec_mergel(multEvenvA, multOddvA)); \ | ||
| - dctv = (vec_s16_t) vec_packs( multEvenvA, multOddvA ); \ | ||
| - tmpv = xxpermdi( dctv, dctv, 2 ); \ | ||
| - dctv = vec_mergeh( dctv, tmpv ); \ | ||
| dctv = vec_sl(dctv, i_qbitsv); \ | ||
| vec_st(dctv, 8*y, dct); \ | ||
| } | ||
| @@ -279,7 +288,7 @@ void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp | ||
| int i_mf = i_qp%6; | ||
| int i_qbits = i_qp/6 - 4; | ||
|
|
||
| - vec_s16_t dctv, tmpv; | ||
| + vec_s16_t dctv; | ||
| vec_s16_t dct1v, dct2v; | ||
| vec_s32_t mf1v, mf2v; | ||
| vec_s16_t mfv; | ||
| @@ -289,7 +298,9 @@ void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp | ||
| if( i_qbits >= 0 ) | ||
| { | ||
| vec_u16_t i_qbitsv; | ||
| - i_qbitsv = vec_splats( (uint16_t) i_qbits ); | ||
| + vec_u16_u qbits_u; | ||
| + qbits_u.s[0]=i_qbits; | ||
| + i_qbitsv = vec_splat(qbits_u.v, 0); | ||
|
|
||
| for( int y = 0; y < 4; y+=2 ) | ||
| DEQUANT_SHL(); | ||
| @@ -299,13 +310,19 @@ void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp | ||
| const int f = 1 << (-i_qbits-1); | ||
|
|
||
| vec_s32_t fv; | ||
| - fv = vec_splats( f ); | ||
| + vec_u32_u f_u; | ||
| + f_u.s[0]=f; | ||
| + fv = (vec_s32_t)vec_splat(f_u.v, 0); | ||
|
|
||
| vec_u32_t i_qbitsv; | ||
| - i_qbitsv = vec_splats( (uint32_t)-i_qbits ); | ||
| + vec_u32_u qbits_u; | ||
| + qbits_u.s[0]=-i_qbits; | ||
| + i_qbitsv = vec_splat(qbits_u.v, 0); | ||
|
|
||
| vec_u32_t sixteenv; | ||
| - sixteenv = vec_splats( (uint32_t)16 ); | ||
| + vec_u32_u sixteen_u; | ||
| + sixteen_u.s[0]=16; | ||
| + sixteenv = vec_splat(sixteen_u.v, 0); | ||
|
|
||
| for( int y = 0; y < 4; y+=2 ) | ||
| DEQUANT_SHR(); | ||
| @@ -317,7 +334,7 @@ void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp | ||
| int i_mf = i_qp%6; | ||
| int i_qbits = i_qp/6 - 6; | ||
|
|
||
| - vec_s16_t dctv, tmpv; | ||
| + vec_s16_t dctv; | ||
| vec_s16_t dct1v, dct2v; | ||
| vec_s32_t mf1v, mf2v; | ||
| vec_s16_t mfv; | ||
| @@ -327,7 +344,9 @@ void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp | ||
| if( i_qbits >= 0 ) | ||
| { | ||
| vec_u16_t i_qbitsv; | ||
| - i_qbitsv = vec_splats((uint16_t)i_qbits ); | ||
| + vec_u16_u qbits_u; | ||
| + qbits_u.s[0]=i_qbits; | ||
| + i_qbitsv = vec_splat(qbits_u.v, 0); | ||
|
|
||
| for( int y = 0; y < 16; y+=2 ) | ||
| DEQUANT_SHL(); | ||
| @@ -337,13 +356,19 @@ void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp | ||
| const int f = 1 << (-i_qbits-1); | ||
|
|
||
| vec_s32_t fv; | ||
| - fv = vec_splats( f ); | ||
| + vec_u32_u f_u; | ||
| + f_u.s[0]=f; | ||
| + fv = (vec_s32_t)vec_splat(f_u.v, 0); | ||
|
|
||
| vec_u32_t i_qbitsv; | ||
| - i_qbitsv = vec_splats( (uint32_t)-i_qbits ); | ||
| + vec_u32_u qbits_u; | ||
| + qbits_u.s[0]=-i_qbits; | ||
| + i_qbitsv = vec_splat(qbits_u.v, 0); | ||
|
|
||
| vec_u32_t sixteenv; | ||
| - sixteenv = vec_splats( (uint32_t)16 ); | ||
| + vec_u32_u sixteen_u; | ||
| + sixteen_u.s[0]=16; | ||
| + sixteenv = vec_splat(sixteen_u.v, 0); | ||
|
|
||
| for( int y = 0; y < 16; y+=2 ) | ||
| DEQUANT_SHR(); |