Skip to content
Permalink
Browse files

x264: use compatible PPC assembly

also use gmake on Tiger to avoid errors from Tiger's old make
  • Loading branch information
kencu committed Mar 31, 2019
1 parent a03614c commit cca444249af6d4804482e0b3d15ee28edf136e8b
Showing with 193 additions and 0 deletions.
  1. +7 −0 multimedia/x264/Portfile
  2. +186 −0 multimedia/x264/files/patch-x264-older-ppc-code.diff
@@ -32,6 +32,10 @@ minimum_xcodeversions {9 3.1}

depends_build port:nasm

# As of 20190313, the PPC assembly uses VSX, which is Power7+ only. We could disable asm, but
# instead we can use the previous PPC assembly, which does compile and provides the same functions.
patchfiles-append patch-x264-older-ppc-code.diff

configure.args --enable-pic \
--enable-shared \
--enable-static \
@@ -74,6 +78,9 @@ platform darwin 8 {
configure.cflags-append -msse2
}
set merger_configure_cflags(i386) -msse2

depends_build-append port:gmake
build.cmd gmake
}

# sets its own optflags
@@ -0,0 +1,186 @@
diff --git common/ppc/quant.c common/ppc/quant.c
index dfb8a80..6a54aa9 100644
--- common/ppc/quant.c
+++ common/ppc/quant.c
@@ -39,8 +39,8 @@
biasvB = vec_ld((idx1), bias); \
mskA = vec_cmplt(temp1v, zero_s16v); \
mskB = vec_cmplt(temp2v, zero_s16v); \
- coefvA = (vec_u16_t)vec_abs( temp1v ); \
- coefvB = (vec_u16_t)vec_abs( temp2v ); \
+ coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);\
+ coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);\
coefvA = vec_adds(coefvA, biasvA); \
coefvB = vec_adds(coefvB, biasvB); \
multEvenvA = vec_mule(coefvA, mfvA); \
@@ -51,12 +51,8 @@
multOddvA = vec_sr(multOddvA, i_qbitsv); \
multEvenvB = vec_sr(multEvenvB, i_qbitsv); \
multOddvB = vec_sr(multOddvB, i_qbitsv); \
- temp1v = (vec_s16_t) vec_packs( multEvenvA, multOddvA ); \
- tmpv = xxpermdi( temp1v, temp1v, 2 ); \
- temp1v = vec_mergeh( temp1v, tmpv ); \
- temp2v = (vec_s16_t) vec_packs( multEvenvB, multOddvB ); \
- tmpv = xxpermdi( temp2v, temp2v, 2 ); \
- temp2v = vec_mergeh( temp2v, tmpv ); \
+ temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), vec_mergel(multEvenvA, multOddvA)); \
+ temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB), vec_mergel(multEvenvB, multOddvB)); \
temp1v = vec_xor(temp1v, mskA); \
temp2v = vec_xor(temp2v, mskB); \
temp1v = vec_adds(temp1v, vec_and(mskA, one)); \
@@ -84,7 +80,7 @@ int x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16]
vec_u16_t mfvB;
vec_u16_t biasvB;

- vec_s16_t temp1v, temp2v, tmpv;
+ vec_s16_t temp1v, temp2v;

vec_u32_u qbits_u;
qbits_u.s[0]=16;
@@ -143,9 +139,17 @@ int x264_quant_4x4_dc_altivec( int16_t dct[16], int mf, int bias )
vec_u16_t mfv;
vec_u16_t biasv;

- mfv = vec_splats( (uint16_t)mf );
- i_qbitsv = vec_splats( (uint32_t) 16 );
- biasv = vec_splats( (uint16_t)bias );
+ vec_u16_u mf_u;
+ mf_u.s[0]=mf;
+ mfv = vec_splat( mf_u.v, 0 );
+
+ vec_u32_u qbits_u;
+ qbits_u.s[0]=16;
+ i_qbitsv = vec_splat(qbits_u.v, 0);
+
+ vec_u16_u bias_u;
+ bias_u.s[0]=bias;
+ biasv = vec_splat(bias_u.v, 0);

QUANT_16_U_DC( 0, 16 );
return vec_any_ne(nz, zero_s16v);
@@ -186,9 +190,17 @@ int x264_quant_2x2_dc_altivec( int16_t dct[4], int mf, int bias )
vec_u16_t mfv;
vec_u16_t biasv;

- mfv = vec_splats( (uint16_t)mf );
- i_qbitsv = vec_splats( (uint32_t) 16 );
- biasv = vec_splats( (uint16_t)bias );
+ vec_u16_u mf_u;
+ mf_u.s[0]=mf;
+ mfv = vec_splat( mf_u.v, 0 );
+
+ vec_u32_u qbits_u;
+ qbits_u.s[0]=16;
+ i_qbitsv = vec_splat(qbits_u.v, 0);
+
+ vec_u16_u bias_u;
+ bias_u.s[0]=bias;
+ biasv = vec_splat(bias_u.v, 0);

static const vec_s16_t mask2 = CV(-1, -1, -1, -1, 0, 0, 0, 0);
QUANT_4_U_DC(0);
@@ -213,7 +225,7 @@ int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64]
vec_u16_t mfvB;
vec_u16_t biasvB;

- vec_s16_t temp1v, temp2v, tmpv;
+ vec_s16_t temp1v, temp2v;

vec_u32_u qbits_u;
qbits_u.s[0]=16;
@@ -235,9 +247,6 @@ int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64]
multOddvA = vec_mulo(dctv, mfv); \
dctv = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), \
vec_mergel(multEvenvA, multOddvA)); \
- dctv = (vec_s16_t) vec_packs( multEvenvA, multOddvA ); \
- tmpv = xxpermdi( dctv, dctv, 2 ); \
- dctv = vec_mergeh( dctv, tmpv ); \
dctv = vec_sl(dctv, i_qbitsv); \
vec_st(dctv, 8*y, dct); \
}
@@ -279,7 +288,7 @@ void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp
int i_mf = i_qp%6;
int i_qbits = i_qp/6 - 4;

- vec_s16_t dctv, tmpv;
+ vec_s16_t dctv;
vec_s16_t dct1v, dct2v;
vec_s32_t mf1v, mf2v;
vec_s16_t mfv;
@@ -289,7 +298,9 @@ void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp
if( i_qbits >= 0 )
{
vec_u16_t i_qbitsv;
- i_qbitsv = vec_splats( (uint16_t) i_qbits );
+ vec_u16_u qbits_u;
+ qbits_u.s[0]=i_qbits;
+ i_qbitsv = vec_splat(qbits_u.v, 0);

for( int y = 0; y < 4; y+=2 )
DEQUANT_SHL();
@@ -299,13 +310,19 @@ void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp
const int f = 1 << (-i_qbits-1);

vec_s32_t fv;
- fv = vec_splats( f );
+ vec_u32_u f_u;
+ f_u.s[0]=f;
+ fv = (vec_s32_t)vec_splat(f_u.v, 0);

vec_u32_t i_qbitsv;
- i_qbitsv = vec_splats( (uint32_t)-i_qbits );
+ vec_u32_u qbits_u;
+ qbits_u.s[0]=-i_qbits;
+ i_qbitsv = vec_splat(qbits_u.v, 0);

vec_u32_t sixteenv;
- sixteenv = vec_splats( (uint32_t)16 );
+ vec_u32_u sixteen_u;
+ sixteen_u.s[0]=16;
+ sixteenv = vec_splat(sixteen_u.v, 0);

for( int y = 0; y < 4; y+=2 )
DEQUANT_SHR();
@@ -317,7 +334,7 @@ void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp
int i_mf = i_qp%6;
int i_qbits = i_qp/6 - 6;

- vec_s16_t dctv, tmpv;
+ vec_s16_t dctv;
vec_s16_t dct1v, dct2v;
vec_s32_t mf1v, mf2v;
vec_s16_t mfv;
@@ -327,7 +344,9 @@ void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp
if( i_qbits >= 0 )
{
vec_u16_t i_qbitsv;
- i_qbitsv = vec_splats((uint16_t)i_qbits );
+ vec_u16_u qbits_u;
+ qbits_u.s[0]=i_qbits;
+ i_qbitsv = vec_splat(qbits_u.v, 0);

for( int y = 0; y < 16; y+=2 )
DEQUANT_SHL();
@@ -337,13 +356,19 @@ void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp
const int f = 1 << (-i_qbits-1);

vec_s32_t fv;
- fv = vec_splats( f );
+ vec_u32_u f_u;
+ f_u.s[0]=f;
+ fv = (vec_s32_t)vec_splat(f_u.v, 0);

vec_u32_t i_qbitsv;
- i_qbitsv = vec_splats( (uint32_t)-i_qbits );
+ vec_u32_u qbits_u;
+ qbits_u.s[0]=-i_qbits;
+ i_qbitsv = vec_splat(qbits_u.v, 0);

vec_u32_t sixteenv;
- sixteenv = vec_splats( (uint32_t)16 );
+ vec_u32_u sixteen_u;
+ sixteen_u.s[0]=16;
+ sixteenv = vec_splat(sixteen_u.v, 0);

for( int y = 0; y < 16; y+=2 )
DEQUANT_SHR();

0 comments on commit cca4442

Please sign in to comment.
You can’t perform that action at this time.