Permalink
Browse files

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  lavr: fix handling of custom mix matrices
  fate: force pix_fmt in lagarith-rgb32 test
  fate: add tests for lagarith lossless video codec.
  ARMv6: vp8: fix stack allocation with Apple's assembler
  ARM: vp56: allow inline asm to build with clang
  fft: 3dnow: fix register name typo in DECL_IMDCT macro
  x86: dct32: port to cpuflags
  x86: build: replace mmx2 by mmxext
  Revert "wmapro: prevent division by zero when sample rate is unspecified"
  wmapro: prevent division by zero when sample rate is unspecified
  lagarith: fix color plane inversion for YUY2 output.
  lagarith: pad RGB buffer by 1 byte.
  dsputil: make add_hfyu_left_prediction_sse4() support unaligned src.

Conflicts:
	doc/APIchanges
	libavcodec/lagarith.c
	libavfilter/x86/gradfun.c
	libavutil/cpu.h
	libavutil/version.h
	libswscale/utils.c
	libswscale/version.h
	libswscale/x86/yuv2rgb.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
  • Loading branch information...
2 parents: 88fc143 + 8821ae6 · commit e776ee8f294984f7643a3c45db803c7266e1edfd · committed by michaelni (@michaelni) on Aug 4, 2012
Showing with 254 additions and 181 deletions.
  1. +1 −1 Doxyfile
  2. +7 −6 configure
  3. +5 −0 doc/APIchanges
  4. +12 −4 libavcodec/arm/vp56_arith.h
  5. +7 −1 libavcodec/arm/vp8dsp_armv6.S
  6. +2 −2 libavcodec/dct-test.c
  7. +2 −2 libavcodec/motion-test.c
  8. +1 −1 libavcodec/x86/ac3dsp.asm
  9. +1 −1 libavcodec/x86/ac3dsp_mmx.c
  10. +1 −1 libavcodec/x86/cavsdsp_mmx.c
  11. +25 −49 libavcodec/x86/dct32_sse.asm
  12. +2 −2 libavcodec/x86/dsputil_mmx.c
  13. +13 −7 libavcodec/x86/dsputil_yasm.asm
  14. +3 −4 libavcodec/x86/dsputilenc_mmx.c
  15. +1 −1 libavcodec/x86/fft_mmx.asm
  16. +2 −2 libavcodec/x86/h264_intrapred_init.c
  17. +3 −3 libavcodec/x86/h264dsp_mmx.c
  18. +1 −1 libavcodec/x86/motion_est_mmx.c
  19. +5 −5 libavcodec/x86/mpegvideo_mmx.c
  20. +1 −1 libavcodec/x86/mpegvideo_mmx_template.c
  21. +1 −1 libavcodec/x86/pngdsp-init.c
  22. +1 −1 libavcodec/x86/rv34dsp_init.c
  23. +1 −1 libavcodec/x86/rv40dsp_init.c
  24. +1 −1 libavcodec/x86/snowdsp_mmx.c
  25. +2 −2 libavcodec/x86/vc1dsp_mmx.c
  26. +1 −1 libavcodec/x86/vp3dsp_init.c
  27. +1 −1 libavcodec/x86/vp8dsp-init.c
  28. +3 −3 libavfilter/x86/gradfun.c
  29. +3 −3 libavfilter/x86/yadif.c
  30. +9 −1 libavresample/audio_mix.c
  31. +15 −6 libavresample/audio_mix_matrix.c
  32. +2 −3 libavresample/utils.c
  33. +4 −4 libavutil/cpu.c
  34. +1 −0 libavutil/cpu.h
  35. +1 −0 libavutil/utils.c
  36. +1 −1 libavutil/version.h
  37. +2 −2 libavutil/x86/cpu.c
  38. +2 −2 libswscale/swscale.c
  39. +3 −0 libswscale/swscale.h
  40. +6 −6 libswscale/utils.c
  41. +1 −1 libswscale/version.h
  42. +6 −6 libswscale/x86/rgb2rgb.c
  43. +9 −9 libswscale/x86/rgb2rgb_template.c
  44. +8 −8 libswscale/x86/swscale.c
  45. +10 −10 libswscale/x86/swscale_template.c
  46. +8 −8 libswscale/x86/yuv2rgb.c
  47. +6 −6 libswscale/x86/yuv2rgb_template.c
  48. +15 −0 tests/fate/lossless-video.mak
  49. +5 −0 tests/ref/fate/lagarith-rgb24
  50. +26 −0 tests/ref/fate/lagarith-rgb32
  51. +2 −0 tests/ref/fate/lagarith-yuy2
  52. +3 −0 tests/ref/fate/lagarith-yv12
View
@@ -1378,7 +1378,7 @@ PREDEFINED = "__attribute__(x)=" \
"DEF(x)=x ## _TMPL" \
HAVE_AV_CONFIG_H \
HAVE_MMX \
- HAVE_MMX2 \
+ HAVE_MMXEXT \
HAVE_AMD3DNOW \
"DECLARE_ALIGNED(a,t,n)=t n" \
"offsetof(x,y)=0x42"
View
@@ -267,7 +267,7 @@ Optimization options (experts only):
--disable-amd3dnow disable 3DNow! optimizations
--disable-amd3dnowext disable 3DNow! extended optimizations
--disable-mmx disable MMX optimizations
- --disable-mmx2 disable MMX2 optimizations
+ --disable-mmxext disable MMXEXT optimizations
--disable-sse disable SSE optimizations
--disable-ssse3 disable SSSE3 optimizations
--disable-avx disable AVX optimizations
@@ -1182,7 +1182,7 @@ ARCH_EXT_LIST='
fma4
mmi
mmx
- mmx2
+ mmxext
neon
ppc4xx
sse
@@ -1459,7 +1459,7 @@ x86_64_suggest="cmov fast_cmov"
amd3dnow_deps="mmx"
amd3dnowext_deps="amd3dnow"
mmx_deps="x86"
-mmx2_deps="mmx"
+mmxext_deps="mmx"
sse_deps="mmx"
ssse3_deps="sse"
avx_deps="ssse3"
@@ -3194,9 +3194,9 @@ EOF
# check whether xmm clobbers are supported
check_asm xmm_clobbers '"":::"%xmm0"'
- # check whether binutils is new enough to compile SSSE3/MMX2
+ # check whether binutils is new enough to compile SSSE3/MMXEXT
enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"'
- enabled mmx2 && check_asm mmx2 '"pmaxub %mm0, %mm1"'
+ enabled mmxext && check_asm mmxext '"pmaxub %mm0, %mm1"'
if ! disabled_any asm mmx yasm; then
if check_cmd $yasmexe --version; then
@@ -3748,7 +3748,7 @@ echo "runtime cpu detection ${runtime_cpudetect-no}"
if enabled x86; then
echo "${yasmexe} ${yasm-no}"
echo "MMX enabled ${mmx-no}"
- echo "MMX2 enabled ${mmx2-no}"
+ echo "MMXEXT enabled ${mmxext-no}"
echo "3DNow! enabled ${amd3dnow-no}"
echo "3DNow! extended enabled ${amd3dnowext-no}"
echo "SSE enabled ${sse-no}"
@@ -4019,6 +4019,7 @@ cat > $TMPH <<EOF
#define EXTERN_PREFIX "${extern_prefix}"
#define EXTERN_ASM ${extern_prefix}
#define SLIBSUF "$SLIBSUF"
+#define HAVE_MMX2 HAVE_MMXEXT
EOF
test -n "$assert_level" &&
View
@@ -70,6 +70,11 @@ API changes, most recent first:
2012-03-26 - a67d9cf - lavfi 2.66.100
Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions.
+2012-08-03 - xxxxxxx - lavu 51.37.1 - cpu.h
+ lsws 2.1.1 - swscale.h
+ Rename AV_CPU_FLAG_MMX2 ---> AV_CPU_FLAG_MMXEXT.
+ Rename SWS_CPU_CAPS_MMX2 ---> SWS_CPU_CAPS_MMXEXT.
+
2012-07-xx - xxxxxxx - lavf 54.13.0 - avformat.h
Add AVFMT_FLAG_NOBUFFER for low latency use cases.
@@ -29,6 +29,14 @@
# define T(x)
#endif
+#if CONFIG_THUMB || defined __clang__
+# define L(x)
+# define U(x) x
+#else
+# define L(x) x
+# define U(x)
+#endif
+
#if HAVE_ARMV6 && HAVE_INLINE_ASM
#define vp56_rac_get_prob vp56_rac_get_prob_armv6
@@ -42,8 +50,8 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
__asm__ ("adds %3, %3, %0 \n"
"itt cs \n"
"cmpcs %7, %4 \n"
- A("ldrcsh %2, [%4], #2 \n")
- T("ldrhcs %2, [%4], #2 \n")
+ L("ldrcsh %2, [%4], #2 \n")
+ U("ldrhcs %2, [%4], #2 \n")
"rsb %0, %6, #256 \n"
"smlabb %0, %5, %6, %0 \n"
T("itttt cs \n")
@@ -80,8 +88,8 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
__asm__ ("adds %3, %3, %0 \n"
"itt cs \n"
"cmpcs %7, %4 \n"
- A("ldrcsh %2, [%4], #2 \n")
- T("ldrhcs %2, [%4], #2 \n")
+ L("ldrcsh %2, [%4], #2 \n")
+ U("ldrhcs %2, [%4], #2 \n")
"rsb %0, %6, #256 \n"
"smlabb %0, %5, %6, %0 \n"
T("itttt cs \n")
@@ -1226,7 +1226,13 @@ vp8_mc_1 bilin, 8, v
vp8_mc_1 bilin, 4, h
vp8_mc_1 bilin, 4, v
-#define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1)
+/* True relational expressions have the value -1 in the GNU assembler,
+ +1 in Apple's. */
+#ifdef __APPLE__
+# define TMPSIZE \size * (8 + 8*(\size > 4) + \ytaps - 1)
+#else
+# define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1)
+#endif
.macro vp8_mc_hv name, size, h, v, ytaps
function ff_put_vp8_\name\size\()_\h\v\()_armv6, export=1
View
@@ -87,7 +87,7 @@ static const struct algo fdct_tab[] = {
#if HAVE_MMX && HAVE_INLINE_ASM
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
- { "MMX2", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 },
+ { "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT },
{ "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
#endif
@@ -132,7 +132,7 @@ static const struct algo idct_tab[] = {
#endif
{ "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
{ "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
- { "XVID-MMX2", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMX2, 1 },
+ { "XVID-MMXEXT", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
{ "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
#if ARCH_X86_64 && HAVE_YASM
{ "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
View
@@ -116,8 +116,8 @@ int main(int argc, char **argv)
AVCodecContext *ctx;
int c;
DSPContext cctx, mmxctx;
- int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMX2 };
- int flags_size = HAVE_MMX2 ? 2 : 1;
+ int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT };
+ int flags_size = HAVE_MMXEXT ? 2 : 1;
if (argc > 1) {
help();
@@ -68,7 +68,7 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
%define LOOP_ALIGN
INIT_MMX
AC3_EXPONENT_MIN mmx
-%if HAVE_MMX2
+%if HAVE_MMXEXT
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN mmxext
@@ -65,7 +65,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
}
}
- if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
+ if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
}
@@ -486,7 +486,7 @@ void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
int mm_flags = av_get_cpu_flags();
#if HAVE_INLINE_ASM
- if (mm_flags & AV_CPU_FLAG_MMX2) ff_cavsdsp_init_mmx2 (c, avctx);
+ if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx);
if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
#endif /* HAVE_INLINE_ASM */
}
@@ -42,39 +42,24 @@ ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043
align 32
ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
-%macro BUTTERFLY_SSE 4
- movaps %4, %1
- subps %1, %2
- addps %2, %4
- mulps %1, %3
-%endmacro
-
-%macro BUTTERFLY_AVX 4
- vsubps %4, %1, %2
- vaddps %2, %2, %1
- vmulps %1, %4, %3
-%endmacro
-
-%macro BUTTERFLY0_SSE 5
- movaps %4, %1
- shufps %1, %1, %5
- xorps %4, %2
- addps %1, %4
- mulps %1, %3
+%macro BUTTERFLY 4
+ subps %4, %1, %2
+ addps %2, %2, %1
+ mulps %1, %4, %3
%endmacro
-%macro BUTTERFLY0_SSE2 5
+%macro BUTTERFLY0 5
+%if cpuflag(sse2) && notcpuflag(avx)
pshufd %4, %1, %5
xorps %1, %2
addps %1, %4
mulps %1, %3
-%endmacro
-
-%macro BUTTERFLY0_AVX 5
- vshufps %4, %1, %1, %5
- vxorps %1, %1, %2
- vaddps %4, %4, %1
- vmulps %1, %4, %3
+%else
+ shufps %4, %1, %1, %5
+ xorps %1, %1, %2
+ addps %4, %4, %1
+ mulps %1, %4, %3
+%endif
%endmacro
%macro BUTTERFLY2 4
@@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
movss [outq+116], m6
%endmacro
-%define BUTTERFLY BUTTERFLY_AVX
-%define BUTTERFLY0 BUTTERFLY0_AVX
-
-INIT_YMM
+INIT_YMM avx
SECTION_TEXT
%if HAVE_AVX
; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_avx, 2,3,8, out, in, tmp
+cglobal dct32_float, 2,3,8, out, in, tmp
; pass 1
vmovaps m4, [inq+0]
vinsertf128 m5, m5, [inq+96], 1
@@ -286,9 +268,6 @@ INIT_XMM
RET
%endif
-%define BUTTERFLY BUTTERFLY_SSE
-%define BUTTERFLY0 BUTTERFLY0_SSE
-
%if ARCH_X86_64
%define SPILL SWAP
%define UNSPILL SWAP
@@ -411,10 +390,9 @@ INIT_XMM
%endif
-INIT_XMM
-%macro DCT32_FUNC 1
; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_%1, 2,3,16, out, in, tmp
+%macro DCT32_FUNC 0
+cglobal dct32_float, 2, 3, 16, out, in, tmp
; pass 1
movaps m0, [inq+0]
@@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp
RET
%endmacro
-%macro LOAD_INV_SSE 2
+%macro LOAD_INV 2
+%if cpuflag(sse2)
+ pshufd %1, %2, 0x1b
+%elif cpuflag(sse)
movaps %1, %2
shufps %1, %1, 0x1b
+%endif
%endmacro
-%define LOAD_INV LOAD_INV_SSE
-DCT32_FUNC sse
-
-%macro LOAD_INV_SSE2 2
- pshufd %1, %2, 0x1b
-%endmacro
-
-%define LOAD_INV LOAD_INV_SSE2
-%define BUTTERFLY0 BUTTERFLY0_SSE2
-DCT32_FUNC sse2
+INIT_XMM sse
+DCT32_FUNC
+INIT_XMM sse2
+DCT32_FUNC
@@ -3171,7 +3171,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
c->idct_add = ff_idct_xvid_sse2_add;
c->idct = ff_idct_xvid_sse2;
c->idct_permutation_type = FF_SSE2_IDCT_PERM;
- } else if (mm_flags & AV_CPU_FLAG_MMX2) {
+ } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->idct_put = ff_idct_xvid_mmx2_put;
c->idct_add = ff_idct_xvid_mmx2_add;
c->idct = ff_idct_xvid_mmx2;
@@ -3187,7 +3187,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
dsputil_init_mmx(c, avctx, mm_flags);
}
- if (mm_flags & AV_CPU_FLAG_MMX2)
+ if (mm_flags & AV_CPU_FLAG_MMXEXT)
dsputil_init_mmx2(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW)
@@ -388,12 +388,16 @@ cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_to
RET
-%macro ADD_HFYU_LEFT_LOOP 1 ; %1 = is_aligned
+%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
add srcq, wq
add dstq, wq
neg wq
%%.loop:
+%if %2
mova m1, [srcq+wq]
+%else
+ movu m1, [srcq+wq]
+%endif
mova m2, m1
psllw m1, 8
paddb m1, m2
@@ -435,7 +439,7 @@ cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left
mova m3, [pb_zz11zz55zz99zzdd]
movd m0, leftm
psllq m0, 56
- ADD_HFYU_LEFT_LOOP 1
+ ADD_HFYU_LEFT_LOOP 1, 1
INIT_XMM
cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
@@ -446,12 +450,14 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
movd m0, leftm
pslldq m0, 15
test srcq, 15
- jnz add_hfyu_left_prediction_ssse3.skip_prologue
+ jnz .src_unaligned
test dstq, 15
- jnz .unaligned
- ADD_HFYU_LEFT_LOOP 1
-.unaligned:
- ADD_HFYU_LEFT_LOOP 0
+ jnz .dst_unaligned
+ ADD_HFYU_LEFT_LOOP 1, 1
+.dst_unaligned:
+ ADD_HFYU_LEFT_LOOP 0, 1
+.src_unaligned:
+ ADD_HFYU_LEFT_LOOP 0, 0
; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
@@ -1112,7 +1112,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
if(mm_flags & AV_CPU_FLAG_SSE2){
c->fdct = ff_fdct_sse2;
- }else if(mm_flags & AV_CPU_FLAG_MMX2){
+ } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->fdct = ff_fdct_mmx2;
}else{
c->fdct = ff_fdct_mmx;
@@ -1145,8 +1145,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
-
- if (mm_flags & AV_CPU_FLAG_MMX2) {
+ if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->sum_abs_dctelem= sum_abs_dctelem_mmx2;
c->vsad[4]= vsad_intra16_mmx2;
@@ -1187,7 +1186,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
- if (mm_flags & AV_CPU_FLAG_MMX2) {
+ if (mm_flags & AV_CPU_FLAG_MMXEXT) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2;
}
Oops, something went wrong: the diffs for the remaining files (15–52 in the list above) failed to load and are not shown.

0 comments on commit e776ee8

Please sign in to comment.