Permalink
Browse files

Intel Compiler support

Big thanks to David Rudie, the original author of this patch.
  • Loading branch information...
1 parent c7e352c commit b41d1821a0fa6fe6b3e9f06b30194e172c6ac474 @kemuri-9 kemuri-9 committed with Jason Garrett-Glaser Mar 1, 2011
Showing with 585 additions and 115 deletions.
  1. +3 −0 .gitignore
  2. +21 −26 Makefile
  3. +2 −2 common/common.c
  4. +10 −10 common/osdep.c
  5. +34 −12 common/osdep.h
  6. +0 −1 common/set.c
  7. +1 −1 common/win32thread.c
  8. +10 −6 common/x86/predict-c.c
  9. +1 −1 common/x86/util.h
  10. +196 −35 configure
  11. +0 −2 encoder/analyse.c
  12. +0 −2 encoder/encoder.c
  13. +0 −1 encoder/ratecontrol.c
  14. +0 −2 encoder/set.c
  15. +0 −2 encoder/slicetype.c
  16. +285 −0 extras/inttypes.h
  17. +17 −0 extras/stdint.h
  18. +0 −1 input/timecode.c
  19. +1 −5 tools/checkasm.c
  20. +4 −6 x264.c
View
@@ -5,6 +5,9 @@
*.rej
*.dll*
*.exe
+*.def
+*.lib
+*.pdb
*.mo
*.o
*.patch
View
@@ -125,7 +125,7 @@ SRCCLI += extras/getopt.c
endif
ifneq ($(SONAME),)
-ifeq ($(SYS),MINGW)
+ifeq ($(SYS),WINDOWS)
SRCSO += x264dll.c
endif
endif
@@ -139,30 +139,30 @@ DEP = depend
default: $(DEP) x264$(EXE)
-libx264.a: .depend $(OBJS) $(OBJASM)
- $(AR) rc libx264.a $(OBJS) $(OBJASM)
- $(RANLIB) libx264.a
+$(LIBX264): .depend $(OBJS) $(OBJASM)
+ $(AR)$@ $(OBJS) $(OBJASM)
+ $(if $(RANLIB), $(RANLIB) $@)
$(SONAME): .depend $(OBJS) $(OBJASM) $(OBJSO)
- $(CC) -shared -o $@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
+ $(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
-x264$(EXE): $(OBJCLI) libx264.a
- $(CC) -o $@ $+ $(LDFLAGSCLI) $(LDFLAGS)
+x264$(EXE): $(OBJCLI) $(LIBX264)
+ $(LD)$@ $+ $(LDFLAGSCLI) $(LDFLAGS)
-checkasm: tools/checkasm.o libx264.a
- $(CC) -o $@ $+ $(LDFLAGS)
+checkasm: tools/checkasm.o $(LIBX264)
+ $(LD)$@ $+ $(LDFLAGS)
%.o: %.asm
$(AS) $(ASFLAGS) -o $@ $<
- -@ $(STRIP) -x $@ # delete local/anonymous symbols, so they don't show up in oprofile
+ -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
%.o: %.S
$(AS) $(ASFLAGS) -o $@ $<
- -@ $(STRIP) -x $@ # delete local/anonymous symbols, so they don't show up in oprofile
+ -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
.depend: config.mak
@rm -f .depend
- @$(foreach SRC, $(SRCS) $(SRCCLI) $(SRCSO), $(CC) $(CFLAGS) $(SRC) -MT $(SRC:%.c=%.o) -MM -g0 1>> .depend;)
+ @$(foreach SRC, $(SRCS) $(SRCCLI) $(SRCSO), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:%.c=%.o) $(DEPMM) 1>> .depend;)
config.mak:
./configure
@@ -191,25 +191,20 @@ fprofiled:
else
fprofiled:
$(MAKE) clean
- mv config.mak config.mak2
- sed -e 's/CFLAGS.*/& -fprofile-generate/; s/LDFLAGS.*/& -fprofile-generate/' config.mak2 > config.mak
- $(MAKE) x264$(EXE)
+ $(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)"
$(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;))
rm -f $(SRC2:%.c=%.o)
- sed -e 's/CFLAGS.*/& -fprofile-use/; s/LDFLAGS.*/& -fprofile-use/' config.mak2 > config.mak
- $(MAKE)
- rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno)
- mv config.mak2 config.mak
+ $(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)"
+ rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock
endif
clean:
- rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(SONAME) *.a x264 x264.exe .depend TAGS
+ rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(SONAME) *.a *.lib *.exp *.pdb x264 x264.exe .depend TAGS
rm -f checkasm checkasm.exe tools/checkasm.o tools/checkasm-a.o
- rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno)
- - sed -e 's/ *-fprofile-\(generate\|use\)//g' config.mak > config.mak2 && mv config.mak2 config.mak
+ rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock
distclean: clean
- rm -f config.mak x264_config.h config.h config.log x264.pc
+ rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
rm -rf test/
install: x264$(EXE) $(SONAME)
@@ -219,11 +214,11 @@ install: x264$(EXE) $(SONAME)
install -d $(DESTDIR)$(libdir)/pkgconfig
install -m 644 x264.h $(DESTDIR)$(includedir)
install -m 644 x264_config.h $(DESTDIR)$(includedir)
- install -m 644 libx264.a $(DESTDIR)$(libdir)
+ install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
install x264$(EXE) $(DESTDIR)$(bindir)
- $(RANLIB) $(DESTDIR)$(libdir)/libx264.a
-ifeq ($(SYS),MINGW)
+ $(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
+ifeq ($(SYS),WINDOWS)
$(if $(SONAME), install -m 755 $(SONAME) $(DESTDIR)$(bindir))
else
$(if $(SONAME), ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX))
View
@@ -1095,7 +1095,7 @@ void x264_picture_clean( x264_picture_t *pic )
void *x264_malloc( int i_size )
{
uint8_t *align_buf = NULL;
-#if SYS_MACOSX || (SYS_MINGW && ARCH_X86_64)
+#if SYS_MACOSX || (SYS_WINDOWS && ARCH_X86_64)
/* Mac OS X and Win x64 always return 16-byte aligned memory */
align_buf = malloc( i_size );
#elif HAVE_MALLOC_H
@@ -1121,7 +1121,7 @@ void x264_free( void *p )
{
if( p )
{
-#if HAVE_MALLOC_H || SYS_MACOSX || (SYS_MINGW && ARCH_X86_64)
+#if HAVE_MALLOC_H || SYS_MACOSX || (SYS_WINDOWS && ARCH_X86_64)
free( p );
#else
free( *( ( ( void **) p ) - 1 ) );
View
@@ -24,16 +24,16 @@
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#ifndef __MINGW32__
-#include <sys/time.h>
-#else
+#include "common.h"
+
+#if SYS_WINDOWS
#include <sys/types.h>
#include <sys/timeb.h>
+#else
+#include <sys/time.h>
#endif
#include <time.h>
-#include "common.h"
-
#if PTW32_STATIC_LIB
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
@@ -43,14 +43,14 @@ extern int ptw32_processInitialized;
int64_t x264_mdate( void )
{
-#ifndef __MINGW32__
- struct timeval tv_date;
- gettimeofday( &tv_date, NULL );
- return (int64_t)tv_date.tv_sec * 1000000 + (int64_t)tv_date.tv_usec;
-#else
+#if SYS_WINDOWS
struct timeb tb;
ftime( &tb );
return ((int64_t)tb.time * 1000 + (int64_t)tb.millitm) * 1000;
+#else
+ struct timeval tv_date;
+ gettimeofday( &tv_date, NULL );
+ return (int64_t)tv_date.tv_sec * 1000000 + (int64_t)tv_date.tv_usec;
#endif
}
View
@@ -50,6 +50,25 @@
#include <fcntl.h> // _O_BINARY
#endif
+#ifdef __ICL
+#define inline __inline
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#define snprintf _snprintf
+#define strtok_r strtok_s
+#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+#ifdef __INTEL_COMPILER
+#include <mathimf.h>
+#else
+#include <math.h>
+#endif
+
+#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
+#define HAVE_X86_INLINE_ASM 1
+#endif
+
#if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
#define isfinite finite
#endif
@@ -60,7 +79,11 @@
#endif
#endif
+#ifdef __ICL
+#define DECLARE_ALIGNED( var, n ) __declspec(align(n)) var
+#else
#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
+#endif
#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
#define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
#define ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
@@ -99,9 +122,14 @@
#define x264_constant_p(x) __builtin_constant_p(x)
#define x264_nonconstant_p(x) (!__builtin_constant_p(x))
#else
-#define UNUSED
+#ifdef __ICL
+#define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
+#else
#define ALWAYS_INLINE inline
#define NOINLINE
+#endif
+#define UNUSED
#define MAY_ALIAS
#define x264_constant_p(x) 0
#define x264_nonconstant_p(x) 0
@@ -179,19 +207,13 @@ int x264_threading_init( void );
#define asm __asm__
-#if !defined(_WIN64) && !defined(__LP64__)
-#if defined(__INTEL_COMPILER)
-#define BROKEN_STACK_ALIGNMENT 1 /* define it if stack is not mod16 */
-#endif
-#endif
-
#if WORDS_BIGENDIAN
#define endian_fix(x) (x)
#define endian_fix64(x) (x)
#define endian_fix32(x) (x)
#define endian_fix16(x) (x)
#else
-#if defined(__GNUC__) && HAVE_MMX
+#if HAVE_X86_INLINE_ASM && HAVE_MMX
static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
{
asm("bswap %0":"+r"(x));
@@ -209,7 +231,7 @@ static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
return (x<<24) + ((x<<8)&0xff0000) + ((x>>8)&0xff00) + (x>>24);
}
#endif
-#if defined(__GNUC__) && ARCH_X86_64
+#if HAVE_X86_INLINE_ASM && ARCH_X86_64
static ALWAYS_INLINE uint64_t endian_fix64( uint64_t x )
{
asm("bswap %0":"+r"(x));
@@ -260,7 +282,7 @@ static int ALWAYS_INLINE x264_ctz( uint32_t x )
}
#endif
-#if defined(__GNUC__) && HAVE_MMX
+#if HAVE_X86_INLINE_ASM && HAVE_MMX
/* Don't use __builtin_prefetch; even in versions as recent as 4.3.4, GCC seems
* incapable of using complex address modes properly unless we use inline asm. */
static ALWAYS_INLINE void x264_prefetch( void *p )
@@ -277,7 +299,7 @@ static ALWAYS_INLINE void x264_prefetch( void *p )
#endif
#if HAVE_POSIXTHREAD
-#if SYS_MINGW
+#if SYS_WINDOWS
#define x264_lower_thread_priority(p)\
{\
x264_pthread_t handle = pthread_self();\
@@ -290,7 +312,7 @@ static ALWAYS_INLINE void x264_prefetch( void *p )
#else
#include <unistd.h>
#define x264_lower_thread_priority(p) { UNUSED int nice_ret = nice(p); }
-#endif /* SYS_MINGW */
+#endif /* SYS_WINDOWS */
#elif HAVE_WIN32THREAD
#define x264_lower_thread_priority(p) SetThreadPriority( GetCurrentThread(), X264_MAX( -2, -p ) )
#else
View
@@ -24,7 +24,6 @@
*****************************************************************************/
#define _ISOC99_SOURCE
-#include <math.h>
#include "common.h"
#define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
View
@@ -59,7 +59,7 @@ typedef struct
static x264_win32thread_control_t thread_control;
/* _beginthreadex requires that the start routine is __stdcall */
-static __stdcall unsigned x264_win32thread_worker( void *arg )
+static unsigned __stdcall x264_win32thread_worker( void *arg )
{
x264_pthread_t *h = arg;
h->ret = h->func( h->arg );
View
@@ -180,7 +180,7 @@ PREDICT_16x16_P( sse2 )
PREDICT_16x16_P( avx )
#endif //!HIGH_BIT_DEPTH
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
static void x264_predict_16x16_p_sse2( uint16_t *src )
#else
@@ -218,7 +218,7 @@ static void x264_predict_16x16_p_ssse3( uint8_t *src )
"pshufw $1, %%mm0, %%mm1 \n"
"paddw %%mm1, %%mm0 \n"
"movd %%mm0, %0 \n"
- "movsx %w0, %0 \n"
+ "movswl %w0, %0 \n"
:"=r"(H)
:"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),
"m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)
@@ -271,7 +271,7 @@ PREDICT_8x8_P( sse2 )
#endif //!HIGH_BIT_DEPTH
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
static void x264_predict_8x8c_p_sse2( uint16_t *src )
#else
@@ -301,7 +301,7 @@ static void x264_predict_8x8c_p_ssse3( uint8_t *src )
"pshufw $1, %%mm0, %%mm1 \n"
"paddw %%mm1, %%mm0 \n"
"movd %%mm0, %0 \n"
- "movsx %w0, %0 \n"
+ "movswl %w0, %0 \n"
:"=r"(H)
:"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)
);
@@ -432,7 +432,9 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
pf[I_PRED_16x16_V] = x264_predict_16x16_v_sse2;
pf[I_PRED_16x16_H] = x264_predict_16x16_h_sse2;
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2;
+#endif
#else
#if !ARCH_X86_64
pf[I_PRED_16x16_P] = x264_predict_16x16_p_mmxext;
@@ -449,7 +451,7 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
if( !(cpu&X264_CPU_SSSE3) )
return;
pf[I_PRED_16x16_H] = x264_predict_16x16_h_ssse3;
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
#endif
if( !(cpu&X264_CPU_AVX) )
@@ -473,7 +475,9 @@ void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_sse2;
pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_sse2;
pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_sse2;
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2;
+#endif
#else
#if ARCH_X86_64
pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
@@ -493,7 +497,7 @@ void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
if( !(cpu&X264_CPU_SSSE3) )
return;
pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_ssse3;
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
#endif
#endif // HIGH_BIT_DEPTH
View
@@ -27,7 +27,7 @@
#ifndef X264_X86_UTIL_H
#define X264_X86_UTIL_H
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
#ifdef __SSE__
#include <xmmintrin.h>
Oops, something went wrong.

0 comments on commit b41d182

Please sign in to comment.