Skip to content

Commit

Permalink
Fix compile on Apple M1/M2 (#295)
Browse files Browse the repository at this point in the history
* Cherry-picked:  Separated out compile flags that are not universally present

* Disabling -flto since that seems to be causing install issues

* Fixed whitespace

* Added the enum and the runtime dispatch for ARM64

* updated Changelog [ci skip]

* Removing the double-check safeguard for compile options
  • Loading branch information
manodeep committed Jul 17, 2023
1 parent b8db3b2 commit 1735f3d
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ New features
2.5.1 (upcoming)
================

Enhancements
------------
- Corrfunc now compiles and runs on Apple M1/M2 cpus (using the `FALLBACK` kernels) [#295]

Changes
-------
- Python >= 3.7 and numpy >= 1.16 are required for python extensions [#291]
Expand Down
78 changes: 74 additions & 4 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ ifeq ($(DO_CHECKS), 1)
# Normally, one would use -xHost with icc instead of -march=native
# But -xHost does not allow you to later turn off just AVX-512,
# which we may decide is necessary if the GAS bug is present
CFLAGS += -march=native
### Now we check and add -march=native separately at the end of common.mk (MS: 4th May 2023)
ifeq (USE_OMP,$(findstring USE_OMP,$(OPT)))
CFLAGS += -qopenmp
CLINK += -qopenmp
Expand Down Expand Up @@ -330,8 +330,8 @@ ifeq ($(DO_CHECKS), 1)
endif # USE_OMP
endif # CC is clang

CFLAGS += -funroll-loops
CFLAGS += -march=native -fno-strict-aliasing
CFLAGS += -funroll-loops -fno-strict-aliasing
# CFLAGS += -march=native ### Now we check and add -march=native separately at the end of common.mk (MS: 4th May 2023)
CFLAGS += -Wformat=2 -Wpacked -Wnested-externs -Wpointer-arith -Wredundant-decls -Wfloat-equal -Wcast-qual
CFLAGS += -Wcast-align -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wstrict-prototypes #-D_POSIX_C_SOURCE=2 -Wpadded -Wconversion
CFLAGS += -Wno-unused-local-typedefs ## to suppress the unused typedef warning for the compile time assert for sizeof(struct config_options)
Expand Down Expand Up @@ -374,6 +374,76 @@ ifeq ($(DO_CHECKS), 1)
endif
endif

# Check if using clang on Apple with M1/M1 Max/M2 etc
# if so, remove -march=native from CFLAGS and
# then add -mcpu=apple-m1 -mtune=apple-m1
# ARCH := $(shell uname -m)
# $(info ARCH is $(ARCH))
# ifeq ($(ARCH), arm64)
# ifeq ($(UNAME), Darwin)
# ifeq (clang,$(findstring clang,$(CC)))
# CFLAGS := $(filter-out -march=native, $(CFLAGS))
# CFLAGS += -mtune=apple-m1 -mcpu=apple-m1
# endif
# endif
# endif
#### UPDATE: This doesn't work in some cases because the parent
#### process might be running under ROSETTA emulation and in that
#### case arch would be reported as x86_64 (rather than arm64). For example,
#### the following lines work fine when invoked as "make" on Apple M2
#### Macbook Air, but when I try to install through python ("python -m pip install -e .")
#### then I get an error that "-march=native" is not supported. This happens because
#### anaconda python is installed as "x86_64" (as seen with file `which python`). Therefore,
#### a better strategy is to test for the relevant compiler flags and then add them
#### Specifically, this would be for -march=native, -mcpu=apple-m1 -mtune=apple-m1
#### and -flto (though -flto needs to be specified to both compile and link lines)
#### MS: 3rd May 2023
### For reasons unknown to me, the addition to CFLAGS does not work correctly if I
### change this variable name "opt" to match the remaining names of "copt". Works fine
### for 'clang' on OSX but not for 'gcc'. Adds the -march=native but somehow that
### extra flag is removed when testing the -mcpu/-mtune compiler options. For the sake
### of my sanity, I have accepted that this is how it shall work! Hopefully, in the future,
### someone will figure out/explain *why* this behaviour is expected. It
### seems more like a gcc bug to me where gcc is updating CFLAGS based on
### the options on the last compile call (since cland does not show
### this behaviour) - MS: 3rd May, 2023

## TLDR: Leave this variable as "opt" while the remaining are set to "copt". Otherwise,
## the correct flags may not be picked up when using gcc on a ARM64 OSX machine
opt := -march=native
COMPILE_OPT_SUPPORTED := $(shell $(CC) $(opt) -dM -E - < /dev/null 2>&1 1>/dev/null)
ifndef COMPILE_OPT_SUPPORTED
CFLAGS += $(opt)
else
CFLAGS := $(filter-out $(opt), $(CFLAGS))
endif

copt := -mcpu=apple-m1
COMPILE_OPT_SUPPORTED := $(shell $(CC) $(copt) -dM -E - < /dev/null 2>&1 1>/dev/null)
ifndef COMPILE_OPT_SUPPORTED
CFLAGS += $(copt)
else
CFLAGS := $(filter-out $(copt), $(CFLAGS))
endif

copt := -mtune=apple-m1
COMPILE_OPT_SUPPORTED := $(shell $(CC) $(copt) -dM -E - < /dev/null 2>&1 1>/dev/null)
ifndef COMPILE_OPT_SUPPORTED
CFLAGS += $(copt)
else
CFLAGS := $(filter-out $(copt), $(CFLAGS))
endif

# copt := -flto
# COMPILE_OPT_SUPPORTED := $(shell $(CC) $(copt) -dM -E - < /dev/null 2>&1 1>/dev/null)
# ifndef COMPILE_OPT_SUPPORTED
# CFLAGS += $(copt)
# CLINK += $(copt)
# else
# CFLAGS := $(filter-out $(copt), $(CFLAGS))
# CLINK := $(filter-out $(copt), $(CLINK))
# endif

# All of the python/numpy checks follow
export PYTHON_CHECKED ?= 0
export NUMPY_CHECKED ?= 0
Expand Down Expand Up @@ -430,7 +500,7 @@ ifeq ($(DO_CHECKS), 1)
endif

ifneq ($(COMPILE_PYTHON_EXT), 0)
PYTHON_INCL := $(shell $(PYTHON) -c "from __future__ import print_function; import sysconfig; flags = set(['-I' + sysconfig.get_path('include'),'-I' + sysconfig.get_path('platinclude')]); print(' '.join(flags));")
PYTHON_INCL := $(shell $(PYTHON) -c "from __future__ import print_function; import sysconfig; flags = set(['-I' + sysconfig.get_path('include'),'-I' + sysconfig.get_path('platinclude')]); print(' '.join(flags));")
PYTHON_INCL:=$(patsubst -I%,-isystem%, $(PYTHON_INCL))

# NUMPY is available -> next step should not fail
Expand Down
16 changes: 15 additions & 1 deletion utils/cpu_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@ int runtime_instrset_detect(void)
static int iset = -1; // remember value for next call
if (iset >= 0) {
return iset; // called before
}
}
iset = FALLBACK; // default value

#ifdef __ARM_NEON
return iset; /* return FALLBACK as the ISA on ARM64 */
#endif

int abcd[4] = {0,0,0,0}; // cpuid results
cpuid(abcd, 0); // call cpuid function 0
if (abcd[0] == 0) return iset; // no further cpuid function supported
Expand Down Expand Up @@ -149,6 +154,15 @@ int get_max_usable_isa(void)
fprintf(stderr, "[Warning] The CPU supports SSE but the compiler does not. Can you try another compiler?\n");
#endif
// fall through
// This will need to be uncommented when the ARM64 kernels are added in
// case ARM64:
// #ifdef __ARM_NEON
// iset = ARM64;
// break;
// #else
// fprintf(stderr, "[Warning] The CPU supports NEON but the compiler does not. Can you try another compiler?\n");
// #endif
// fall through
case FALLBACK:
default:
iset = FALLBACK;
Expand Down
11 changes: 11 additions & 0 deletions utils/cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,17 @@ typedef enum {
AVX=7, /* 256bit vector width */
AVX2=8, /* AVX2 (integer operations)*/
AVX512F=9,/* AVX 512 Foundation */
ARM64=10, /* ARM64 on Apple M1/M2 */
NUM_ISA /*NUM_ISA will be the next integer after
the last declared enum. AVX512F:=9 (so, NUM_ISA==10)*/
} isa; //name for instruction sets -> corresponds to the return values for functions in cpu_features.c


static inline void cpuid (int output[4], int functionnumber) {
#if defined (__ARM_NEON)
(void) output;
(void) functionnumber;
#else
#if defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax

int a, b, c, d;
Expand All @@ -57,11 +62,16 @@ static inline void cpuid (int output[4], int functionnumber) {
mov [esi+12], edx
}

#endif
#endif
}

// Define interface to xgetbv instruction
static inline int64_t xgetbv (int ctr) {
#if defined(__ARM_NEON)
(void) ctr;
return (int64_t) FALLBACK; /* use FALLBACK kernels until the ARM64 kernels are added to the pair-counters */
#else
#if (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) //Intel compiler supporting _xgetbv intrinsic
return _xgetbv(ctr); // intrinsic function for XGETBV
#elif defined(__GNUC__) // use inline assembly, Gnu/AT&T syntax
Expand All @@ -80,6 +90,7 @@ static inline int64_t xgetbv (int ctr) {
}
return a | (((uint64_t) d) << 32);

#endif
#endif
}

Expand Down

0 comments on commit 1735f3d

Please sign in to comment.