BLD: Port long double identification to C for meson #23982

matoro · 2023-06-19T00:20:41Z

This ports the old Python code for identifying the long double representation to C, so that it can be easily invoked by meson. The original implementation is at

numpy/numpy/core/setup_common.py

Lines 264 to 434 in eead09a

    
           # Code to detect long double representation taken from MPFR m4 macro
           def check_long_double_representation(cmd):
               """Identify the binary layout of ``long double`` for the compiler in use.

               The probe source built from ``LONG_DOUBLE_REPRESENTATION_SRC`` is
               compiled, the resulting object file is dumped with ``pyod`` and the
               dump is classified by ``long_double_representation``. If the object
               file cannot be classified, the probe is rebuilt as a linked
               executable and that file is inspected instead.

               Parameters
               ----------
               cmd : object
                   Build command exposing ``_check_compiler``, ``_compile``,
                   ``_clean``, ``temp_files`` and a ``compiler`` attribute
                   (presumably a distutils ``config`` command -- verify against
                   the caller).

               Returns
               -------
               str
                   The layout name returned by ``long_double_representation``.

               Raises
               ------
               ValueError
                   If the linked executable cannot be classified either.
               """
               cmd._check_compiler()
               probe_src = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'}

               if sys.platform == "win32":
                   # Whole program optimization (the default on vs2015, with
                   # python 3.5+) generates intermediary object files and prevents
                   # checking the float representation, so switch it off.
                   if not mingw32():
                       try:
                           cmd.compiler.compile_options.remove("/GL")
                       except (AttributeError, ValueError):
                           pass
               elif (cmd.compiler.compiler_type.startswith('intel')
                       and '-ipo' in cmd.compiler.cc_exe):
                   # Multi-file interprocedural optimization in the Intel compiler
                   # on Linux has the same effect, so drop the flag from every
                   # executable the compiler object knows about.
                   without_ipo = cmd.compiler.cc_exe.replace(' -ipo', '')
                   executables = dict.fromkeys(
                       ('compiler', 'compiler_so', 'compiler_cxx', 'linker_exe'),
                       without_ipo,
                   )
                   executables['linker_so'] = without_ipo + ' -shared'
                   cmd.compiler.set_executables(**executables)

               # _compile is used because the object filename is needed
               _, obj_path = cmd._compile(probe_src, None, None, 'c')
               try:
                   return long_double_representation(pyod(obj_path))
               except ValueError:
                   # Fall back to linking, to support CC="gcc -flto" or icc -ipo.
                   # The struct must be volatile so it isn't optimized away, and
                   # "clang -flto" additionally requires the foo struct to be used.
                   linked_src = (probe_src.replace('struct', 'volatile struct')
                                 + "int main(void) { return foo.before[0]; }\n")
                   _, obj_path = cmd._compile(linked_src, None, None, 'c')
                   cmd.temp_files.append("_configtest")
                   cmd.compiler.link_executable([obj_path], "_configtest")
                   return long_double_representation(pyod("_configtest"))
               finally:
                   cmd._clean()
        
           # C source template for the long double probe. The ``%(type)s``
           # placeholder is filled in by check_long_double_representation (with
           # 'long double'). The struct places the value -123456789.0 between two
           # fixed byte runs -- the same octal values listed in _BEFORE_SEQ and
           # _AFTER_SEQ -- so that long_double_representation can locate the
           # encoded value inside a dump of the compiled file.
           LONG_DOUBLE_REPRESENTATION_SRC = r""" 
        
           /* "before" is 16 bytes to ensure there's no padding between it and "x". 
        
            *    We're not expecting any "long double" bigger than 16 bytes or with 
        
            *       alignment requirements stricter than 16 bytes.  */ 
        
           typedef %(type)s test_type; 
        
           struct { 
        
                   char         before[16]; 
        
                   test_type    x; 
        
                   char         after[8]; 
        
           } foo = { 
        
                   { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', 
        
                     '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' }, 
        
                   -123456789.0, 
        
                   { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' } 
        
           }; 
        
           """ 
        
           def pyod(filename):
               """Dump a file in the style of ``od -b``, one string per 16 bytes.

               Parameters
               ----------
               filename : str
                   Path of the file to dump; it is opened in binary mode.

               Returns
               -------
               out : list of str
                   One entry per group of up to 16 bytes. Each entry starts with
                   the offset of its first byte as a zero-padded 7-digit octal
                   number, followed by every byte as a zero-padded 3-digit octal
                   number, all separated by single spaces.

               Notes
               -----
               Only what is needed to locate the long double representation is
               reproduced; this is not a compatible replacement for od.
               """
               with open(filename, 'rb') as stream:
                   data = stream.read()

               dump = []
               for offset in range(0, len(data), 16):
                   fields = ['%07o' % offset]
                   fields.extend('%03o' % byte for byte in data[offset:offset + 16])
                   dump.append(" ".join(fields))
               return dump
        
           # Marker bytes (3-digit octal strings, as printed by od -b) placed
           # before and after the probe value in LONG_DOUBLE_REPRESENTATION_SRC.
           _BEFORE_SEQ = ['000'] * 8 + ['001', '043', '105', '147',
                                        '211', '253', '315', '357']
           _AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020']

           # Encodings of -123456789.0 in each supported layout.
           _IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000']
           _IEEE_DOUBLE_LE = list(reversed(_IEEE_DOUBLE_BE))
           _INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242',
                                  '171', '353', '031', '300', '000', '000']
           _INTEL_EXTENDED_16B = _INTEL_EXTENDED_12B + ['000'] * 4
           _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171',
                                     '242', '240', '000', '000', '000', '000']
           _IEEE_QUAD_PREC_BE = (['300', '031', '326', '363',
                                  '105', '100', '000', '000'] + ['000'] * 8)
           _IEEE_QUAD_PREC_LE = list(reversed(_IEEE_QUAD_PREC_BE))
           _IBM_DOUBLE_DOUBLE_BE = _IEEE_DOUBLE_BE + ['000'] * 8
           _IBM_DOUBLE_DOUBLE_LE = _IEEE_DOUBLE_LE + ['000'] * 8


           def long_double_representation(lines):
               """Classify the long double layout found in an ``od -b`` style dump.

               Parameters
               ----------
               lines : iterable of str
                   Dump lines. Each line is split on whitespace, the first word
                   (the offset column) is ignored and every remaining word is
                   treated as one byte written as a 3-digit octal string.

               Returns
               -------
               str
                   One of 'INTEL_EXTENDED_12_BYTES_LE',
                   'MOTOROLA_EXTENDED_12_BYTES_BE', 'INTEL_EXTENDED_16_BYTES_LE',
                   'IEEE_QUAD_BE', 'IEEE_QUAD_LE', 'IBM_DOUBLE_DOUBLE_LE',
                   'IBM_DOUBLE_DOUBLE_BE', 'IEEE_DOUBLE_LE' or 'IEEE_DOUBLE_BE'.

               Raises
               ------
               ValueError
                   If the trailing marker is never seen, or if it is seen but the
                   bytes in front of it match no known layout.
               """
               # A 32-byte window slides over the dump. Once it ends with the
               # 8-byte trailing marker, the 24 bytes in front hold the value
               # (12, 16 or 8 bytes wide) preceded by whatever part of the 16-byte
               # leading marker has not yet slid out of the window.
               layouts_by_width = (
                   (12, ((_INTEL_EXTENDED_12B, 'INTEL_EXTENDED_12_BYTES_LE'),
                         (_MOTOROLA_EXTENDED_12B, 'MOTOROLA_EXTENDED_12_BYTES_BE'))),
                   (16, ((_INTEL_EXTENDED_16B, 'INTEL_EXTENDED_16_BYTES_LE'),
                         (_IEEE_QUAD_PREC_BE, 'IEEE_QUAD_BE'),
                         (_IEEE_QUAD_PREC_LE, 'IEEE_QUAD_LE'),
                         (_IBM_DOUBLE_DOUBLE_LE, 'IBM_DOUBLE_DOUBLE_LE'),
                         (_IBM_DOUBLE_DOUBLE_BE, 'IBM_DOUBLE_DOUBLE_BE'))),
                   (8, ((_IEEE_DOUBLE_LE, 'IEEE_DOUBLE_LE'),
                        (_IEEE_DOUBLE_BE, 'IEEE_DOUBLE_BE'))),
               )

               window = [''] * 32
               last_candidate = None
               for line in lines:
                   # the first word is the offset od -b prints on every line
                   for octet in line.split()[1:]:
                       del window[0]
                       window.append(octet)
                       if window[-8:] != _AFTER_SEQ:
                           continue

                       last_candidate = list(window)
                       for width, layouts in layouts_by_width:
                           # number of leading-marker bytes still in the window
                           lead = 24 - width
                           if window[:lead] != _BEFORE_SEQ[16 - lead:]:
                               continue
                           payload = window[lead:-8]
                           for pattern, name in layouts:
                               if payload == pattern:
                                   return name
                           # only the first width whose marker lines up is tried
                           break

               if last_candidate is not None:
                   raise ValueError("Unrecognized format (%s)" % last_candidate)
               # We never detected the after_sequence
               raise ValueError("Could not lock sequences (%s)" % last_candidate)

The C portion of the code has been tested and confirmed to work on systems with the following formats, either natively or via an alternative ABI: INTEL_EXTENDED_16_BYTES_LE, IEEE_QUAD_BE, IEEE_QUAD_LE, IBM_DOUBLE_DOUBLE_BE, IBM_DOUBLE_DOUBLE_LE, IEEE_DOUBLE_BE, INTEL_EXTENDED_12_BYTES_LE.

The original meson port includes an error condition with the comment "This should not be possible, 12 bits of "content" should still result in sizeof() being 16." As far as I can tell this is incorrect: compiling for the 32-bit ABI on an x86_64 system (gcc -m32) does indeed yield sizeof(long double) == 12. This is reflected in the C code.

Closes gh-23972, closes mesonbuild/meson#11068.

This ports the old Python code for identifying the long double representation to C, so that it can be easily invoked by meson. The original implementation is at https://github.com/numpy/numpy/blob/eead09a3d02c09374942cdc787c0b5e4fe9e7472/numpy/core/setup_common.py#L264-L434 The C portion of the code has been tested and confirmed to work on systems with the following formats, either natively or via an alternative ABI: INTEL_EXTENDED_16_BYTES_LE, IEEE_QUAD_BE, IEEE_QUAD_LE, IBM_DOUBLE_DOUBLE_BE, IBM_DOUBLE_DOUBLE_LE, IEEE_DOUBLE_BE, INTEL_EXTENDED_12_BYTES_LE. The original meson port includes an error condition with the comment "This should not be possible, 12 bits of "content" should still result in sizeof() being 16." As far as I can tell this is incorrect, as compiling on an x86_64 system with 32-bit ABI (gcc -m32) does indeed have sizeof(long double)==12. This is reflected in the C code. Closes gh-23972, closes mesonbuild/meson#11068.

rgommers

Awesome, thanks so much @matoro for fixing this and testing on all those platforms! This looks great, and is much clearer than the original Python code.

charris · 2023-06-19T03:24:20Z

> As far as I can tell this is incorrect,

Yes, we call it float96 (12 bytes).

github-actions bot added the 36 - Build Build related PR label Jun 19, 2023

rgommers approved these changes Jun 19, 2023

View reviewed changes

rgommers merged commit eb7e9b2 into numpy:main Jun 19, 2023
75 checks passed

rgommers added the Meson Items related to the introduction of Meson as the new build system for NumPy label Jun 19, 2023

rgommers added this to the 2.0.0 release milestone Jun 19, 2023

rgommers added the 09 - Backport-Candidate PRs tagged should be backported label Jun 19, 2023

rgommers mentioned this pull request Jun 19, 2023

Revert "TST: disable longdouble string/print tests on Linux aarch64" #23985

Merged

charris mentioned this pull request Jun 24, 2023

BLD: Port long double identification to C for meson #24036

Merged

charris removed the 09 - Backport-Candidate PRs tagged should be backported label Jun 24, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

BLD: Port long double identification to C for meson #23982

BLD: Port long double identification to C for meson #23982

matoro commented Jun 19, 2023

rgommers left a comment

charris commented Jun 19, 2023 •

edited

	# Code to detect long double representation taken from MPFR m4 macro
	def check_long_double_representation(cmd):
	    """Identify the binary layout of ``long double`` for the compiler in use.

	    The probe source built from ``LONG_DOUBLE_REPRESENTATION_SRC`` is
	    compiled, the resulting object file is dumped with ``pyod`` and the
	    dump is classified by ``long_double_representation``. If the object
	    file cannot be classified, the probe is rebuilt as a linked
	    executable and that file is inspected instead.

	    Parameters
	    ----------
	    cmd : object
	        Build command exposing ``_check_compiler``, ``_compile``,
	        ``_clean``, ``temp_files`` and a ``compiler`` attribute
	        (presumably a distutils ``config`` command -- verify against
	        the caller).

	    Returns
	    -------
	    str
	        The layout name returned by ``long_double_representation``.

	    Raises
	    ------
	    ValueError
	        If the linked executable cannot be classified either.
	    """
	    cmd._check_compiler()
	    body = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'}

	    # Disable whole program optimization (the default on vs2015, with python 3.5+)
	    # which generates intermediary object files and prevents checking the
	    # float representation.
	    if sys.platform == "win32" and not mingw32():
	        try:
	            cmd.compiler.compile_options.remove("/GL")
	        except (AttributeError, ValueError):
	            pass

	    # Disable multi-file interprocedural optimization in the Intel compiler on Linux
	    # which generates intermediary object files and prevents checking the
	    # float representation.
	    elif (sys.platform != "win32"
	            and cmd.compiler.compiler_type.startswith('intel')
	            and '-ipo' in cmd.compiler.cc_exe):
	        newcompiler = cmd.compiler.cc_exe.replace(' -ipo', '')
	        cmd.compiler.set_executables(
	            compiler=newcompiler,
	            compiler_so=newcompiler,
	            compiler_cxx=newcompiler,
	            linker_exe=newcompiler,
	            linker_so=newcompiler + ' -shared'
	        )

	    # We need to use _compile because we need the object filename
	    src, obj = cmd._compile(body, None, None, 'c')
	    try:
	        ltype = long_double_representation(pyod(obj))
	        return ltype
	    except ValueError:
	        # try linking to support CC="gcc -flto" or icc -ipo
	        # struct needs to be volatile so it isn't optimized away
	        # additionally "clang -flto" requires the foo struct to be used
	        body = body.replace('struct', 'volatile struct')
	        body += "int main(void) { return foo.before[0]; }\n"
	        src, obj = cmd._compile(body, None, None, 'c')
	        cmd.temp_files.append("_configtest")
	        cmd.compiler.link_executable([obj], "_configtest")
	        ltype = long_double_representation(pyod("_configtest"))
	        return ltype
	    finally:
	        # remove the temporary files whether or not detection succeeded
	        cmd._clean()

	# C source template for the long double probe. The ``%(type)s``
	# placeholder is filled in by check_long_double_representation (with
	# 'long double'). The struct places the value -123456789.0 between two
	# fixed byte runs -- the same octal values listed in _BEFORE_SEQ and
	# _AFTER_SEQ -- so that long_double_representation can locate the
	# encoded value inside a dump of the compiled file.
	LONG_DOUBLE_REPRESENTATION_SRC = r"""
	/* "before" is 16 bytes to ensure there's no padding between it and "x".
	* We're not expecting any "long double" bigger than 16 bytes or with
	* alignment requirements stricter than 16 bytes. */
	typedef %(type)s test_type;

	struct {
	char before[16];
	test_type x;
	char after[8];
	} foo = {
	{ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
	'\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
	-123456789.0,
	{ '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' }
	};
	"""

	def pyod(filename):
	    """Dump a file in the style of ``od -b``, one string per 16 bytes.

	    Parameters
	    ----------
	    filename : str
	        Path of the file to dump; it is opened in binary mode.

	    Returns
	    -------
	    out : list of str
	        One entry per group of up to 16 bytes. Each entry starts with
	        the offset of its first byte as a zero-padded 7-digit octal
	        number, followed by every byte as a zero-padded 3-digit octal
	        number, all separated by single spaces.

	    Notes
	    -----
	    Only what is needed to locate the long double representation is
	    reproduced; this is not a compatible replacement for od.
	    """
	    with open(filename, 'rb') as fid:
	        data = fid.read()

	    out = []
	    for i in range(0, len(data), 16):
	        # offset column first, then each byte of this 16-byte group in octal
	        line = ['%07o' % i]
	        line.extend('%03o' % byte for byte in data[i:i + 16])
	        out.append(" ".join(line))
	    return out


	# Marker bytes (3-digit octal strings, as printed by od -b) placed
	# before and after the probe value in LONG_DOUBLE_REPRESENTATION_SRC.
	_BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000',
	               '001', '043', '105', '147', '211', '253', '315', '357']
	_AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020']

	# Encodings of -123456789.0 in each supported layout.
	_IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000']
	_IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1]
	_INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353',
	                       '031', '300', '000', '000']
	_INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353',
	                       '031', '300', '000', '000', '000', '000', '000', '000']
	_MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171',
	                          '242', '240', '000', '000', '000', '000']
	_IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000',
	                      '000', '000', '000', '000', '000', '000', '000', '000']
	_IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1]
	_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
	                         ['000'] * 8)
	_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
	                         ['000'] * 8)


	def long_double_representation(lines):
	    """Classify the long double layout found in an ``od -b`` style dump.

	    Parameters
	    ----------
	    lines : iterable of str
	        Dump lines. Each line is split on whitespace, the first word
	        (the offset column) is ignored and every remaining word is
	        treated as one byte written as a 3-digit octal string.

	    Returns
	    -------
	    str
	        One of 'INTEL_EXTENDED_12_BYTES_LE',
	        'MOTOROLA_EXTENDED_12_BYTES_BE', 'INTEL_EXTENDED_16_BYTES_LE',
	        'IEEE_QUAD_BE', 'IEEE_QUAD_LE', 'IBM_DOUBLE_DOUBLE_LE',
	        'IBM_DOUBLE_DOUBLE_BE', 'IEEE_DOUBLE_LE' or 'IEEE_DOUBLE_BE'.

	    Raises
	    ------
	    ValueError
	        If the trailing marker is never seen, or if it is seen but the
	        bytes in front of it match no known layout.
	    """
	    # read holds the last 32 bytes seen, each as an octal string. We slide
	    # over the dump until read has the form
	    # (tail of before_seq) + content + after_seq, where content is the
	    # long double representation:
	    #  - 12 bytes: 80-bit extended precision (Intel or Motorola)
	    #  - 16 bytes: padded 80-bit Intel, IEEE quad or IBM double-double
	    #  - 8 bytes: same as IEEE double
	    read = [''] * 32
	    saw = None
	    for line in lines:
	        # we skip the first word, as od -b output an index at the beginning of
	        # each line
	        for w in line.split()[1:]:
	            read.pop(0)
	            read.append(w)

	            # If the end of read is equal to the after_sequence, read contains
	            # the long double
	            if read[-8:] == _AFTER_SEQ:
	                saw = list(read)
	                # if the content was 12 bytes, we only have 32 - 8 - 12 = 12
	                # "before" bytes. In other words the first 4 "before" bytes went
	                # past the sliding window.
	                if read[:12] == _BEFORE_SEQ[4:]:
	                    if read[12:-8] == _INTEL_EXTENDED_12B:
	                        return 'INTEL_EXTENDED_12_BYTES_LE'
	                    if read[12:-8] == _MOTOROLA_EXTENDED_12B:
	                        return 'MOTOROLA_EXTENDED_12_BYTES_BE'
	                # if the content was 16 bytes, we are left with 32-8-16 = 8
	                # "before" bytes, so 8 went past the sliding window.
	                elif read[:8] == _BEFORE_SEQ[8:]:
	                    if read[8:-8] == _INTEL_EXTENDED_16B:
	                        return 'INTEL_EXTENDED_16_BYTES_LE'
	                    elif read[8:-8] == _IEEE_QUAD_PREC_BE:
	                        return 'IEEE_QUAD_BE'
	                    elif read[8:-8] == _IEEE_QUAD_PREC_LE:
	                        return 'IEEE_QUAD_LE'
	                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE:
	                        return 'IBM_DOUBLE_DOUBLE_LE'
	                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE:
	                        return 'IBM_DOUBLE_DOUBLE_BE'
	                # if the content was 8 bytes, left with 32-8-8 = 16 bytes
	                elif read[:16] == _BEFORE_SEQ:
	                    if read[16:-8] == _IEEE_DOUBLE_LE:
	                        return 'IEEE_DOUBLE_LE'
	                    elif read[16:-8] == _IEEE_DOUBLE_BE:
	                        return 'IEEE_DOUBLE_BE'

	    if saw is not None:
	        raise ValueError("Unrecognized format (%s)" % saw)
	    else:
	        # We never detected the after_sequence
	        raise ValueError("Could not lock sequences (%s)" % saw)

BLD: Port long double identification to C for meson #23982

BLD: Port long double identification to C for meson #23982

Conversation

matoro commented Jun 19, 2023

rgommers left a comment

Choose a reason for hiding this comment

charris commented Jun 19, 2023 • edited

charris commented Jun 19, 2023 •

edited