BLD: Port long double identification to C for meson #24036

charris · 2023-06-24T18:03:53Z

Backport of #23982.

This ports the old Python code for identifying the long double representation to C, so that it can be easily invoked by meson. The original implementation is at

numpy/numpy/core/setup_common.py

Lines 264 to 434 in eead09a

    
           # Code to detect long double representation taken from MPFR m4 macro 
        
           def check_long_double_representation(cmd):
               """Compile a probe and classify the ``long double`` byte layout.

               The probe source places a ``long double`` between two marker byte
               arrays.  The compiled object is dumped with `pyod` and handed to
               `long_double_representation`; if that fails, the probe is rebuilt as
               a linked executable and the executable is dumped instead.

               Parameters
               ----------
               cmd : object
                   Config command exposing ``_check_compiler``, ``_compile``,
                   ``_clean``, ``temp_files`` and a ``compiler`` attribute.

               Returns
               -------
               str
                   The format name returned by `long_double_representation`.

               Raises
               ------
               ValueError
                   Propagated from `long_double_representation` when the linked
                   executable cannot be classified either.
               """
               cmd._check_compiler()
               probe = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'}

               if sys.platform == "win32":
                   if not mingw32():
                       # Whole program optimization (the default on vs2015, with
                       # python 3.5+) generates intermediary object files and
                       # prevents checking the float representation, so drop it.
                       try:
                           cmd.compiler.compile_options.remove("/GL")
                       except (AttributeError, ValueError):
                           pass
               elif (cmd.compiler.compiler_type.startswith('intel')
                       and '-ipo' in cmd.compiler.cc_exe):
                   # Multi-file interprocedural optimization in the Intel compiler
                   # on Linux has the same effect, so strip the flag everywhere.
                   stripped = cmd.compiler.cc_exe.replace(' -ipo', '')
                   executables = dict.fromkeys(
                       ('compiler', 'compiler_so', 'compiler_cxx', 'linker_exe'),
                       stripped)
                   executables['linker_so'] = stripped + ' -shared'
                   cmd.compiler.set_executables(**executables)

               # _compile is used because the object filename is needed.
               _, obj_path = cmd._compile(probe, None, None, 'c')
               try:
                   return long_double_representation(pyod(obj_path))
               except ValueError:
                   # Retry with a linked executable to support CC="gcc -flto" or
                   # icc -ipo.  The struct is made volatile so it isn't optimized
                   # away, and "clang -flto" additionally requires it to be used.
                   probe = probe.replace('struct', 'volatile struct')
                   probe += "int main(void) { return foo.before[0]; }\n"
                   _, obj_path = cmd._compile(probe, None, None, 'c')
                   cmd.temp_files.append("_configtest")
                   cmd.compiler.link_executable([obj_path], "_configtest")
                   return long_double_representation(pyod("_configtest"))
               finally:
                   cmd._clean()
        
           # C source template for the probe compiled by
           # check_long_double_representation.  ``%(type)s`` is replaced by the C
           # type under test; the value -123456789.0 is stored between two marker
           # byte arrays whose octal values match _BEFORE_SEQ and _AFTER_SEQ.
           LONG_DOUBLE_REPRESENTATION_SRC = r"""
           /* "before" is 16 bytes to ensure there's no padding between it and "x".
            *    We're not expecting any "long double" bigger than 16 bytes or with
            *       alignment requirements stricter than 16 bytes.  */
           typedef %(type)s test_type;

           struct {
                   char         before[16];
                   test_type    x;
                   char         after[8];
           } foo = {
                   { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
                     '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
                   -123456789.0,
                   { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' }
           };
           """
        
           def pyod(filename):
               """Dump a file as octal bytes in the layout of ``od -b``.

               Parameters
               ----------
               filename : str
                   Path of the file to dump.

               Returns
               -------
               out : list of str
                   One string per 16 input bytes: a zero-padded octal offset
                   followed by up to sixteen three-digit octal byte values, all
                   separated by single spaces.

               Notes
               -----
               Only what the long double detection needs is implemented; this is
               not meant to be a compatible replacement for od.
               """
               with open(filename, 'rb') as stream:
                   payload = stream.read()

               dump = []
               for offset in range(0, len(payload), 16):
                   fields = ['%07o' % offset]
                   fields += ['%03o' % byte for byte in payload[offset:offset + 16]]
                   dump.append(" ".join(fields))
               return dump
        
           # Marker bytes, written as three-digit octal strings (the form pyod
           # emits), that surround the probe value in LONG_DOUBLE_REPRESENTATION_SRC:
           # _BEFORE_SEQ mirrors the 16-byte "before" array and _AFTER_SEQ the
           # 8-byte "after" array.
           _BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000', 
        
                         '001', '043', '105', '147', '211', '253', '315', '357'] 
        
           _AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020'] 
        
           # Payload byte patterns that long_double_representation compares with the
           # bytes found between the two markers; each one corresponds to a format
           # name returned by that function.
           # 8-byte payloads; the LE list is the BE list reversed.
           _IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000'] 
        
           _IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1] 
        
           # 12-byte and 16-byte payloads sharing the same leading bytes.
           _INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353', 
        
                                  '031', '300', '000', '000'] 
        
           _INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353', 
        
                                  '031', '300', '000', '000', '000', '000', '000', '000'] 
        
           _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171', 
        
                                     '242', '240', '000', '000', '000', '000'] 
        
           # 16-byte payloads; the LE list is the BE list reversed.
           _IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', 
        
                                 '000', '000', '000', '000', '000', '000', '000', '000'] 
        
           _IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] 
        
           # 16-byte payloads: the 8 bytes of the IEEE double pattern followed by 8
           # zero bytes.  Only the first half is reversed in the LE variant.
           _IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + 
        
                                ['000'] * 8) 
        
           _IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + 
        
                                ['000'] * 8) 
        
           def long_double_representation(lines):
               """Identify the long double format from an ``od -b`` style dump.

               A 32-entry window is slid over the dumped bytes.  Each time its last
               eight entries equal the "after" marker, the leading entries are
               matched against the tail of the "before" marker to deduce how wide
               the payload is (12, 16 or 8 bytes), and the payload is compared with
               the known byte patterns.

               Parameters
               ----------
               lines : iterable of str
                   Dump lines: an offset column followed by octal byte values, as
                   produced by `pyod`.

               Returns
               -------
               str
                   One of 'INTEL_EXTENDED_12_BYTES_LE',
                   'MOTOROLA_EXTENDED_12_BYTES_BE', 'INTEL_EXTENDED_16_BYTES_LE',
                   'IEEE_QUAD_BE', 'IEEE_QUAD_LE', 'IBM_DOUBLE_DOUBLE_LE',
                   'IBM_DOUBLE_DOUBLE_BE', 'IEEE_DOUBLE_LE' or 'IEEE_DOUBLE_BE'.

               Raises
               ------
               ValueError
                   If the "after" marker never appears, or appears without a
                   recognised payload in front of it.
               """
               window = [''] * 32
               last_hit = None
               for row in lines:
                   # The first word of every row is the offset column; skip it.
                   for octet in row.split()[1:]:
                       window.pop(0)
                       window.append(octet)

                       # Nothing to examine until the window ends with the marker.
                       if window[-8:] != _AFTER_SEQ:
                           continue
                       last_hit = list(window)

                       if window[:12] == _BEFORE_SEQ[4:]:
                           # 12-byte payload: 32 - 8 - 12 leaves room for only 12
                           # "before" bytes, so its first 4 slid out of the window.
                           payload = window[12:-8]
                           if payload == _INTEL_EXTENDED_12B:
                               return 'INTEL_EXTENDED_12_BYTES_LE'
                           if payload == _MOTOROLA_EXTENDED_12B:
                               return 'MOTOROLA_EXTENDED_12_BYTES_BE'
                       elif window[:8] == _BEFORE_SEQ[8:]:
                           # 16-byte payload: only the last 8 "before" bytes remain.
                           payload = window[8:-8]
                           if payload == _INTEL_EXTENDED_16B:
                               return 'INTEL_EXTENDED_16_BYTES_LE'
                           if payload == _IEEE_QUAD_PREC_BE:
                               return 'IEEE_QUAD_BE'
                           if payload == _IEEE_QUAD_PREC_LE:
                               return 'IEEE_QUAD_LE'
                           if payload == _IBM_DOUBLE_DOUBLE_LE:
                               return 'IBM_DOUBLE_DOUBLE_LE'
                           if payload == _IBM_DOUBLE_DOUBLE_BE:
                               return 'IBM_DOUBLE_DOUBLE_BE'
                       elif window[:16] == _BEFORE_SEQ:
                           # 8-byte payload: the whole "before" marker still fits.
                           payload = window[16:-8]
                           if payload == _IEEE_DOUBLE_LE:
                               return 'IEEE_DOUBLE_LE'
                           if payload == _IEEE_DOUBLE_BE:
                               return 'IEEE_DOUBLE_BE'

               if last_hit is None:
                   # The "after" marker was never detected.
                   raise ValueError("Could not lock sequences (%s)" % last_hit)
               raise ValueError("Unrecognized format (%s)" % last_hit)

The C portion of the code has been tested and confirmed to work on systems with the following formats, either natively or via an alternative ABI: INTEL_EXTENDED_16_BYTES_LE, IEEE_QUAD_BE, IEEE_QUAD_LE, IBM_DOUBLE_DOUBLE_BE, IBM_DOUBLE_DOUBLE_LE, IEEE_DOUBLE_BE, INTEL_EXTENDED_12_BYTES_LE.

The original meson port includes an error condition with the comment "This should not be possible, 12 bits of "content" should still result in sizeof() being 16." As far as I can tell this is incorrect: compiling on an x86_64 system for the 32-bit ABI (gcc -m32) does indeed give sizeof(long double) == 12. This is reflected in the C code.

Closes gh-23972, closes
mesonbuild/meson#11068.

This ports the old Python code for identifying the long double representation to C, so that it can be easily invoked by meson. The original implementation is at https://github.com/numpy/numpy/blob/eead09a3d02c09374942cdc787c0b5e4fe9e7472/numpy/core/setup_common.py#L264-L434. The C portion of the code has been tested and confirmed to work on systems with the following formats, either natively or via an alternative ABI: INTEL_EXTENDED_16_BYTES_LE, IEEE_QUAD_BE, IEEE_QUAD_LE, IBM_DOUBLE_DOUBLE_BE, IBM_DOUBLE_DOUBLE_LE, IEEE_DOUBLE_BE, INTEL_EXTENDED_12_BYTES_LE. The original meson port includes an error condition with the comment "This should not be possible, 12 bits of "content" should still result in sizeof() being 16." As far as I can tell this is incorrect: compiling on an x86_64 system for the 32-bit ABI (gcc -m32) does indeed give sizeof(long double) == 12. This is reflected in the C code. Closes gh-23972, closes mesonbuild/meson#11068.

charris added 08 - Backport Used to tag backport PRs 36 - Build Build related PR Meson Items related to the introduction of Meson as the new build system for NumPy labels Jun 24, 2023

charris added this to the 1.25.1 release milestone Jun 24, 2023

charris merged commit 0fb6ffd into numpy:maintenance/1.25.x Jun 24, 2023

charris deleted the backport-23982 branch June 24, 2023 21:08

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

BLD: Port long double identification to C for meson #24036

BLD: Port long double identification to C for meson #24036

Uh oh!

charris commented Jun 24, 2023

Uh oh!

Uh oh!

	# Code to detect long double representation taken from MPFR m4 macro
	def check_long_double_representation(cmd):
	    """Compile a probe and classify the ``long double`` byte layout.

	    The probe source places a ``long double`` between two marker byte
	    arrays.  The compiled object is dumped with `pyod` and handed to
	    `long_double_representation`; if that fails, the probe is rebuilt as a
	    linked executable and the executable is dumped instead.

	    Parameters
	    ----------
	    cmd : object
	        Config command exposing ``_check_compiler``, ``_compile``,
	        ``_clean``, ``temp_files`` and a ``compiler`` attribute.

	    Returns
	    -------
	    str
	        The format name returned by `long_double_representation`.

	    Raises
	    ------
	    ValueError
	        Propagated from `long_double_representation` when the linked
	        executable cannot be classified either.
	    """
	    cmd._check_compiler()
	    body = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'}

	    # Disable whole program optimization (the default on vs2015, with python 3.5+)
	    # which generates intermediary object files and prevents checking the
	    # float representation.
	    if sys.platform == "win32" and not mingw32():
	        try:
	            cmd.compiler.compile_options.remove("/GL")
	        except (AttributeError, ValueError):
	            # Best effort: the option list is missing or has no "/GL" entry.
	            pass

	    # Disable multi-file interprocedural optimization in the Intel compiler on Linux
	    # which generates intermediary object files and prevents checking the
	    # float representation.
	    elif (sys.platform != "win32"
	            and cmd.compiler.compiler_type.startswith('intel')
	            and '-ipo' in cmd.compiler.cc_exe):
	        newcompiler = cmd.compiler.cc_exe.replace(' -ipo', '')
	        cmd.compiler.set_executables(
	            compiler=newcompiler,
	            compiler_so=newcompiler,
	            compiler_cxx=newcompiler,
	            linker_exe=newcompiler,
	            linker_so=newcompiler + ' -shared'
	        )

	    # We need to use _compile because we need the object filename
	    _, obj = cmd._compile(body, None, None, 'c')
	    try:
	        return long_double_representation(pyod(obj))
	    except ValueError:
	        # try linking to support CC="gcc -flto" or icc -ipo
	        # struct needs to be volatile so it isn't optimized away
	        # additionally "clang -flto" requires the foo struct to be used
	        body = body.replace('struct', 'volatile struct')
	        body += "int main(void) { return foo.before[0]; }\n"
	        _, obj = cmd._compile(body, None, None, 'c')
	        cmd.temp_files.append("_configtest")
	        cmd.compiler.link_executable([obj], "_configtest")
	        return long_double_representation(pyod("_configtest"))
	    finally:
	        # Temporary files are removed on every exit path.
	        cmd._clean()

	# C source template for the probe compiled by
	# check_long_double_representation.  ``%(type)s`` is replaced by the C type
	# under test; the value -123456789.0 is stored between two marker byte
	# arrays whose octal values match _BEFORE_SEQ and _AFTER_SEQ.
	LONG_DOUBLE_REPRESENTATION_SRC = r"""
	/* "before" is 16 bytes to ensure there's no padding between it and "x".
	* We're not expecting any "long double" bigger than 16 bytes or with
	* alignment requirements stricter than 16 bytes. */
	typedef %(type)s test_type;

	struct {
	char before[16];
	test_type x;
	char after[8];
	} foo = {
	{ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
	'\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
	-123456789.0,
	{ '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' }
	};
	"""

	def pyod(filename):
	    """Python implementation of the od UNIX utility (od -b, more exactly).

	    Parameters
	    ----------
	    filename : str
	        name of the file to get the dump from.

	    Returns
	    -------
	    out : list of str
	        One string per 16 input bytes: a zero-padded octal offset followed
	        by up to sixteen three-digit octal byte values, separated by single
	        spaces.  Empty for an empty file.

	    Notes
	    -----
	    We only implement enough to get the necessary information for long double
	    representation, this is not intended as a compatible replacement for od.
	    """
	    with open(filename, 'rb') as fid:
	        data = fid.read()

	    out = []
	    for i in range(0, len(data), 16):
	        # %o renders the value directly in octal, zero-padded to the width.
	        line = ['%07o' % i]
	        line.extend('%03o' % byte for byte in data[i:i + 16])
	        out.append(" ".join(line))
	    return out


	# Marker bytes, written as three-digit octal strings (the form pyod emits),
	# that surround the probe value in LONG_DOUBLE_REPRESENTATION_SRC:
	# _BEFORE_SEQ mirrors the 16-byte "before" array and _AFTER_SEQ the 8-byte
	# "after" array.
	_BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000',
	'001', '043', '105', '147', '211', '253', '315', '357']
	_AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020']

	# Payload byte patterns that long_double_representation compares with the
	# bytes found between the two markers; each one corresponds to a format name
	# returned by that function.  The *_LE lists built with [::-1] are the
	# byte-reversed *_BE lists.
	_IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000']
	_IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1]
	_INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353',
	'031', '300', '000', '000']
	_INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353',
	'031', '300', '000', '000', '000', '000', '000', '000']
	_MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171',
	'242', '240', '000', '000', '000', '000']
	_IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000',
	'000', '000', '000', '000', '000', '000', '000', '000']
	_IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1]
	# 16-byte payloads: the 8 bytes of the IEEE double pattern followed by 8
	# zero bytes.  Only the first half is reversed in the LE variant.
	_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
	['000'] * 8)
	_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
	['000'] * 8)

	def long_double_representation(lines):
	    """Given a binary dump as given by GNU od -b, look for long double
	    representation.

	    Parameters
	    ----------
	    lines : iterable of str
	        Dump lines: an offset column followed by octal byte values, as
	        produced by `pyod`.

	    Returns
	    -------
	    str
	        One of 'INTEL_EXTENDED_12_BYTES_LE',
	        'MOTOROLA_EXTENDED_12_BYTES_BE', 'INTEL_EXTENDED_16_BYTES_LE',
	        'IEEE_QUAD_BE', 'IEEE_QUAD_LE', 'IBM_DOUBLE_DOUBLE_LE',
	        'IBM_DOUBLE_DOUBLE_BE', 'IEEE_DOUBLE_LE' or 'IEEE_DOUBLE_BE'.

	    Raises
	    ------
	    ValueError
	        If the "after" marker never appears, or appears without a
	        recognised payload in front of it.
	    """
	    # Read contains a list of 32 items, each item is a byte (in octal
	    # representation, as a string). We 'slide' over the output until read is of
	    # the form before_seq + content + after_sequence, where content is the long double
	    # representation:
	    # - content is 12 bytes: 80 bits Intel or Motorola extended representation
	    # - content is 16 bytes: 80 bits Intel representation (64 bits), quad
	    #   precision or IBM double double
	    # - content is 8 bytes: same as double
	    read = [''] * 32
	    saw = None
	    for line in lines:
	        # we skip the first word, as od -b output an index at the beginning of
	        # each line
	        for w in line.split()[1:]:
	            read.pop(0)
	            read.append(w)

	            # If the end of read is equal to the after_sequence, read contains
	            # the long double
	            if read[-8:] == _AFTER_SEQ:
	                # Snapshot the window: read keeps being mutated afterwards.
	                saw = list(read)
	                # if the content was 12 bytes, we only have 32 - 8 - 12 = 12
	                # "before" bytes. In other words the first 4 "before" bytes went
	                # past the sliding window.
	                if read[:12] == _BEFORE_SEQ[4:]:
	                    if read[12:-8] == _INTEL_EXTENDED_12B:
	                        return 'INTEL_EXTENDED_12_BYTES_LE'
	                    if read[12:-8] == _MOTOROLA_EXTENDED_12B:
	                        return 'MOTOROLA_EXTENDED_12_BYTES_BE'
	                # if the content was 16 bytes, we are left with 32-8-16 = 8
	                # "before" bytes, so 8 went past the sliding window.
	                elif read[:8] == _BEFORE_SEQ[8:]:
	                    if read[8:-8] == _INTEL_EXTENDED_16B:
	                        return 'INTEL_EXTENDED_16_BYTES_LE'
	                    elif read[8:-8] == _IEEE_QUAD_PREC_BE:
	                        return 'IEEE_QUAD_BE'
	                    elif read[8:-8] == _IEEE_QUAD_PREC_LE:
	                        return 'IEEE_QUAD_LE'
	                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE:
	                        return 'IBM_DOUBLE_DOUBLE_LE'
	                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE:
	                        return 'IBM_DOUBLE_DOUBLE_BE'
	                # if the content was 8 bytes, left with 32-8-8 = 16 bytes
	                elif read[:16] == _BEFORE_SEQ:
	                    if read[16:-8] == _IEEE_DOUBLE_LE:
	                        return 'IEEE_DOUBLE_LE'
	                    elif read[16:-8] == _IEEE_DOUBLE_BE:
	                        return 'IEEE_DOUBLE_BE'

	    if saw is not None:
	        raise ValueError("Unrecognized format (%s)" % saw)
	    else:
	        # We never detected the after_sequence
	        raise ValueError("Could not lock sequences (%s)" % saw)

Uh oh!

BLD: Port long double identification to C for meson #24036

BLD: Port long double identification to C for meson #24036

Uh oh!

Conversation

charris commented Jun 24, 2023

Uh oh!

Uh oh!