Skip to content

Commit

Permalink
[Cleanup] Conform to strict aliasing rules (#392)
Browse files Browse the repository at this point in the history
This combined patch removes potential violations of the strict aliasing rule.
It also drops long double and quad-precision support from DFT.
  • Loading branch information
shibatch committed Jan 13, 2021
1 parent ea29e62 commit 1d66bbd
Show file tree
Hide file tree
Showing 31 changed files with 182 additions and 2,544 deletions.
5 changes: 0 additions & 5 deletions CMakeLists.txt
Expand Up @@ -88,11 +88,6 @@ set(COSTOVERRIDE_SVENOFMA 10)

cmake_minimum_required(VERSION 3.4.3)

# Set to NEW when updating cmake_minimum_required to VERSION >= 3.7.2
if(${CMAKE_VERSION} VERSION_GREATER "3.7.1")
cmake_policy(SET CMP0066 OLD)
endif()

if(${CMAKE_VERSION} VERSION_GREATER "3.14.99")
cmake_policy(SET CMP0091 NEW)
endif()
Expand Down
2 changes: 1 addition & 1 deletion Jenkinsfile
Expand Up @@ -53,7 +53,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=FALSE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand Down
8 changes: 4 additions & 4 deletions src/arch/helperpurec_scalar.h
Expand Up @@ -123,7 +123,7 @@ typedef struct {

#if defined(ENABLEFLOAT128) && CONFIG != 3
typedef __float128 vargquad;
#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) && CONFIG != 3
#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__) && CONFIG != 3
typedef long double vargquad;
#else
typedef vquad vargquad;
Expand Down Expand Up @@ -444,19 +444,19 @@ static INLINE vargquad cast_aq_vq(vquad vq) {
#else
static vquad loadu_vq_p(void *p) {
vquad vq;
memcpy(&vq, p, 16);
memcpy(&vq, p, sizeof(vq));
return vq;
}

static INLINE vquad cast_vq_aq(vargquad aq) {
vquad vq;
memcpy(&vq, &aq, 16);
memcpy(&vq, &aq, sizeof(vq));
return vq;
}

static INLINE vargquad cast_aq_vq(vquad vq) {
vargquad aq;
memcpy(&aq, &vq, 16);
memcpy(&aq, &vq, sizeof(aq));
return aq;
}
#endif
Expand Down
57 changes: 1 addition & 56 deletions src/common/misc.h
Expand Up @@ -150,10 +150,6 @@
#define stringify(s) stringify_(s)
#define stringify_(s) #s

#if !defined(SLEEF_GENHEADER)
typedef long double longdouble;
#endif

#if !defined(Sleef_double2_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_double2_DEFINED
typedef struct {
Expand All @@ -168,70 +164,19 @@ typedef struct {
} Sleef_float2;
#endif

#if !defined(Sleef_longdouble2_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_longdouble2_DEFINED
typedef struct {
long double x, y;
} Sleef_longdouble2;
#endif

#if !defined(Sleef_quad_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_quad_DEFINED
#if defined(ENABLEFLOAT128)
typedef __float128 Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## Q)
#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__)
typedef long double Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## L)
#else
typedef struct { uint64_t x, y; } Sleef_quad;
#endif
#endif

#if !defined(Sleef_quad1_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_quad1_DEFINED
typedef union {
struct {
Sleef_quad x;
};
Sleef_quad s[1];
} Sleef_quad1;
#endif

#if !defined(Sleef_quad2_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_quad2_DEFINED
typedef union {
struct {
Sleef_quad x, y;
};
Sleef_quad s[2];
} Sleef_quad2;
#endif

#if !defined(Sleef_quad4_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_quad4_DEFINED
typedef union {
struct {
Sleef_quad x, y, z, w;
};
Sleef_quad s[4];
} Sleef_quad4;
#endif

#if !defined(Sleef_quad8_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_quad8_DEFINED
typedef union {
Sleef_quad s[8];
} Sleef_quad8;
#endif

#if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) && !defined(SLEEF_GENHEADER)
#define Sleef_quadx_DEFINED
typedef union {
Sleef_quad s[32];
} Sleef_quadx;
#endif

//

#if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER)
Expand Down
32 changes: 3 additions & 29 deletions src/dft/CMakeLists.txt
Expand Up @@ -12,16 +12,14 @@ if (SLEEFDFT_MAXBUTWIDTH GREATER 7)
endif()

option(SLEEFDFT_ENABLE_STREAM "Streaming instructions are utilized in DFT." OFF)
option(SLEEFDFT_ENABLE_LONGDOUBLE "Long double routines will be compiled in." OFF)
option(SLEEFDFT_ENABLE_QUAD "Quad precision routines will be compiled in." OFF)

# Settings

# Constants definition

set(LISTSHORTTYPENAME "dp" "sp" "ld" "qp")
set(LISTLONGTYPENAME "double" "float" "longdouble" "Sleef_quad")
set(LISTTYPEID "1" "2" "3" "4")
set(LISTSHORTTYPENAME "dp" "sp")
set(LISTLONGTYPENAME "double" "float")
set(LISTTYPEID "1" "2")

set(MACRODEF_vecextdp BASETYPEID=1 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextdp ${FLAGS_ENABLE_VECEXT})
Expand Down Expand Up @@ -105,22 +103,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
set(ISALIST_DP vecextdp)
endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")

if (COMPILER_SUPPORTS_LONG_DOUBLE AND SLEEFDFT_ENABLE_LONGDOUBLE)
set(LIST_SUPPORTED_FPTYPE ${LIST_SUPPORTED_FPTYPE} 2)
set(ISALIST_QP purecld)
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
set(ISALIST_LD vecextld)
endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
endif(COMPILER_SUPPORTS_LONG_DOUBLE AND SLEEFDFT_ENABLE_LONGDOUBLE)

if (COMPILER_SUPPORTS_FLOAT128 AND SLEEFDFT_ENABLE_QUAD)
set(LIST_SUPPORTED_FPTYPE ${LIST_SUPPORTED_FPTYPE} 3)
set(ISALIST_QP purecqp)
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
set(ISALIST_QP vecextqp)
endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
endif(COMPILER_SUPPORTS_FLOAT128 AND SLEEFDFT_ENABLE_QUAD)

# List all available vector data types

if (COMPILER_SUPPORTS_SSE4)
Expand Down Expand Up @@ -204,14 +186,6 @@ else()
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_STREAM=0)
endif()

if (COMPILER_SUPPORTS_FLOAT128 AND NOT (SLEEF_ARCH_32BIT AND SLEEF_ARCH_X86))
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLEFLOAT128)
endif()

if (COMPILER_SUPPORTS_LONG_DOUBLE AND NOT (SLEEF_ARCH_32BIT AND SLEEF_ARCH_X86))
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_LONGDOUBLE)
endif()

if(COMPILER_SUPPORTS_OPENMP)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
endif(COMPILER_SUPPORTS_OPENMP)
Expand Down
86 changes: 0 additions & 86 deletions src/dft/dft.c
Expand Up @@ -68,50 +68,6 @@ typedef Sleef_float2 sc_t;
#define BUTB butb_float
#define SINCOSPI Sleef_sincospif_u05
#include "dispatchsp.h"
#elif BASETYPEID == 3
typedef long double real;
typedef Sleef_longdouble2 sc_t;
#define BASETYPESTRING "long double"
#define MAGIC 0x14142135
#define MAGIC2D 0x26457513
#define INIT SleefDFT_longdouble_init1d
#define EXECUTE SleefDFT_longdouble_execute
#define INIT2D SleefDFT_longdouble_init2d
#define CTBL ctbl_longdouble
#define REALSUB0 realSub0_longdouble
#define REALSUB1 realSub1_longdouble
#define GETINT getInt_longdouble
#define GETPTR getPtr_longdouble
#define DFTF dftf_longdouble
#define DFTB dftb_longdouble
#define TBUTF tbutf_longdouble
#define TBUTB tbutb_longdouble
#define BUTF butf_longdouble
#define BUTB butb_longdouble
#define SINCOSPI Sleef_sincospil_u05
#include "dispatchld.h"
#elif BASETYPEID == 4
typedef Sleef_quad real;
typedef Sleef_quad2 sc_t;
#define BASETYPESTRING "Sleef_quad"
#define MAGIC 0x33166247
#define MAGIC2D 0x36055512
#define INIT SleefDFT_quad_init1d
#define EXECUTE SleefDFT_quad_execute
#define INIT2D SleefDFT_quad_init2d
#define CTBL ctbl_Sleef_quad
#define REALSUB0 realSub0_Sleef_quad
#define REALSUB1 realSub1_Sleef_quad
#define GETINT getInt_Sleef_quad
#define GETPTR getPtr_Sleef_quad
#define DFTF dftf_Sleef_quad
#define DFTB dftb_Sleef_quad
#define TBUTF tbutf_Sleef_quad
#define TBUTB tbutb_Sleef_quad
#define BUTF butf_Sleef_quad
#define BUTB butb_Sleef_quad
#define SINCOSPI Sleef_sincospiq_u05
#include "dispatchqp.h"
#else
#error No BASETYPEID specified
#endif
Expand All @@ -121,47 +77,6 @@ typedef Sleef_quad2 sc_t;

//

#if BASETYPEID == 4
real CTBL[] = {
0.7071067811865475243818940365159164684883Q, -0.7071067811865475243818940365159164684883Q,
0.9238795325112867561014214079495587839119Q, -0.382683432365089771723257530688933059082Q,
0.382683432365089771723257530688933059082Q, -0.9238795325112867561014214079495587839119Q,
#if MAXBUTWIDTH >= 5
0.9807852804032304491190993878113602022495Q, -0.1950903220161282678433729148581576851029Q,
0.5555702330196022247573058028269343822103Q, -0.8314696123025452370808655033762590846891Q,
0.8314696123025452370808655033762590846891Q, -0.5555702330196022247573058028269343822103Q,
0.1950903220161282678433729148581576851029Q, -0.9807852804032304491190993878113602022495Q,
#endif
#if MAXBUTWIDTH >= 6
0.9951847266721968862310254699821143731242Q, -0.09801714032956060199569840382660679267701Q,
0.6343932841636454982026105398063009488396Q, -0.7730104533627369607965383602188325085081Q,
0.881921264348355029715105513066220055407Q, -0.4713967368259976485449225247492677226546Q,
0.2902846772544623676448431737195932100803Q, -0.9569403357322088649310892760624369657307Q,
0.9569403357322088649310892760624369657307Q, -0.2902846772544623676448431737195932100803Q,
0.4713967368259976485449225247492677226546Q, -0.881921264348355029715105513066220055407Q,
0.7730104533627369607965383602188325085081Q, -0.6343932841636454982026105398063009488396Q,
0.09801714032956060199569840382660679267701Q, -0.9951847266721968862310254699821143731242Q,
#endif
#if MAXBUTWIDTH >= 7
0.9987954562051723927007702841240899260811Q, -0.04906767432741801425355085940205324135377Q,
0.6715589548470184006194634573905233310143Q, -0.7409511253549590911932944126139233276263Q,
0.9039892931234433315823215138173907234886Q, -0.427555093430282094315230886905077056781Q,
0.336889853392220050702686798271834334173Q, -0.9415440651830207783906830087961026265475Q,
0.9700312531945439926159106824865574481009Q, -0.2429801799032638899447731489766866275204Q,
0.5141027441932217266072797923204262815489Q, -0.8577286100002720698929313536407192941624Q,
0.8032075314806449097991200569701675249235Q, -0.5956993044924333434615715265891822127742Q,
0.1467304744553617516588479505190711904561Q, -0.9891765099647809734561415551112872890371Q,
0.9891765099647809734561415551112872890371Q, -0.1467304744553617516588479505190711904561Q,
0.5956993044924333434615715265891822127742Q, -0.8032075314806449097991200569701675249235Q,
0.8577286100002720698929313536407192941624Q, -0.5141027441932217266072797923204262815489Q,
0.2429801799032638899447731489766866275204Q, -0.9700312531945439926159106824865574481009Q,
0.9415440651830207783906830087961026265475Q, -0.336889853392220050702686798271834334173Q,
0.427555093430282094315230886905077056781Q, -0.9039892931234433315823215138173907234886Q,
0.7409511253549590911932944126139233276263Q, -0.6715589548470184006194634573905233310143Q,
0.04906767432741801425355085940205324135377Q, -0.9987954562051723927007702841240899260811Q,
#endif
};
#else
real CTBL[] = {
0.7071067811865475243818940365159164684883L, -0.7071067811865475243818940365159164684883L,
0.9238795325112867561014214079495587839119L, -0.382683432365089771723257530688933059082L,
Expand Down Expand Up @@ -201,7 +116,6 @@ real CTBL[] = {
0.04906767432741801425355085940205324135377L, -0.9987954562051723927007702841240899260811L,
#endif
};
#endif

#ifndef ENABLE_STREAM
#error ENABLE_STREAM not defined
Expand Down
18 changes: 7 additions & 11 deletions src/dft/dftcommon.c
Expand Up @@ -29,13 +29,9 @@

#define MAGIC_FLOAT 0x31415926
#define MAGIC_DOUBLE 0x27182818
#define MAGIC_LONGDOUBLE 0x14142135
#define MAGIC_QUAD 0x33166247

#define MAGIC2D_FLOAT 0x22360679
#define MAGIC2D_DOUBLE 0x17320508
#define MAGIC2D_LONGDOUBLE 0x26457513
#define MAGIC2D_QUAD 0x36055512

const char *configStr[] = { "ST", "ST stream", "MT", "MT stream" };

Expand Down Expand Up @@ -76,7 +72,7 @@ static int parsePathStr(char *p, int *path, int *config, int pathLenMax, int log
}

EXPORT void SleefDFT_setPath(SleefDFT *p, char *pathStr) {
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD));
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));

int path[32], config[32];
int pathLen = parsePathStr(pathStr, path, config, 31, p->log2len);
Expand Down Expand Up @@ -116,7 +112,7 @@ void freeTables(SleefDFT *p) {
}

EXPORT void SleefDFT_dispose(SleefDFT *p) {
if (p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)) {
if (p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE)) {
Sleef_free(p->tBuf);
SleefDFT_dispose(p->instH);
if (p->hlen != p->vlen) SleefDFT_dispose(p->instV);
Expand All @@ -126,7 +122,7 @@ EXPORT void SleefDFT_dispose(SleefDFT *p) {
return;
}

assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD));
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));

if (p->log2len <= 1) {
p->magic = 0;
Expand Down Expand Up @@ -322,7 +318,7 @@ static void planMap_putU64(uint64_t key, uint64_t value) {
}

int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat) {
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD));
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));

initPlanMapLock();

Expand Down Expand Up @@ -357,7 +353,7 @@ int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat) {
}

void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat) {
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD));
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));

initPlanMapLock();

Expand Down Expand Up @@ -388,7 +384,7 @@ void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat) {
}

int PlanManager_loadMeasurementResultsT(SleefDFT *p) {
assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD));
assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE));

initPlanMapLock();

Expand All @@ -409,7 +405,7 @@ int PlanManager_loadMeasurementResultsT(SleefDFT *p) {
}

void PlanManager_saveMeasurementResultsT(SleefDFT *p) {
assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD));
assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE));

initPlanMapLock();

Expand Down

0 comments on commit 1d66bbd

Please sign in to comment.