Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/utils.h>

#ifdef __CLC_SCALAR

// Configuration knobs for this template. Each one may be defined by the
// including file before inclusion; otherwise the default below is used.
// None of these defaults are #undef'd at the end of this file.

// Name of the scalar function that is called once per vector element.
// Defaults to __CLC_FUNCTION, i.e. the vector overloads defined below call
// the same-named scalar overload.
#ifndef __CLC_IMPL_FUNCTION
#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
#endif

// Element type of the returned vector.
#ifndef __CLC_RET_TYPE
#define __CLC_RET_TYPE __CLC_GENTYPE
#endif

// Element type of the first (by-value) vector argument.
#ifndef __CLC_ARG1_TYPE
#define __CLC_ARG1_TYPE __CLC_GENTYPE
#endif

// Element type of the vector that the second (pointer) argument points to.
#ifndef __CLC_ARG2_TYPE
#define __CLC_ARG2_TYPE __CLC_GENTYPE
#endif

// Vector type names built from the element types above and
// __CLC_VECTOR_SIZE. Macros are expanded at the point of use, so these pick
// up whichever __CLC_VECTOR_SIZE is defined when a function below is emitted.
// NOTE(review): __CLC_XCONCAT comes from <clc/utils.h>; it is presumed to
// expand its arguments and paste them together (e.g. float + 4 -> float4).
#define __CLC_RET_VECTYPE __CLC_XCONCAT(__CLC_RET_TYPE, __CLC_VECTOR_SIZE)
#define __CLC_ARG1_VECTYPE __CLC_XCONCAT(__CLC_ARG1_TYPE, __CLC_VECTOR_SIZE)
#define __CLC_ARG2_VECTYPE __CLC_XCONCAT(__CLC_ARG2_TYPE, __CLC_VECTOR_SIZE)

#define __CLC_VECTOR_SIZE 2
// 2-element overload of __CLC_FUNCTION: calls the scalar implementation once
// per element of x, handing each call the address of the corresponding
// element of the vector that ptr points to.
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
  // Reinterpret the vector pointer as a pointer to its first scalar element
  // so that individual elements can be addressed.
  __CLC_ADDRSPACE __CLC_ARG2_TYPE *lanes =
      (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;

  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, &lanes[0]),
                             __CLC_IMPL_FUNCTION(x.s1, &lanes[1]));
}
#undef __CLC_VECTOR_SIZE

#define __CLC_VECTOR_SIZE 3
// 3-element overload of __CLC_FUNCTION: calls the scalar implementation once
// per element of x, handing each call the address of the corresponding
// element of the vector that ptr points to.
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
  // Reinterpret the vector pointer as a pointer to its first scalar element
  // so that individual elements can be addressed.
  __CLC_ADDRSPACE __CLC_ARG2_TYPE *lanes =
      (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;

  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, &lanes[0]),
                             __CLC_IMPL_FUNCTION(x.s1, &lanes[1]),
                             __CLC_IMPL_FUNCTION(x.s2, &lanes[2]));
}
#undef __CLC_VECTOR_SIZE

#define __CLC_VECTOR_SIZE 4
// 4-element overload of __CLC_FUNCTION: calls the scalar implementation once
// per element of x, handing each call the address of the corresponding
// element of the vector that ptr points to.
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
  // Reinterpret the vector pointer as a pointer to its first scalar element
  // so that individual elements can be addressed.
  __CLC_ADDRSPACE __CLC_ARG2_TYPE *lanes =
      (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;

  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, &lanes[0]),
                             __CLC_IMPL_FUNCTION(x.s1, &lanes[1]),
                             __CLC_IMPL_FUNCTION(x.s2, &lanes[2]),
                             __CLC_IMPL_FUNCTION(x.s3, &lanes[3]));
}
#undef __CLC_VECTOR_SIZE

#define __CLC_VECTOR_SIZE 8
// 8-element overload of __CLC_FUNCTION: calls the scalar implementation once
// per element of x, handing each call the address of the corresponding
// element of the vector that ptr points to.
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
  // Reinterpret the vector pointer as a pointer to its first scalar element
  // so that individual elements can be addressed.
  __CLC_ADDRSPACE __CLC_ARG2_TYPE *lanes =
      (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;

  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, &lanes[0]),
                             __CLC_IMPL_FUNCTION(x.s1, &lanes[1]),
                             __CLC_IMPL_FUNCTION(x.s2, &lanes[2]),
                             __CLC_IMPL_FUNCTION(x.s3, &lanes[3]),
                             __CLC_IMPL_FUNCTION(x.s4, &lanes[4]),
                             __CLC_IMPL_FUNCTION(x.s5, &lanes[5]),
                             __CLC_IMPL_FUNCTION(x.s6, &lanes[6]),
                             __CLC_IMPL_FUNCTION(x.s7, &lanes[7]));
}
#undef __CLC_VECTOR_SIZE

#define __CLC_VECTOR_SIZE 16
// 16-element overload of __CLC_FUNCTION: calls the scalar implementation once
// per element of x, handing each call the address of the corresponding
// element of the vector that ptr points to.
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
  // Reinterpret the vector pointer as a pointer to its first scalar element
  // so that individual elements can be addressed.
  __CLC_ADDRSPACE __CLC_ARG2_TYPE *lanes =
      (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;

  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, &lanes[0]),
                             __CLC_IMPL_FUNCTION(x.s1, &lanes[1]),
                             __CLC_IMPL_FUNCTION(x.s2, &lanes[2]),
                             __CLC_IMPL_FUNCTION(x.s3, &lanes[3]),
                             __CLC_IMPL_FUNCTION(x.s4, &lanes[4]),
                             __CLC_IMPL_FUNCTION(x.s5, &lanes[5]),
                             __CLC_IMPL_FUNCTION(x.s6, &lanes[6]),
                             __CLC_IMPL_FUNCTION(x.s7, &lanes[7]),
                             __CLC_IMPL_FUNCTION(x.s8, &lanes[8]),
                             __CLC_IMPL_FUNCTION(x.s9, &lanes[9]),
                             __CLC_IMPL_FUNCTION(x.sa, &lanes[10]),
                             __CLC_IMPL_FUNCTION(x.sb, &lanes[11]),
                             __CLC_IMPL_FUNCTION(x.sc, &lanes[12]),
                             __CLC_IMPL_FUNCTION(x.sd, &lanes[13]),
                             __CLC_IMPL_FUNCTION(x.se, &lanes[14]),
                             __CLC_IMPL_FUNCTION(x.sf, &lanes[15]));
}
#undef __CLC_VECTOR_SIZE

// Remove the vector type name helpers defined earlier in this file; they are
// only meant to be used by the function definitions above.
#undef __CLC_RET_VECTYPE
#undef __CLC_ARG1_VECTYPE
#undef __CLC_ARG2_VECTYPE

#endif // __CLC_SCALAR
71 changes: 9 additions & 62 deletions libclc/clc/lib/generic/math/clc_lgamma_r.cl
Original file line number Diff line number Diff line change
Expand Up @@ -16,60 +16,6 @@
#include <clc/math/clc_sinpi.h>
#include <clc/math/math.h>

// Defines the 2-, 3-, 4-, 8- and 16-element vector overloads of a function
// that takes a value and a pointer, in terms of its scalar overload. Each
// generated overload calls the scalar function once per element of x and
// passes it the address of the corresponding element of the vector y points
// to.
//
//   DECLSPEC        declaration specifiers emitted before each return type
//   RET_TYPE        element type of the returned vector
//   __CLC_FUNCTION  name of the function being defined and called per element
//   ARG1_TYPE       element type of the by-value vector argument
//   ADDR_SPACE      address-space qualifier of the pointer argument
//   ARG2_TYPE       element type of the vector the pointer argument refers to
#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \
                              ADDR_SPACE, ARG2_TYPE) \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
  __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
                 ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
    ADDR_SPACE ARG2_TYPE *lanes = (ADDR_SPACE ARG2_TYPE *)y; \
    return (__CLC_XCONCAT(RET_TYPE, 2))( \
        __CLC_FUNCTION(x.s0, &lanes[0]), \
        __CLC_FUNCTION(x.s1, &lanes[1])); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
  __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
                 ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
    ADDR_SPACE ARG2_TYPE *lanes = (ADDR_SPACE ARG2_TYPE *)y; \
    return (__CLC_XCONCAT(RET_TYPE, 3))( \
        __CLC_FUNCTION(x.s0, &lanes[0]), \
        __CLC_FUNCTION(x.s1, &lanes[1]), \
        __CLC_FUNCTION(x.s2, &lanes[2])); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
  __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
                 ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
    ADDR_SPACE ARG2_TYPE *lanes = (ADDR_SPACE ARG2_TYPE *)y; \
    return (__CLC_XCONCAT(RET_TYPE, 4))( \
        __CLC_FUNCTION(x.s0, &lanes[0]), \
        __CLC_FUNCTION(x.s1, &lanes[1]), \
        __CLC_FUNCTION(x.s2, &lanes[2]), \
        __CLC_FUNCTION(x.s3, &lanes[3])); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
  __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
                 ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
    ADDR_SPACE ARG2_TYPE *lanes = (ADDR_SPACE ARG2_TYPE *)y; \
    return (__CLC_XCONCAT(RET_TYPE, 8))( \
        __CLC_FUNCTION(x.s0, &lanes[0]), \
        __CLC_FUNCTION(x.s1, &lanes[1]), \
        __CLC_FUNCTION(x.s2, &lanes[2]), \
        __CLC_FUNCTION(x.s3, &lanes[3]), \
        __CLC_FUNCTION(x.s4, &lanes[4]), \
        __CLC_FUNCTION(x.s5, &lanes[5]), \
        __CLC_FUNCTION(x.s6, &lanes[6]), \
        __CLC_FUNCTION(x.s7, &lanes[7])); \
  } \
  \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
  __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
                 ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
    ADDR_SPACE ARG2_TYPE *lanes = (ADDR_SPACE ARG2_TYPE *)y; \
    return (__CLC_XCONCAT(RET_TYPE, 16))( \
        __CLC_FUNCTION(x.s0, &lanes[0]), \
        __CLC_FUNCTION(x.s1, &lanes[1]), \
        __CLC_FUNCTION(x.s2, &lanes[2]), \
        __CLC_FUNCTION(x.s3, &lanes[3]), \
        __CLC_FUNCTION(x.s4, &lanes[4]), \
        __CLC_FUNCTION(x.s5, &lanes[5]), \
        __CLC_FUNCTION(x.s6, &lanes[6]), \
        __CLC_FUNCTION(x.s7, &lanes[7]), \
        __CLC_FUNCTION(x.s8, &lanes[8]), \
        __CLC_FUNCTION(x.s9, &lanes[9]), \
        __CLC_FUNCTION(x.sa, &lanes[10]), \
        __CLC_FUNCTION(x.sb, &lanes[11]), \
        __CLC_FUNCTION(x.sc, &lanes[12]), \
        __CLC_FUNCTION(x.sd, &lanes[13]), \
        __CLC_FUNCTION(x.se, &lanes[14]), \
        __CLC_FUNCTION(x.sf, &lanes[15])); \
  }

// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
Expand Down Expand Up @@ -333,9 +279,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_lgamma_r(float x, private int *signp) {
return r;
}

// Emit the float2/3/4/8/16 overloads of __clc_lgamma_r. Each overload calls
// the scalar float overload once per element and takes a pointer to a
// private int vector of the same width as its second argument.
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float,
private, int)

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// ====================================================
Expand Down Expand Up @@ -639,8 +582,6 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) {
return r;
}

// Emit the double2/3/4/8/16 overloads of __clc_lgamma_r. Each overload calls
// the scalar double overload once per element and takes a pointer to a
// private int vector of the same width as its second argument.
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_lgamma_r, double,
private, int)
#endif

#ifdef cl_khr_fp16
Expand All @@ -651,11 +592,17 @@ _CLC_OVERLOAD _CLC_DEF half __clc_lgamma_r(half x, private int *iptr) {
return (half)__clc_lgamma_r((float)x, iptr);
}

// Emit the half2/3/4/8/16 overloads of __clc_lgamma_r. Each overload calls
// the scalar half overload once per element and takes a pointer to a
// private int vector of the same width as its second argument.
// NOTE(review): the macro expansion already ends with the closing brace of a
// function definition, so the trailing ';' below is an empty file-scope
// declaration; the float and double invocations do not have it.
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_lgamma_r, half,
private, int);

#endif

// Instantiate the vector overloads of __clc_lgamma_r from the shared
// scalarizing template: the function name is __clc_lgamma_r, the pointer
// argument refers to int elements, and it lives in the private address space.
// NOTE(review): <clc/math/gentype.inc> is presumed to include __CLC_BODY once
// per supported floating-point type, defining __CLC_GENTYPE (and
// __CLC_SCALAR for the scalar pass) each time; confirm against that header.
#define __CLC_FUNCTION __clc_lgamma_r
#define __CLC_ARG2_TYPE int
#define __CLC_ADDRSPACE private
#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
#include <clc/math/gentype.inc>
// Drop the template parameters so later instantiations in this file can set
// their own values.
#undef __CLC_ADDRSPACE
#undef __CLC_ARG2_TYPE
#undef __CLC_FUNCTION

#define __CLC_ADDRSPACE global
#define __CLC_BODY <clc_lgamma_r.inc>
#include <clc/math/gentype.inc>
Expand Down
Loading