From a07a266f9b74586495b0ecbd253bcaf95f8bea37 Mon Sep 17 00:00:00 2001 From: Milot Mirdita Date: Sat, 14 Mar 2020 18:35:51 +0100 Subject: [PATCH] Working on PPC64LE support --- .travis.yml | 11 +++- CMakeLists.txt | 3 + Dockerfile | 73 ++++++++++----------- lib/ksw2/ksw2_extz2_sse.cpp | 8 ++- lib/simd/simd.h | 10 ++- lib/simd/sse2altivec.h | 127 ++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 2 +- src/commons/Application.cpp | 2 +- src/commons/Orf.cpp | 2 +- src/commons/Util.cpp | 2 +- src/commons/itoa.h | 9 ++- 11 files changed, 199 insertions(+), 50 deletions(-) create mode 100644 lib/simd/sse2altivec.h diff --git a/.travis.yml b/.travis.yml index 6c13e3d87..eec6725dc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,6 +19,12 @@ jobs: - *default_packages - build-essential arch: arm64 + - addons: + apt: + packages: + - *default_packages + - build-essential + arch: ppc64le - addons: apt: packages: @@ -85,6 +91,8 @@ jobs: - mpi-default-bin env: MPI=1 CC=gcc-9 CXX=g++-9 fast_finish: true + allow_failures: + - arch: ppc64le before_install: - export CC @@ -93,8 +101,7 @@ before_install: script: - | mkdir build; cd build; \ - [[ "${TRAVIS_CPU_ARCH}" = "arm64" ]] && SIMD="" || SIMD="-DHAVE_SSE4_1=1"; \ - cmake -DHAVE_MPI="$([[ -z "$MPI" ]]; echo $?)" "$SIMD" -DENABLE_WERROR=1 -DHAVE_TESTS=1 ..; \ + cmake -DHAVE_MPI="$([[ -z "$MPI" ]]; echo $?)" -DENABLE_WERROR=1 -DHAVE_TESTS=1 ..; \ make -j $(nproc --all); \ mkdir path; \ printf '#!/bin/sh\n/usr/bin/tee "$@" | tail\n' > path/tee; \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 4723cb8e1..f9d1b9444 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,9 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") set(ARM 1) set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -DNEON=1 -DSSE=1 -fsigned-char") +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "PPC64*|ppc64*|powerpc64*") + set(PPC64 1) + set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -DSSE=1 
-std=gnu++0x -mcpu=power8 -mvsx") elseif (EMSCRIPTEN) set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -DSSE=1 -DWASM=1 -msimd128 -s WASM=1 -s ASSERTIONS=1") else () diff --git a/Dockerfile b/Dockerfile index 38683debb..91ba8ee52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,22 @@ ARG NAMESPACE= FROM debian:stable-slim as qemu-downloader ARG NAMESPACE -RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/* -RUN if [ X"$NAMESPACE" = X"arm64v8/" ]; then \ - wget -nv -O "/usr/bin/qemu-aarch64-static" https://github.com/multiarch/qemu-user-static/releases/download/v3.1.0-2/qemu-aarch64-static; \ - else \ - echo -e '#!/bin/sh\n"$@"\n' > "/usr/bin/qemu-aarch64-static"; \ +RUN if [ X"$NAMESPACE" != X"" ]; then \ + apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*; \ fi; \ - chmod +x /usr/bin/qemu-aarch64-static; + if [ X"$NAMESPACE" = X"ppc64le/" ]; then \ + wget -nv -O /usr/bin/qemu-ppc64le-static https://github.com/multiarch/qemu-user-static/releases/download/v4.2.0-4/qemu-ppc64le-static; \ + chmod +x /usr/bin/qemu-ppc64le-static; \ + fi; \ + if [ X"$NAMESPACE" = X"aarch64/" ]; then \ + wget -nv -O /usr/bin/qemu-aarch64-static https://github.com/multiarch/qemu-user-static/releases/download/v4.2.0-4/qemu-aarch64-static; \ + chmod +x /usr/bin/qemu-aarch64-static; \ + fi; \ + touch /usr/bin/dummy_copy -FROM ${NAMESPACE}debian:stable-slim as mmseqs-builder +FROM ${NAMESPACE}debian:stable-slim as builder ARG NAMESPACE -COPY --from=qemu-downloader /usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static +COPY --from=qemu-downloader /usr/bin/dummy_copy /usr/bin/qemu-aarch64-static* /usr/bin/qemu-ppc64le-static* /usr/bin/ RUN apt-get update && apt-get install -y \ build-essential cmake xxd git zlib1g-dev libbz2-dev \ @@ -19,45 +24,37 @@ RUN apt-get update && apt-get install -y \ WORKDIR /opt/mmseqs ADD . . 
-RUN mkdir -p build_sse/bin && mkdir -p build_avx/bin && mkdir -p build_neon/bin - -WORKDIR /opt/mmseqs/build_sse -RUN if [ X"$NAMESPACE" = X"" ]; then \ - cmake -DHAVE_SSE4_1=1 -DHAVE_MPI=0 -DHAVE_TESTS=0 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. ..; \ - make -j $(nproc --all) && make install; \ - fi - -WORKDIR /opt/mmseqs/build_avx -RUN if [ X"$NAMESPACE" = X"" ]; then \ - cmake -DHAVE_AVX2=1 -DHAVE_MPI=0 -DHAVE_TESTS=0 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. ..; \ - make -j $(nproc --all) && make install; \ - fi - -WORKDIR /opt/mmseqs/build_neon -RUN if [ X"$NAMESPACE" = X"arm64v8/" ]; then \ - cmake -DHAVE_MPI=0 -DHAVE_TESTS=0 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. ..; \ - make -j $(nproc --all) && make install; \ - touch /opt/mmseqs/build_sse/bin/mmseqs; \ - touch /opt/mmseqs/build_avx/bin/mmseqs; \ - else \ - touch /opt/mmseqs/build_neon/bin/mmseqs; \ - fi + +RUN mkdir -p build_sse/src && mkdir -p build_avx/src && mkdir -p build/src; \ + if [ X"$NAMESPACE" = X"" ]; then \ + cd /opt/mmseqs/build_sse; \ + cmake -DHAVE_SSE4_1=1 -DHAVE_MPI=0 -DHAVE_TESTS=0 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. ..; \ + make -j $(nproc --all); \ + mv src/mmseqs /opt/mmseqs/mmseqs_sse42; \ + cd /opt/mmseqs/build_avx; \ + cmake -DHAVE_AVX2=1 -DHAVE_MPI=0 -DHAVE_TESTS=0 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. ..; \ + make -j $(nproc --all); \ + mv src/mmseqs /opt/mmseqs/mmseqs_avx2; \ + else \ + cd /opt/mmseqs/build; \ + cmake -DHAVE_MPI=0 -DHAVE_TESTS=0 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. 
..; \ + make -j $(nproc --all); \ + mv src/mmseqs /opt/mmseqs/mmseqs_arch; \ + touch /opt/mmseqs/mmseqs_sse42 /opt/mmseqs/mmseqs_avx2; \ + fi FROM ${NAMESPACE}debian:stable-slim ARG NAMESPACE MAINTAINER Milot Mirdita -COPY --from=qemu-downloader /usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static +COPY --from=qemu-downloader /usr/bin/dummy_copy /usr/bin/qemu-aarch64-static* /usr/bin/qemu-ppc64le-static* /usr/bin/ RUN apt-get update && apt-get install -y \ - gawk bash grep libstdc++6 libgomp1 zlib1g libbz2-1.0 \ + gawk bash grep libstdc++6 libgomp1 zlib1g libbz2-1.0 wget tar \ && rm -rf /var/lib/apt/lists/* -COPY --from=mmseqs-builder /opt/mmseqs/build_sse/bin/mmseqs /usr/local/bin/mmseqs_sse42 -COPY --from=mmseqs-builder /opt/mmseqs/build_avx/bin/mmseqs /usr/local/bin/mmseqs_avx2 -COPY --from=mmseqs-builder /opt/mmseqs/build_neon/bin/mmseqs /usr/local/bin/mmseqs_neon +COPY --from=builder /opt/mmseqs/mmseqs_arch /opt/mmseqs/mmseqs_sse42 /opt/mmseqs/mmseqs_avx2 /usr/local/bin/ ADD util/mmseqs_wrapper.sh /usr/local/bin/mmseqs - -RUN if [ X"$NAMESPACE" = X"arm64v8/" ]; then mv -f /usr/local/bin/mmseqs_neon /usr/local/bin/mmseqs; fi +RUN if [ X"$NAMESPACE" != X"" ]; then mv -f /usr/local/bin/mmseqs_arch /usr/local/bin/mmseqs; fi CMD ["/usr/local/bin/mmseqs"] diff --git a/lib/ksw2/ksw2_extz2_sse.cpp b/lib/ksw2/ksw2_extz2_sse.cpp index e39bd28ce..676ed7df4 100644 --- a/lib/ksw2/ksw2_extz2_sse.cpp +++ b/lib/ksw2/ksw2_extz2_sse.cpp @@ -43,8 +43,14 @@ See: https://github.com/lh3/minimap2 #define KSW_SSE2_ONLY #endif +#ifdef __ALTIVEC__ +#include "sse2altivec.h" +#define __SSE2__ +#define KSW_SSE2_ONLY +#endif + #ifdef __SSE2__ -#if !defined(NEON) && !defined(WASM) +#if !defined(NEON) && !defined(WASM) && !defined(__ALTIVEC__) #include #endif diff --git a/lib/simd/simd.h b/lib/simd/simd.h index 23ae36e13..b81005d08 100644 --- a/lib/simd/simd.h +++ b/lib/simd/simd.h @@ -56,9 +56,13 @@ #ifdef WASM #include "sse2wasm.h" #else +#ifdef __ALTIVEC__ +#include "sse2altivec.h" 
+#else #include #endif #endif +#endif #ifdef AVX512 #include // AVX512 @@ -291,7 +295,7 @@ typedef __m256 simd_float; #ifdef SSE uint16_t simd_hmax16(const __m128i buffer); uint8_t simd_hmax8(const __m128i buffer); -#if !defined(NEON) && !defined(WASM) +#if !defined(NEON) && !defined(WASM) && !defined(__ALTIVEC__) #include //SSE4.1 // double support #ifndef SIMD_DOUBLE @@ -401,7 +405,7 @@ typedef __m128i simd_int; #endif //SIMD_INT #endif //SSE -#if WASM +#if defined(WASM) || defined(__ALTIVEC__) template inline F simd_hmax(const F * in, unsigned int n); @@ -621,7 +625,7 @@ inline float ScalarProd20(const float* qi, const float* tj) { // // //TODO fix this -#if defined(SSE) && !defined(WASM) +#if defined(SSE) && !defined(WASM) && !defined(__ALTIVEC__) float __attribute__((aligned(16))) res; __m128 P; // query 128bit SSE2 register holding 4 floats __m128 R;// result diff --git a/lib/simd/sse2altivec.h b/lib/simd/sse2altivec.h new file mode 100644 index 000000000..01dee9ddb --- /dev/null +++ b/lib/simd/sse2altivec.h @@ -0,0 +1,127 @@ +// sse2altivec is still very incomplete +// licensed under GPLv3 see LICENCE file +#ifndef SSE2ALTIVEC +#define SSE2ALTIVEC + +// ignore all warnings +#pragma GCC system_header + +#include +#define SSE 1 + +typedef __vector double __m128d; +typedef __vector float __m128; +typedef __vector int __m128i; + +typedef __vector signed char simd_s8; +typedef __vector unsigned char simd_u8; +typedef __vector signed short simd_s16; +typedef __vector unsigned short simd_u16; +typedef __vector int64_t simd_s64; +typedef __vector uint64_t simd_u64; + +#define _mm_add_ps(x,y) (__m128)((__m128)(x) + (__m128)(y)) +#define _mm_sub_ps(x,y) (__m128)((__m128)(x) - (__m128)(y)) +#define _mm_mul_ps(x,y) (__m128)((__m128)(x) * (__m128)(y)) +#define _mm_div_ps(x,y) (__m128)((__m128)(x) / (__m128)(y)) +#define _mm_rcp_ps(x) (__m128)vec_re((__m128)(x)) +#define _mm_max_ps(x,y) (__m128)vec_max((__m128)(x),(__m128)(y)) +#define _mm_min_ps(x,y) 
(__m128)vec_min((__m128)(x),(__m128)(y)) +#define _mm_load_ps(x) (__m128)vec_vsx_ld(0, (__m128 const*)(x)) +#define _mm_store_ps(x,y) vec_vsx_st((__m128)(y),0,(__m128*)(x)) +#define _mm_store_ss(x,y) ((void)(*(float*)(x) = vec_extract((__m128)(y),0))) /* SSE writes only lane 0 (4 bytes); a full vector store would overrun a single float */ +#define _mm_set1_ps(x) (__m128)vec_splats((float)(x)) +#define _mm_setzero_ps(x) (__m128)vec_splats((float)0) +#define _mm_cmpgt_ps(x,y) (__m128)vec_cmpgt((__m128)(x),(__m128)(y)) +#define _mm_cmpeq_ps(x,y) (__m128)vec_cmpeq((__m128)(x),(__m128)(y)) +#define _mm_cmplt_ps(x,y) (__m128)vec_cmplt((__m128)(x),(__m128)(y)) +#define _mm_or_ps(x,y) (__m128)vec_or((__m128)(x),(__m128)(y)) +#define _mm_and_ps(x,y) (__m128)vec_and((__m128)(x),(__m128)(y)) +#define _mm_andnot_ps(x,y) (__m128)vec_andc((__m128)(y),(__m128)(x)) /* SSE andnot is (~x) & y; vec_andc(a,b) is a & ~b, so the operands are swapped */ +#define _mm_xor_ps(x,y) (__m128)vec_xor((__m128)(x),(__m128)(y)) +#define _mm_cvtps_epi32(x) (__m128i)vec_cts(vec_rint((__m128)(x)),0) /* SSE rounds with the current rounding mode; vec_cts alone truncates */ +#define _mm_castps_si128(x) (__m128i)(x) +#define _mm_add_epi32(x,y) (__m128i)vec_add((__m128i)(x),(__m128i)(y)) +#define _mm_add_epi16(x,y) (__m128i)vec_add((simd_s16)(x),(simd_s16)(y)) +#define _mm_add_epi8(x,y) (__m128i)vec_add((simd_s8)(x),(simd_s8)(y)) +#define _mm_adds_epi16(x,y) (__m128i)vec_adds((simd_s16)(x),(simd_s16)(y)) +#define _mm_adds_epu8(x,y) (__m128i)vec_adds((simd_u8)(x),(simd_u8)(y)) +#define _mm_sub_epi32(x,y) (__m128i)vec_sub((__m128i)(x),(__m128i)(y)) +#define _mm_sub_epi16(x,y) (__m128i)vec_sub((simd_s16)(x),(simd_s16)(y)) +#define _mm_sub_epi8(x,y) (__m128i)vec_sub((simd_s8)(x),(simd_s8)(y)) +#define _mm_subs_epu16(x,y) (__m128i)vec_subs((simd_u16)(x),(simd_u16)(y)) +#define _mm_subs_epu8(x,y) (__m128i)vec_subs((simd_u8)(x),(simd_u8)(y)) +#define _mm_mullo_epi32(x,y) (__m128i)vec_mul((__m128i)(x),(__m128i)(y)) +#define _mm_max_epi32(x,y) (__m128i)vec_max((__m128i)(x),(__m128i)(y)) +#define _mm_max_epi16(x,y) (__m128i)vec_max((simd_s16)(x),(simd_s16)(y)) +#define _mm_max_epu8(x,y) (__m128i)vec_max((simd_u8)(x),(simd_u8)(y)) +#define _mm_min_epu8(x,y) 
(__m128i)vec_min((simd_u8)(x),(simd_u8)(y)) +#define _mm_load_si128(x) (__m128i)vec_vsx_ld(0,(__m128i const*)(x)) +#define _mm_loadu_si128(x) (__m128i)vec_vsx_ld(0,(__m128i const*)(x)) +#define _mm_storeu_si128(x,y) vec_vsx_st((__m128i)(y),0,(__m128i*)(x)) +#define _mm_store_si128(x,y) vec_vsx_st((__m128i)(y),0,(__m128i*)(x)) +#define _mm_set1_epi32(x) (__m128i)vec_splats((signed int)(x)) +#define _mm_set1_epi16(x) (__m128i)vec_splats((signed short)(x)) +#define _mm_set1_epi8(x) (__m128i)vec_splats((signed char)(x)) +#define _mm_setzero_si128(x) (__m128i)vec_splats(0) +#define _mm_cmpgt_epi32(x,y) (__m128i)vec_cmpgt((__m128i)(x),(__m128i)(y)) +#define _mm_cmpgt_epi16(x,y) (__m128i)vec_cmpgt((simd_s16)(x),(simd_s16)(y)) +#define _mm_cmpgt_epi8(x,y) (__m128i)vec_cmpgt((simd_s8)(x),(simd_s8)(y)) +#define _mm_cmpeq_epi32(x,y) (__m128i)vec_cmpeq((__m128i)(x),(__m128i)(y)) +#define _mm_cmpeq_epi16(x,y) (__m128i)vec_cmpeq((simd_s16)(x),(simd_s16)(y)) +#define _mm_cmpeq_epi8(x,y) (__m128i)vec_cmpeq((simd_s8)(x),(simd_s8)(y)) +#define _mm_cmplt_epi32(x,y) (__m128i)vec_cmplt((__m128i)(x),(__m128i)(y)) +#define _mm_cmplt_epi16(x,y) (__m128i)vec_cmplt((simd_s16)(x),(simd_s16)(y)) +#define _mm_cmplt_epi8(x,y) (__m128i)vec_cmplt((simd_s8)(x),(simd_s8)(y)) +#define _mm_or_si128(x,y) (__m128i)vec_or((__m128i)(x),(__m128i)(y)) +#define _mm_and_si128(x,y) (__m128i)vec_and((__m128i)(x),(__m128i)(y)) +#define _mm_andnot_si128(x,y) (__m128i)vec_andc((__m128i)(y),(__m128i)(x)) /* SSE andnot is (~x) & y; vec_andc(a,b) is a & ~b, so the operands are swapped */ +#define _mm_xor_si128(x,y) (__m128i)vec_xor((__m128i)(x),(__m128i)(y)) +#define _mm_extract_epi16(x,imm) (uint16_t)vec_extract((simd_u16)(x),(imm)) /* zero-extended like the x86 intrinsic, not sign-extended */ +#define _mm_extract_epi8(x,imm) (uint8_t)vec_extract((simd_u8)(x),(imm)) /* zero-extended like the x86 intrinsic, not sign-extended */ +#define _mm_slli_epi16(x,y) (simd_s16)vec_sl((simd_s16)(x),vec_splats((unsigned short)(y))) +#define _mm_srli_epi16(x,y) (simd_s16)vec_sr((simd_s16)(x),vec_splats((unsigned short)(y))) +#define _mm_slli_epi32(x,y) (__m128i)vec_sl((__m128i)(x),vec_splats((unsigned int)(y))) +#define 
_mm_srli_epi32(x,y) (__m128i)vec_sr((__m128i)(x),vec_splats((unsigned int)(y))) +#define _mm_cvtepi32_ps(x) (__m128)vec_ctf((__m128i)(x),0) +#define _mm_castsi128_ps(x) (__m128)(x) +#define _mm_slli_si128(x,y) (__m128i)vec_slo((simd_u8)(x),(simd_u8)vec_splats((char)(y << 3))) +#define _mm_srli_si128(x,y) (__m128i)vec_sro((simd_u8)(x),(simd_u8)vec_splats((char)(y << 3))) +#define _mm_cvtsi128_si64(a) (int64_t)vec_extract((simd_s64)(a),0) +#define _mm_cvtsi128_si32(a) (int32_t)vec_extract((__m128i)(a),0) +#define _mm_cvtsi64_si128(a) (__m128i)((simd_s64){(int64_t)(a),0}) +#define _mm_cvtsi32_si128(a) (__m128i){(int)(a),0,0,0} +#define _mm_packs_epi32(x,y) (simd_s16)vec_packs((__m128i)(x), (__m128i)(y)) +#define _mm_packus_epi16(x,y) (simd_u8)vec_packsu((simd_s16)(x), (simd_s16)(y)) +#define _mm_set_epi32(e3,e2,e1,e0) (__m128i){(e0),(e1),(e2),(e3)} +#define _mm_setr_epi32(e3,e2,e1,e0) (__m128i){(e3),(e2),(e1),(e0)} +#define _mm_set_epi16(e7,e6,e5,e4,e3,e2,e1,e0) \ + (__m128i)((simd_s16){(e0),(e1),(e2),(e3),(e4),(e5),(e6),(e7)}) +#define _mm_setr_epi16(e7,e6,e5,e4,e3,e2,e1,e0) \ + (__m128i)((simd_s16){(e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)}) +#define _mm_set_epi8(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) \ + (__m128i)((simd_s8){(e0),(e1),(e2),(e3),(e4),(e5),(e6),(e7),(e8),(e9),(e10),(e11),(e12),(e13),(e14),(e15)}) +#define _mm_setr_epi8(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) \ + (__m128i)((simd_s8){(e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)}) + +static inline void _mm_storel_epi64(__m128i* mem_addr, __m128i a) { + *((int64_t*)mem_addr) = (int64_t)vec_extract((simd_s64)(a), 0); +} + +// From OpenCV +// https://github.com/opencv/opencv/pull/15235 +// 3-Clause BSD License +static inline unsigned short _mm_movemask_epi8(__m128i value) { + static const simd_u8 perm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0}; + return vec_extract((__m128i)vec_vbpermq((simd_u8)value, perm), 2); 
+} + +// From reedsolomon +// https://github.com/NicolasT/reedsolomon/blob/master/cbits/reedsolomon.c +// MIT License +static inline __m128i _mm_shuffle_epi8(__m128i a, __m128i b) { + const __m128i zero = (__m128i)vec_splats((unsigned char)0); + return (__m128i)vec_perm((simd_u8)a, (simd_u8)zero, (simd_u8)b); +} + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0bd408b97..c29a7796b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -121,7 +121,7 @@ if (HAVE_POSIX_MADVISE) endif () # SIMD instruction sets support -if (ARM OR EMSCRIPTEN) +if (ARM OR PPC64 OR EMSCRIPTEN) elseif (HAVE_AVX2) target_compile_definitions(mmseqs-framework PUBLIC -DAVX2=1) if (CMAKE_COMPILER_IS_CLANG) diff --git a/src/commons/Application.cpp b/src/commons/Application.cpp index 12a003c61..b5d3023b2 100644 --- a/src/commons/Application.cpp +++ b/src/commons/Application.cpp @@ -4,7 +4,7 @@ #include "DistanceCalculator.h" #include "Timer.h" -#if !defined(NEON) && !defined(WASM) +#if !defined(NEON) && !defined(WASM) && !defined(__ALTIVEC__) #include #else #define NO_CPUINFO diff --git a/src/commons/Orf.cpp b/src/commons/Orf.cpp index 79042b905..6a57851bf 100644 --- a/src/commons/Orf.cpp +++ b/src/commons/Orf.cpp @@ -211,7 +211,7 @@ inline bool isInCodons(const char* sequence, simd_int codons, simd_int) { // c: ATG0 ATG0 ATG0 ATG0 c = simdi_and(mask, c); // t: FFFF 0000 0000 0000 - simd_int test = simdi32_eq(shuf, codons); + simd_int test = simdi32_eq(c, codons); #ifndef AVX2 if (N > 4) { simd_int test2 = simdi32_eq(c, codons2); diff --git a/src/commons/Util.cpp b/src/commons/Util.cpp index 3c2234cfb..2fe2498e6 100644 --- a/src/commons/Util.cpp +++ b/src/commons/Util.cpp @@ -604,7 +604,7 @@ uint64_t Util::revComplement(const uint64_t kmer, const int k) { // create lookup (set 16 bytes in 128 bit) // a lookup entry at the index of two nucleotides (4 bit) describes the reverse // complement of these two nucleotide in the higher 4 bits (lookup1) or in the lower 4 bits 
(lookup2) -#define c (char) +#define c (signed char) __m128i lookup1 = _mm_set_epi8(c(0x50),c(0x10),c(0xD0),c(0x90),c(0x40),c(0x00),c(0xC0),c(0x80), c(0x70),c(0x30),c(0xF0),c(0xB0),c(0x60),c(0x20),c(0xE0),c(0xA0)); __m128i lookup2 = _mm_set_epi8(c(0x05),c(0x01),c(0x0D),c(0x09),c(0x04),c(0x00),c(0x0C),c(0x08), diff --git a/src/commons/itoa.h b/src/commons/itoa.h index dcdb264bc..a79c316f1 100644 --- a/src/commons/itoa.h +++ b/src/commons/itoa.h @@ -22,9 +22,10 @@ THE SOFTWARE. */ // SSE2 implementation according to http://0x80.pl/articles/sse-itoa.html // Modifications: (1) fix incorrect digits (2) accept all ranges (3) write to user provided buffer. -#include +#include -#ifdef WASM +#if defined(WASM) || defined(__ALTIVEC__) +#include class Itoa{ public: static char* u32toa_sse2(uint32_t value, char* buffer) { @@ -44,8 +45,12 @@ class Itoa{ #ifdef NEON #include "sse2neon.h" #else +#ifdef __ALTIVEC__ +#include "sse2altivec.h" +#else #include #endif +#endif #define ALIGN_PRE