diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..c492825 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.5.6 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..ced47c7 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,209 @@ +# The test build matrix (stage: test) is constructed to test a wide range of +# configurations, rather than a single pass/fail. This helps to catch build +# failures and logic errors that present on platforms other than the ones the +# author has tested. +# +# Some builders use the dependency-generator in `./depends`, rather than using +# apt-get to install build dependencies. This guarantees that the tester is +# using the same versions as Gitian, so the build results are nearly identical +# to what would be found in a final release. +# +# In order to avoid rebuilding all dependencies for each build, the binaries +# are cached and re-used when possible. Changes in the dependency-generator +# will trigger cache-invalidation and rebuilds as necessary. +# +# These caches can be manually removed if necessary. This is one of the very +# few manual operations that is possible with Travis, and it can be done by a +# Simplicity GitHub member via the Travis web interface [0]. +# +# Travis CI uploads the cache after the script phase of the build [1]. +# However, the build is terminated without saving the cache if it takes over +# 50 minutes [2]. Thus, if we spent too much time in early build stages, fail +# with an error and save the cache. 
+# +# [0] https://travis-ci.org/simplicity-coin/simplicity/caches +# [1] https://docs.travis-ci.com/user/caching/#build-phases +# [2] https://docs.travis-ci.com/user/customizing-the-build#build-timeouts + +dist: xenial +os: linux +language: minimal +cache: + ccache: true + directories: + - $TRAVIS_BUILD_DIR/depends/built + - $TRAVIS_BUILD_DIR/depends/sdk-sources + - $HOME/.ccache +stages: + - lint + - test +env: + global: + - MAKEJOBS=-j3 + - RUN_UNIT_TESTS=false # todo - fix + - RUN_FUNCTIONAL_TESTS=false # Not Yet Implemented + - RUN_BENCH=false # Set to true for any one job that has debug enabled, to quickly check bench is not crashing or hitting assertions + - DOCKER_NAME_TAG=ubuntu:18.04 + - BOOST_TEST_RANDOM=1$TRAVIS_BUILD_ID + - CCACHE_SIZE=100M + - CCACHE_TEMPDIR=/tmp/.ccache-temp + - CCACHE_COMPRESS=1 + - CCACHE_DIR=$HOME/.ccache + - BASE_OUTDIR=$TRAVIS_BUILD_DIR/out + - SDK_URL=https://bitcoincore.org/depends-sources/sdks + - WINEDEBUG=fixme-all + - DOCKER_PACKAGES="build-essential libtool autotools-dev automake pkg-config bsdmainutils curl git ca-certificates ccache" + - CACHE_ERR_MSG="Error! Initial build successful, but not enough time remains to run later build stages and tests. Please manually re-run this job by using the travis restart button or asking a bitcoin maintainer to restart. The next run should not time out because the build cache has been saved." 
+before_install: + - set -o errexit; source .travis/test_03_before_install.sh +install: + - set -o errexit; source .travis/test_04_install.sh +before_script: + - set -o errexit; source .travis/test_05_before_script.sh +script: + - export CONTINUE=1 + - if [ $SECONDS -gt 1200 ]; then export CONTINUE=0; fi # Likely the depends build took very long + - if [ $CONTINUE = "1" ]; then set -o errexit; source .travis/test_06_script_a.sh; else set +o errexit; echo "$CACHE_ERR_MSG"; false; fi + - if [ $SECONDS -gt 1500 ]; then export CONTINUE=0; fi # Likely the build took very long; The tests take about 1000s, so we should abort if we have less than 50*60-1000=2000s left + - if [ $CONTINUE = "1" ]; then set -o errexit; source .travis/test_06_script_b.sh; else set +o errexit; echo "$CACHE_ERR_MSG"; false; fi +after_script: + - echo $TRAVIS_COMMIT_RANGE + - echo $TRAVIS_COMMIT_LOG +jobs: + include: + + - stage: lint + name: 'lint' + env: + cache: false + language: python + python: '3.5' # Oldest supported version according to doc/dependencies.md + install: + - set -o errexit; source .travis/lint_04_install.sh + before_script: + - set -o errexit; source .travis/lint_05_before_script.sh + script: + - set -o errexit; source .travis/lint_06_script.sh + + - stage: test + name: 'ARM 32-bit [GOAL: install] [no unit or functional tests]' + env: >- + HOST=arm-linux-gnueabihf + PACKAGES="python3 g++-arm-linux-gnueabihf" + RUN_UNIT_TESTS=false + RUN_FUNCTIONAL_TESTS=false + GOAL="install" + # -Wno-psabi is to disable ABI warnings: "note: parameter passing for argument of type ... 
changed in GCC 7.1" + # This could be removed once the ABI change warning does not show up by default + BITCOIN_CONFIG="--enable-glibc-back-compat --enable-reduce-exports CXXFLAGS=-Wno-psabi" + + - stage: test + name: 'ARM 64-bit [GOAL:install] [no unit or functional tests]' + env: >- + HOST=aarch64-linux-gnu + PACKAGES="python3 g++-aarch64-linux-gnu" + RUN_UNIT_TESTS=false + RUN_FUNCTIONAL_TESTS=false + GOAL="install" + BITCOIN_CONFIG="--enable-glibc-back-compat --enable-reduce-exports" + + - stage: test + name: 'Win32 [GOAL: deploy] [no functional tests]' + env: >- + HOST=i686-w64-mingw32 + DPKG_ADD_ARCH="i386" + PACKAGES="python3 nsis g++-mingw-w64-i686 wine-binfmt wine32" + RUN_FUNCTIONAL_TESTS=false + GOAL="deploy" + BITCOIN_CONFIG="--enable-reduce-exports" + + - stage: test + name: 'Win64 [GOAL: deploy] [no functional tests]' + env: >- + HOST=x86_64-w64-mingw32 + PACKAGES="python3 nsis g++-mingw-w64-x86-64 wine-binfmt wine64" + RUN_FUNCTIONAL_TESTS=false + GOAL="deploy" + BITCOIN_CONFIG="--enable-reduce-exports" + + - stage: test + name: '32-bit + dash [GOAL: install] [no gui]' + env: >- + HOST=i686-pc-linux-gnu + PACKAGES="g++-multilib python3-zmq" + GOAL="install" + BITCOIN_CONFIG="--enable-zmq --with-gui=qt5 --enable-glibc-back-compat --enable-reduce-exports LDFLAGS=-static-libstdc++" + CONFIG_SHELL="/bin/dash" + + #- stage: test # todo - fix + #name: 'x86_64 Linux [GOAL: install] [bionic] [uses qt5 dev package instead of depends Qt to speed up build and avoid timeout]' + #env: >- + #HOST=x86_64-unknown-linux-gnu + #PACKAGES="python3-zmq qtbase5-dev qttools5-dev-tools protobuf-compiler libdbus-1-dev libharfbuzz-dev libprotobuf-dev" + #DEP_OPTS="NO_QT=1 NO_UPNP=1 DEBUG=1 ALLOW_HOST_PACKAGES=1" + #RUN_FUNCTIONAL_TESTS=true + ##TEST_RUNNER_EXTRA="--coverage --extended" # Run extended tests so that coverage does not fail, but exclude the very slow dbcrash + #GOAL="install" + #BITCOIN_CONFIG="--enable-zmq --with-gui=qt5 --enable-glibc-back-compat 
--enable-reduce-exports" + + - stage: test + name: 'x86_64 Linux [GOAL: install] [trusty] [no functional tests, no depends, only system libs]' + env: >- + HOST=x86_64-unknown-linux-gnu + DOCKER_NAME_TAG=ubuntu:14.04 + PACKAGES="python3-zmq qtbase5-dev qttools5-dev-tools libicu-dev libpng-dev libssl-dev libevent-dev bsdmainutils libboost-system-dev libboost-filesystem-dev libboost-chrono-dev libboost-program-options-dev libboost-test-dev libboost-thread-dev libdb5.1++-dev libminiupnpc-dev libzmq3-dev libprotobuf-dev protobuf-compiler libqrencode-dev libgmp-dev" + NO_DEPENDS=1 + RUN_FUNCTIONAL_TESTS=false + GOAL="install" + BITCOIN_CONFIG="--enable-zmq --with-incompatible-bdb --with-gui=no" + + - stage: test + name: 'x86_64 Linux [GOAL: install] [xenial] [no depends, only system libs]' + env: >- + HOST=x86_64-unknown-linux-gnu + DOCKER_NAME_TAG=ubuntu:16.04 + PACKAGES="python3-zmq qtbase5-dev qttools5-dev-tools libssl-dev libevent-dev bsdmainutils libboost-system-dev libboost-filesystem-dev libboost-chrono-dev libboost-program-options-dev libboost-test-dev libboost-thread-dev libdb5.3++-dev libminiupnpc-dev libzmq3-dev libprotobuf-dev protobuf-compiler libqrencode-dev libgmp-dev" + NO_DEPENDS=1 + GOAL="install" + BITCOIN_CONFIG="--enable-zmq --with-incompatible-bdb --with-gui=qt5 CPPFLAGS=-DDEBUG_LOCKORDER --disable-hardening --disable-asm" + + - stage: test + name: 'x86_64 Linux [GOAL: install] [bionic] [no depends, only system libs]' + env: >- + HOST=x86_64-unknown-linux-gnu + PACKAGES="python3-zmq qtbase5-dev qttools5-dev-tools libssl1.0-dev libevent-dev bsdmainutils libboost-system-dev libboost-filesystem-dev libboost-chrono-dev libboost-program-options-dev libboost-test-dev libboost-thread-dev libdb5.3++-dev libminiupnpc-dev libzmq3-dev libprotobuf-dev protobuf-compiler libqrencode-dev libgmp-dev" + NO_DEPENDS=1 + GOAL="install" + BITCOIN_CONFIG="--enable-zmq --with-incompatible-bdb --with-gui=qt5 CPPFLAGS=-DDEBUG_LOCKORDER" + +# - stage: test +# name: 'x86_64 
Linux [GOAL: install] [bionic] [no depends, only system libs, sanitizers: fuzzer,address]' +# env: >- +# HOST=x86_64-unknown-linux-gnu +# PACKAGES="clang python3-zmq qtbase5-dev qttools5-dev-tools libssl1.0-dev libevent-dev bsdmainutils libboost-system-dev libboost-filesystem-dev libboost-chrono-dev libboost-program-options-dev libboost-test-dev libboost-thread-dev libdb5.3++-dev libminiupnpc-dev libzmq3-dev libprotobuf-dev protobuf-compiler libqrencode-dev libgmp-dev" +# NO_DEPENDS=1 +# RUN_UNIT_TESTS=false +# RUN_FUNCTIONAL_TESTS=false +# RUN_BENCH=true +# GOAL="install" +# BITCOIN_CONFIG="--enable-zmq --with-incompatible-bdb --enable-glibc-back-compat --enable-reduce-exports --with-gui=qt5 CPPFLAGS=-DDEBUG_LOCKORDER --with-sanitizers=undefined CC=clang CXX=clang++" + +# - stage: test +# name: 'x86_64 Linux [GOAL: install] [bionic] [no wallet]' +# env: >- +# HOST=x86_64-unknown-linux-gnu +# PACKAGES="python3-zmq" +# DEP_OPTS="NO_WALLET=1" +# GOAL="install" +# BITCOIN_CONFIG="--enable-glibc-back-compat --enable-reduce-exports" + + - stage: test + name: 'macOS 10.10 [GOAL: deploy] [no functional tests]' + env: >- + HOST=x86_64-apple-darwin14 + PACKAGES="cmake imagemagick libcap-dev librsvg2-bin libz-dev libbz2-dev libtiff-tools python3-dev python3-setuptools" + OSX_SDK=10.11 + RUN_UNIT_TESTS=false + RUN_FUNCTIONAL_TESTS=false + GOAL="deploy" + BITCOIN_CONFIG="--enable-gui --enable-reduce-exports --enable-werror" diff --git a/.travis/README.md b/.travis/README.md new file mode 100644 index 0000000..c837e19 --- /dev/null +++ b/.travis/README.md @@ -0,0 +1,7 @@ +## travis build scripts + +The `.travis` directory contains scripts for each build step in each build stage. +Currently the travis build defines two stages `lint` and `test`. Each stage has +its own [lifecycle](https://docs.travis-ci.com/user/customizing-the-build/#the-build-lifecycle). +Every script in here is named and numbered according to which stage and lifecycle +step it belongs to. 
diff --git a/.travis/lint_04_install.sh b/.travis/lint_04_install.sh new file mode 100755 index 0000000..9a22773 --- /dev/null +++ b/.travis/lint_04_install.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. + +export LC_ALL=C + +travis_retry pip install codespell==1.13.0 +travis_retry pip install flake8==3.5.0 +travis_retry pip install vulture==0.29 + +SHELLCHECK_VERSION=v0.6.0 +curl -s "https://storage.googleapis.com/shellcheck/shellcheck-${SHELLCHECK_VERSION}.linux.x86_64.tar.xz" | tar --xz -xf - --directory /tmp/ +export PATH="/tmp/shellcheck-${SHELLCHECK_VERSION}:${PATH}" diff --git a/.travis/lint_05_before_script.sh b/.travis/lint_05_before_script.sh new file mode 100755 index 0000000..5a4aab1 --- /dev/null +++ b/.travis/lint_05_before_script.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. + +export LC_ALL=C + +#git fetch --unshallow diff --git a/.travis/lint_06_script.sh b/.travis/lint_06_script.sh new file mode 100755 index 0000000..deabc13 --- /dev/null +++ b/.travis/lint_06_script.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +export LC_ALL=C + + +#contrib/devtools/git-subtree-check.sh src/secp256k1 +#contrib/devtools/git-subtree-check.sh src/univalue +#contrib/devtools/git-subtree-check.sh src/leveldb +contrib/devtools/check-doc.py +contrib/devtools/logprint-scanner.py + +if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then + contrib/devtools/lint-whitespace.sh +fi diff --git a/.travis/test_03_before_install.sh b/.travis/test_03_before_install.sh new file mode 100755 index 0000000..16d3154 --- /dev/null +++ b/.travis/test_03_before_install.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. + +export LC_ALL=C.UTF-8 + +PATH=$(echo $PATH | tr ':' "\n" | sed '/\/opt\/python/d' | tr "\n" ":" | sed "s|::|:|g") +# Add llvm-symbolizer directory to PATH. Needed to get symbolized stack traces from the sanitizers. +PATH=$PATH:/usr/lib/llvm-6.0/bin/ +export PATH + +BEGIN_FOLD () { + echo "" + CURRENT_FOLD_NAME=$1 + echo "travis_fold:start:${CURRENT_FOLD_NAME}" +} + +END_FOLD () { + RET=$? + echo "travis_fold:end:${CURRENT_FOLD_NAME}" + if [ $RET != 0 ]; then + echo "${CURRENT_FOLD_NAME} failed with status code ${RET}" + fi +} diff --git a/.travis/test_04_install.sh b/.travis/test_04_install.sh new file mode 100755 index 0000000..451e921 --- /dev/null +++ b/.travis/test_04_install.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +export LC_ALL=C.UTF-8 + +travis_retry docker pull "$DOCKER_NAME_TAG" +env | grep -E '^(BITCOIN_CONFIG|CCACHE_|WINEDEBUG|LC_ALL|BOOST_TEST_RANDOM|CONFIG_SHELL)' | tee /tmp/env +if [[ $HOST = *-mingw32 ]]; then + DOCKER_ADMIN="--cap-add SYS_ADMIN" +elif [[ $BITCOIN_CONFIG = *--with-sanitizers=*address* ]]; then # If ran with (ASan + LSan), Docker needs access to ptrace (https://github.com/google/sanitizers/issues/764) + DOCKER_ADMIN="--cap-add SYS_PTRACE" +fi +DOCKER_ID=$(docker run $DOCKER_ADMIN -idt --mount type=bind,src=$TRAVIS_BUILD_DIR,dst=$TRAVIS_BUILD_DIR --mount type=bind,src=$CCACHE_DIR,dst=$CCACHE_DIR -w $TRAVIS_BUILD_DIR --env-file /tmp/env $DOCKER_NAME_TAG) + +DOCKER_EXEC () { + docker exec $DOCKER_ID bash -c "cd $PWD && $*" +} + +if [ -n "$DPKG_ADD_ARCH" ]; then + DOCKER_EXEC dpkg --add-architecture "$DPKG_ADD_ARCH" +fi + +travis_retry DOCKER_EXEC apt-get update +travis_retry DOCKER_EXEC apt-get install --no-install-recommends --no-upgrade -qq $PACKAGES $DOCKER_PACKAGES diff --git a/.travis/test_05_before_script.sh b/.travis/test_05_before_script.sh new file mode 100755 index 0000000..64a3223 --- /dev/null +++ b/.travis/test_05_before_script.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. + +export LC_ALL=C.UTF-8 + +#DOCKER_EXEC echo \> \$HOME/.simplicity # Make sure default datadir does not exist and is never read by creating a dummy file + +mkdir -p depends/SDKs depends/sdk-sources + +if [ -n "$OSX_SDK" -a ! 
-f depends/sdk-sources/MacOSX${OSX_SDK}.sdk.tar.gz ]; then + curl --location --fail $SDK_URL/MacOSX${OSX_SDK}.sdk.tar.gz -o depends/sdk-sources/MacOSX${OSX_SDK}.sdk.tar.gz +fi +if [ -n "$OSX_SDK" -a -f depends/sdk-sources/MacOSX${OSX_SDK}.sdk.tar.gz ]; then + tar -C depends/SDKs -xf depends/sdk-sources/MacOSX${OSX_SDK}.sdk.tar.gz +fi +if [[ $HOST = *-mingw32 ]]; then + DOCKER_EXEC update-alternatives --set $HOST-g++ \$\(which $HOST-g++-posix\) +fi +if [ -z "$NO_DEPENDS" ]; then + DOCKER_EXEC CONFIG_SHELL= make $MAKEJOBS -C depends HOST=$HOST $DEP_OPTS +fi diff --git a/.travis/test_06_script_a.sh b/.travis/test_06_script_a.sh new file mode 100755 index 0000000..89eabf1 --- /dev/null +++ b/.travis/test_06_script_a.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. + +export LC_ALL=C.UTF-8 + +TRAVIS_COMMIT_LOG=$(git log --format=fuller -1) +export TRAVIS_COMMIT_LOG + +OUTDIR=$BASE_OUTDIR/$TRAVIS_PULL_REQUEST/$TRAVIS_JOB_NUMBER-$HOST +BITCOIN_CONFIG_ALL="--disable-dependency-tracking --prefix=$TRAVIS_BUILD_DIR/depends/$HOST --bindir=$OUTDIR/bin --libdir=$OUTDIR/lib" +if [ -z "$NO_DEPENDS" ]; then + DOCKER_EXEC ccache --max-size=$CCACHE_SIZE +fi + +BEGIN_FOLD autogen +if [ -n "$CONFIG_SHELL" ]; then + DOCKER_EXEC "$CONFIG_SHELL" -c "./autogen.sh" +else + DOCKER_EXEC ./autogen.sh +fi +END_FOLD + +mkdir build +cd build || (echo "could not enter build directory"; exit 1) + +BEGIN_FOLD configure +DOCKER_EXEC ../configure --cache-file=config.cache $BITCOIN_CONFIG_ALL $BITCOIN_CONFIG || ( cat config.log && false) +END_FOLD + +BEGIN_FOLD distdir +DOCKER_EXEC make distdir VERSION=$HOST +END_FOLD + +cd "simplicity-$HOST" || (echo "could not enter distdir simplicity-$HOST"; exit 1) + +BEGIN_FOLD configure +DOCKER_EXEC ./configure --cache-file=../config.cache $BITCOIN_CONFIG_ALL $BITCOIN_CONFIG || ( 
cat config.log && false) +END_FOLD + +set -o errtrace +trap 'DOCKER_EXEC "cat ${TRAVIS_BUILD_DIR}/sanitizer-output/* 2> /dev/null"' ERR + +BEGIN_FOLD build +DOCKER_EXEC make $MAKEJOBS $GOAL || ( echo "Build failure. Verbose build follows." && DOCKER_EXEC make $GOAL V=1 ; false ) +END_FOLD + +cd ${TRAVIS_BUILD_DIR} || (echo "could not enter travis build dir $TRAVIS_BUILD_DIR"; exit 1) diff --git a/.travis/test_06_script_b.sh b/.travis/test_06_script_b.sh new file mode 100755 index 0000000..6bacf99 --- /dev/null +++ b/.travis/test_06_script_b.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. + +export LC_ALL=C.UTF-8 + +cd "build/simplicity-$HOST" || (echo "could not enter distdir build/simplicity-$HOST"; exit 1) + +if [ "$RUN_UNIT_TESTS" = "true" ]; then + BEGIN_FOLD unit-tests + DOCKER_EXEC LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/depends/$HOST/lib make $MAKEJOBS check VERBOSE=1 + END_FOLD +fi + +if [ "$RUN_FUNCTIONAL_TESTS" = "true" ]; then + BEGIN_FOLD functional-tests + DOCKER_EXEC test/functional/test_runner.py --combinedlogslen=4000 ${TEST_RUNNER_EXTRA} + END_FOLD +fi + +cd ${TRAVIS_BUILD_DIR} || (echo "could not enter travis build dir $TRAVIS_BUILD_DIR"; exit 1) diff --git a/README.md b/README.md index f16e1a0..8ef614b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Simplicity integration/staging repository ===================================== -[![Build Status](https://travis-ci.org/Simplicity-Project/Simplicity.svg?branch=master)](https://travis-ci.org/Simplicity-Project/Simplicity) [![GitHub version](https://badge.fury.io/gh/Simplicity-Project%2FSimplicity.svg)](https://badge.fury.io/gh/Simplicity-Project%2FSimplicity) +[![Build Status](https://travis-ci.com/simplicity-coin/simplicity.svg?branch=master)](https://travis-ci.com/simplicity-coin/simplicity) [![GitHub 
version](https://badge.fury.io/gh/simplicity-coin%2Fsimplicity.svg)](https://badge.fury.io/gh/simplicity-coin%2Fsimplicity) Simplicity is an open source crypto-currency focused on fast transactions, with low transaction fees & environmental footprint. It utilizes multi algo PoW, PoS, and several masternode tiers for securing its network. The goal of Simplicity is to achieve a decentralized sustainable crypto currency with near instant transactions, fair governance and community intelligence. - Fast transactions featuring guaranteed zero confirmation transactions, we call it _SwiftX_. diff --git a/configure.ac b/configure.ac index 86938f3..d8567bd 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ AC_PREREQ([2.60]) define(_CLIENT_VERSION_MAJOR, 2) define(_CLIENT_VERSION_MINOR, 0) define(_CLIENT_VERSION_REVISION, 1) -define(_CLIENT_VERSION_BUILD, 1) +define(_CLIENT_VERSION_BUILD, 2) define(_CLIENT_VERSION_IS_RELEASE, true) define(_COPYRIGHT_YEAR, 2019) AC_INIT([Simplicity],[_CLIENT_VERSION_MAJOR._CLIENT_VERSION_MINOR._CLIENT_VERSION_REVISION],[www.simplicity.org],[simplicity]) @@ -58,6 +58,9 @@ case $host in *mingw*) lt_cv_deplibs_check_method="pass_all" ;; + *aarch64*) + CFLAGS="$CFLAGS -march=armv8.1-a+crypto" + ;; esac dnl Require C++11 compiler (no GNU extensions) AX_CXX_COMPILE_STDCXX([11], [noext], [mandatory], [nodefault]) @@ -189,7 +192,6 @@ AC_ARG_ENABLE([asm], if test "x$use_asm" = xyes; then AC_DEFINE(USE_ASM, 1, [Define this symbol to build in assembly routines]) - AC_DEFINE(USE_XOP, 1, [Define this symbol to enable optimization]) fi AC_ARG_WITH([system-univalue], @@ -326,7 +328,9 @@ fi # compatibility. 
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]]) AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]]) -AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]]) +AX_CHECK_COMPILE_FLAG([-mavx],[[AVX_CXXFLAGS="-mavx"]],,[[$CXXFLAG_WERROR]]) +AX_CHECK_COMPILE_FLAG([-mxop],[[XOP_CXXFLAGS="-mxop"]],,[[$CXXFLAG_WERROR]]) +AX_CHECK_COMPILE_FLAG([-mavx2],[[AVX2_CXXFLAGS="-mavx2"]],,[[$CXXFLAG_WERROR]]) AX_CHECK_COMPILE_FLAG([-msse4 -msha],[[SHANI_CXXFLAGS="-msse4 -msha"]],,[[$CXXFLAG_WERROR]]) TEMP_CXXFLAGS="$CXXFLAGS" @@ -366,6 +370,30 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ) CXXFLAGS="$TEMP_CXXFLAGS" +TEMP_CXXFLAGS="$CXXFLAGS" +CXXFLAGS="$CXXFLAGS $AVX_CXXFLAGS" +AC_MSG_CHECKING(for AVX intrinsics) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + #include + #include + ]],[[ + __m256i l = _mm256_set1_epi32(0); + return _mm256_extract_epi32(l, 7); + ]])], + [ AC_MSG_RESULT(yes); enable_avx=yes; AC_DEFINE(ENABLE_AVX, 1, [Define this symbol to build code that uses AVX intrinsics]) ], + [ AC_MSG_RESULT(no)] +) +CXXFLAGS="$TEMP_CXXFLAGS" + +TEMP_CXXFLAGS="$CXXFLAGS" +CXXFLAGS="$CXXFLAGS $XOP_CXXFLAGS" +AC_MSG_CHECKING(for XOP intrinsics) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([asm ("vprotd \$7, %xmm0, %xmm1");])], + [ AC_MSG_RESULT(yes); enable_xop=yes; AC_DEFINE(ENABLE_XOP, 1, [Define this symbol to build code that uses XOP intrinsics]) ], + [ AC_MSG_RESULT(no)] +) +CXXFLAGS="$TEMP_CXXFLAGS" + TEMP_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS" AC_MSG_CHECKING(for AVX2 intrinsics) @@ -374,7 +402,8 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ #include ]],[[ __m256i l = _mm256_set1_epi32(0); - return _mm256_extract_epi32(l, 7); + __m256i m = _mm256_add_epi32(l, l); + return _mm256_extract_epi32(m, 7); ]])], [ AC_MSG_RESULT(yes); enable_avx2=yes; AC_DEFINE(ENABLE_AVX2, 1, [Define this symbol to build code that uses AVX2 intrinsics]) ], [ AC_MSG_RESULT(no)] @@ -1334,6 +1363,8 @@ 
AM_CONDITIONAL([GLIBC_BACK_COMPAT],[test x$use_glibc_compat = xyes]) AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes]) AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes]) AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes]) +AM_CONDITIONAL([ENABLE_AVX],[test x$enable_avx = xyes]) +AM_CONDITIONAL([ENABLE_XOP],[test x$enable_xop = xyes]) AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes]) AM_CONDITIONAL([ENABLE_SHANI],[test x$enable_shani = xyes]) AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes]) @@ -1373,6 +1404,8 @@ AC_SUBST(SANITIZER_CXXFLAGS) AC_SUBST(SANITIZER_LDFLAGS) AC_SUBST(SSE42_CXXFLAGS) AC_SUBST(SSE41_CXXFLAGS) +AC_SUBST(AVX_CXXFLAGS) +AC_SUBST(XOP_CXXFLAGS) AC_SUBST(AVX2_CXXFLAGS) AC_SUBST(SHANI_CXXFLAGS) AC_SUBST(LIBTOOL_APP_LDFLAGS) diff --git a/src/Makefile.am b/src/Makefile.am index 17e0101..21010c7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -297,13 +297,15 @@ crypto_libbitcoin_crypto_a_SOURCES = \ crypto/rfc6979_hmac_sha256.cpp \ crypto/hmac_sha512.cpp \ crypto/scrypt.cpp \ - crypto/scrypt2.cpp \ - crypto/scrypt-arm.S \ - crypto/scrypt-x64.S \ - crypto/scrypt-x86.S \ crypto/sha2-arm.S \ + crypto/sha2-armv8.c \ crypto/sha2-x64.S \ crypto/sha2-x86.S \ + crypto/scrypt-arm.S \ + crypto/scrypt-armv8.c \ + crypto/scrypt-x64.S \ + crypto/scrypt-x86.S \ + crypto/scrypt_opt.cpp \ crypto/ripemd160.cpp \ crypto/aes_helper.c \ crypto/blake.c \ @@ -319,7 +321,7 @@ crypto_libbitcoin_crypto_a_SOURCES = \ crypto/rfc6979_hmac_sha256.h \ crypto/hmac_sha512.h \ crypto/scrypt.h \ - crypto/scrypt2.h \ + crypto/scrypt_opt.h \ crypto/sha1.h \ crypto/ripemd160.h \ crypto/sph_blake.h \ diff --git a/src/activemasternode.cpp b/src/activemasternode.cpp index fd17141..2f6dc3b 100644 --- a/src/activemasternode.cpp +++ b/src/activemasternode.cpp @@ -473,9 +473,17 @@ std::vector CActiveMasternode::SelectCoinsMasternode() } // Filter - for (const COutput& out : vCoins) { - if (CMasternode::IsDepositCoins(out.tx->vout[out.i].nValue)) { - 
filteredCoins.push_back(out); + if (IsSporkActive(SPORK_18_NEW_MASTERNODE_TIERS)) { + for (const COutput& out : vCoins) { + if (CMasternode::IsDepositCoins(out.tx->vout[out.i].nValue)) { + filteredCoins.push_back(out); + } + } + } else { + for (const COutput& out : vCoins) { + if (CMasternode::Level(out.tx->vout[out.i].nValue, chainActive.Height()) == 3u) { + filteredCoins.push_back(out); + } } } return filteredCoins; diff --git a/src/chain.h b/src/chain.h index 48c2870..4889985 100644 --- a/src/chain.h +++ b/src/chain.h @@ -171,7 +171,7 @@ class CBlockIndex uint256 nStakeModifierV2; //! block header - int nVersion; + unsigned int nVersion; uint256 hashMerkleRoot; unsigned int nTime; unsigned int nBits; @@ -395,7 +395,7 @@ class CBlockIndex * in the last Params().ToCheckBlockUpgradeMajority() blocks, starting at pstart * and going backwards. */ - static bool IsSuperMajority(int minVersion, const CBlockIndex* pstart, unsigned int nRequired); + static bool IsSuperMajority(unsigned int minVersion, const CBlockIndex* pstart, unsigned int nRequired); std::string ToString() const { diff --git a/src/chainparams.cpp b/src/chainparams.cpp index 2a2c001..6744eb9 100644 --- a/src/chainparams.cpp +++ b/src/chainparams.cpp @@ -70,11 +70,13 @@ static Checkpoints::MapCheckpoints mapCheckpoints = (650000, uint256("cf6076eda981af1097e52f505c0c3dfefc60af9b93ba674fda8e22235ff50df3")) (700000, uint256("2f852dfbc9b767905400c0e706ff63eca8d5e4090d4d49f873f9be1a754cc243")) (950000, uint256("0cbb4dfac570e6cbf7aa10b3f8a138b3dda3e908ab78e301f12354731bbce560")) - (1030000, uint256("6435fc65c4b7dc50bf254124884d7787c99451b2fe8b604c5a8435849beba1f5")); + (1030000, uint256("6435fc65c4b7dc50bf254124884d7787c99451b2fe8b604c5a8435849beba1f5")) + (1040000, uint256("a8212fbda825a42ecd3a0d1251437626fbde53afc1ea4eea76d05b4898718a0f")) + (1050000, uint256("9ab97fa25881e95b4c22fb7515d90738054a43231231acf8d5fc3be581591192")); static const Checkpoints::CCheckpointData data = { &mapCheckpoints, - 1573624156, 
// * UNIX timestamp of last checkpoint block - 1710219, // * total number of transactions between genesis and last checkpoint + 1575775743, // * UNIX timestamp of last checkpoint block + 1743384, // * total number of transactions between genesis and last checkpoint // (the tx=... number in the SetBestChain debug.log lines) 2000 // * estimated number of transactions per day after checkpoint }; @@ -231,6 +233,9 @@ class CMainParams : public CChainParams genesis.nBits = 0x1f00ffff; genesis.nNonce = 561379; + //uint256 hashTarget = uint256().SetCompact(genesis.nBits); + //assert(genesis.GetPoWHash() <= hashTarget); + hashGenesisBlock = genesis.GetHash(); assert(genesis.hashMerkleRoot == uint256("0x40bdd3d5ae84b91a71190094a82948400eb3356e87c5376b64d79509cf552d84")); assert(hashGenesisBlock == uint256("0xf4bbfc518aa3622dbeb8d2818a606b82c2b8b1ac2f28553ebdb6fc04d7abaccf")); @@ -246,7 +251,7 @@ class CMainParams : public CChainParams base58Prefixes[EXT_PUBLIC_KEY] = boost::assign::list_of(0x04)(0x44)(0xD5)(0xBC).convert_to_container >(); base58Prefixes[EXT_SECRET_KEY] = boost::assign::list_of(0x04)(0x44)(0xF0)(0xA3).convert_to_container >(); // BIP44 coin type is from https://github.com/satoshilabs/slips/blob/master/slip-0044.md - base58Prefixes[EXT_COIN_TYPE] = boost::assign::list_of(0x80)(0x00)(0x00)(0x77).convert_to_container >(); + base58Prefixes[EXT_COIN_TYPE] = boost::assign::list_of(0x80)(0x00)(0x01)(0xc0).convert_to_container >(); convertSeed6(vFixedSeeds, pnSeed6_main, ARRAYLEN(pnSeed6_main)); @@ -263,7 +268,7 @@ class CMainParams : public CChainParams nBudgetCycleBlocks = 30 * 24 * 60 * 60 / nTargetSpacing; //!< Amount of blocks in a months period of time (using 1 minutes per) = (60*24*30) strSporkKey = "03fdfa718ec40be6ce1b5fadf36022a4f0ff2f1efc872291ffbe42af127bdd2859"; strSporkKeyOld = "03fdfa718ec40be6ce1b5fadf36022a4f0ff2f1efc872291ffbe42af127bdd2859"; - strObfuscationPoolDummyAddress = "D87q2gC9j6nNrnzCsg4aY6bHMLsT9nUhEw"; + strObfuscationPoolDummyAddress = 
"8JLdPguDU5HJkBvjfca7vD79wXvksPvJMz"; nStartMasternodePayments = 1403728576; //Wed, 25 Jun 2014 20:36:16 GMT /** Zerocoin */ @@ -311,8 +316,8 @@ class CTestNetParams : public CMainParams pchMessageStart[3] = 0xc6; vAlertPubKey = ParseHex("03b95000b2b06e391c058ea14d47ac3c525753c68460864f254ada5a63e27a8134"); nDefaultPort = 21957; - bnProofOfWorkLimit[POW_QUARK] = ~uint256(0) >> 12; - bnProofOfWorkLimit[POW_SCRYPT_SQUARED] = ~uint256(0) >> 8; + bnProofOfWorkLimit[POW_QUARK] = ~uint256(0) >> 16; + bnProofOfWorkLimit[POW_SCRYPT_SQUARED] = ~uint256(0) >> 9; nEnforceBlockUpgradeMajority = 3780; // 70% nRejectBlockOutdatedMajority = 4050; // 75% nToCheckBlockUpgradeMajority = 5400; // 4 days (1350*4) @@ -356,15 +361,18 @@ class CTestNetParams : public CMainParams //vBurnAddresses.emplace_back("xzd3LKsihYn1CKBESTQP7EresFECXEMivk"); //! Modify the testnet genesis block so the timestamp is valid for a later start. - //genesis.nTime = 1454124731; + genesis.nTime = 1574924400; genesis.nBits = 0x1f00ffff; - genesis.nNonce = 93481; + genesis.nNonce = 164084; + + uint256 hashTarget = uint256().SetCompact(genesis.nBits); + assert(genesis.GetPoWHash() <= hashTarget); hashGenesisBlock = genesis.GetHash(); //printf("Merkle hash test: %s\n", genesis.hashMerkleRoot.ToString().c_str()); //printf("Block hash test: %s\n", hashGenesisBlock.ToString().c_str()); assert(genesis.hashMerkleRoot == uint256("0x40bdd3d5ae84b91a71190094a82948400eb3356e87c5376b64d79509cf552d84")); - assert(hashGenesisBlock == uint256("0xfcfc1b5bc930bc0a74643462617264e4f7aa39e276c637353bda6960b5726fb8")); + assert(hashGenesisBlock == uint256("0x000037a145d6812571b0c413d868a43146d7159056afe7a06b344e9ee0de39fc")); vFixedSeeds.clear(); vSeeds.clear(); @@ -373,10 +381,8 @@ class CTestNetParams : public CMainParams base58Prefixes[PUBKEY_ADDRESS] = std::vector(1, 139); // Testnet simplicity addresses start with 'x' or 'y' base58Prefixes[SCRIPT_ADDRESS] = std::vector(1, 19); // Testnet simplicity script addresses start with 
'8' or '9' base58Prefixes[SECRET_KEY] = std::vector(1, 239); // Testnet private keys start with '9' or 'c' (Bitcoin defaults) - // Testnet simplicity BIP32 pubkeys start with 'DRKV' - base58Prefixes[EXT_PUBLIC_KEY] = boost::assign::list_of(0x3a)(0x80)(0x61)(0xa0).convert_to_container >(); - // Testnet simplicity BIP32 prvkeys start with 'DRKP' - base58Prefixes[EXT_SECRET_KEY] = boost::assign::list_of(0x3a)(0x80)(0x58)(0x37).convert_to_container >(); + base58Prefixes[EXT_PUBLIC_KEY] = boost::assign::list_of(0x05)(0x55)(0xCF)(0xB1).convert_to_container >(); + base58Prefixes[EXT_SECRET_KEY] = boost::assign::list_of(0x05)(0x55)(0xD4)(0x7A).convert_to_container >(); // Testnet simplicity BIP44 coin type is '1' (All coin's testnet default) base58Prefixes[EXT_COIN_TYPE] = boost::assign::list_of(0x80)(0x00)(0x00)(0x01).convert_to_container >(); @@ -393,7 +399,7 @@ class CTestNetParams : public CMainParams nBudgetCycleBlocks = 24 * 6 * 60 / nTargetSpacing; //!< Ten cycles per day on testnet strSporkKey = "03b95000b2b06e391c058ea14d47ac3c525753c68460864f254ada5a63e27a8134"; strSporkKeyOld = "03b95000b2b06e391c058ea14d47ac3c525753c68460864f254ada5a63e27a8134"; - strObfuscationPoolDummyAddress = "y6S5YPwPCXi2oemSRJGitNPwPjcFJfwbED"; + strObfuscationPoolDummyAddress = "yCQuB8kvJYRJyRFDJXrzVgVyfe2E68S8jb"; nStartMasternodePayments = 1420837558; //Fri, 09 Jan 2015 21:05:58 GMT nBudget_Fee_Confirmations = 3; // Number of confirmations for the finalization fee. We have to make this very short // here because we only have a 8 block finalization window on testnet @@ -448,15 +454,27 @@ class CRegTestParams : public CTestNetParams nPublicZCSpends = 350; //! Modify the regtest genesis block so the timestamp is valid for a later start. 
- genesis.nTime = 1454124731; - genesis.nBits = 0x207fffff; - genesis.nNonce = 12345; + genesis.nTime = 1574924400; + genesis.nBits = 0x1f00ffff; + genesis.nNonce = 164084; + + uint256 hashTarget = uint256().SetCompact(genesis.nBits); + /*while (true) { + uint256 hash = genesis.GetPoWHash(); + if (hash <= hashTarget) { + // Found a solution + printf("genesis block found\n hash: %s\n target: %s\n nonce: %i\n", hash.ToString().c_str(), hashTarget.ToString().c_str(), genesis.nNonce); + break; + } + genesis.nNonce += 1; + }*/ + assert(genesis.GetPoWHash() <= hashTarget); hashGenesisBlock = genesis.GetHash(); //printf("Merkle hash reg: %s\n", genesis.hashMerkleRoot.ToString().c_str()); //printf("Block hash reg: %s\n", hashGenesisBlock.ToString().c_str()); assert(genesis.hashMerkleRoot == uint256("0x40bdd3d5ae84b91a71190094a82948400eb3356e87c5376b64d79509cf552d84")); - assert(hashGenesisBlock == uint256("0xf38094d1d2ccb97e06248813ee887b48ee7326d68cb663ebf59eeca7a0bde2c4")); + assert(hashGenesisBlock == uint256("0x000037a145d6812571b0c413d868a43146d7159056afe7a06b344e9ee0de39fc")); vFixedSeeds.clear(); //! Testnet mode doesn't have any fixed seeds. vSeeds.clear(); //! Testnet mode doesn't have any DNS seeds. 
diff --git a/src/chainparams.h b/src/chainparams.h index 3d0c640..bf43bd5 100644 --- a/src/chainparams.h +++ b/src/chainparams.h @@ -63,7 +63,7 @@ class CChainParams const CBlock& GenesisBlock() const { return genesis; } /** Make miner wait to have peers to avoid wasting work */ bool MiningRequiresPeers() const { return fMiningRequiresPeers; } - /** Headers first syncing is disabled */ + /** Headers first syncing is enabled */ bool HeadersFirstSyncingActive() const { return fHeadersFirstSyncingActive; }; /** Default value for -checkmempool and -checkblockindex argument */ bool DefaultConsistencyChecks() const { return fDefaultConsistencyChecks; } @@ -137,7 +137,7 @@ class CChainParams /** Height or Time Based Activations **/ int ModifierUpgradeBlock() const { return nModifierUpdateBlock; } int WALLET_UPGRADE_BLOCK() const { return nMandatoryUpgradeBlock; } - int WALLET_UPGRADE_VERSION() const { return nUpgradeBlockVersion; } + uint32_t WALLET_UPGRADE_VERSION() const { return nUpgradeBlockVersion; } uint32_t BadScryptDiffTimeStart() const { return nBadScryptDiffTimeStart; } uint32_t BadScryptDiffTimeEnd() const { return nBadScryptDiffTimeEnd; } int Zerocoin_StartHeight() const { return nZerocoinStartHeight; } @@ -175,7 +175,7 @@ class CChainParams int64_t nTargetTimespan; int64_t nTargetSpacing; int nMandatoryUpgradeBlock; - int nUpgradeBlockVersion; + uint32_t nUpgradeBlockVersion; uint32_t nBadScryptDiffTimeStart; uint32_t nBadScryptDiffTimeEnd; int nMasternodeCountDrift; diff --git a/src/crypter.cpp b/src/crypter.cpp index 0c3253e..fd2e8d7 100644 --- a/src/crypter.cpp +++ b/src/crypter.cpp @@ -29,7 +29,7 @@ bool CCrypter::SetKeyFromPassphrase(const SecureString& strKeyData, const std::v if (nDerivationMethod == 1) { // Passphrase conversion - uint256 scryptHash = scrypt_salted_multiround_hash((const void*)strKeyData.c_str(), strKeyData.size(), &chSalt[0], 8, nRounds); + uint256 scryptHash = scrypt_salted_multiround_hash((const void*)strKeyData.c_str(), 
strKeyData.size(), &chSalt[0], WALLET_CRYPTO_SALT_SIZE, nRounds); i = EVP_BytesToKey(EVP_aes_256_cbc(), EVP_sha512(), &chSalt[0], (unsigned char *)&scryptHash, sizeof scryptHash, nRounds, chKey, chIV); memory_cleanse(&scryptHash, sizeof scryptHash); diff --git a/src/crypto/scrypt-arm.S b/src/crypto/scrypt-arm.S index 7e38639..528181f 100644 --- a/src/crypto/scrypt-arm.S +++ b/src/crypto/scrypt-arm.S @@ -11,7 +11,7 @@ #include "config/simplicity-config.h" #endif -#if defined(__arm__) && defined(__APCS_32__) +#if /*defined(USE_ASM) &&*/ defined(__arm__) && defined(__APCS_32__) #if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \ diff --git a/src/crypto/scrypt-armv8.c b/src/crypto/scrypt-armv8.c new file mode 100644 index 0000000..ec4e045 --- /dev/null +++ b/src/crypto/scrypt-armv8.c @@ -0,0 +1,1213 @@ +/* + * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ + +#include +#include +#include +#include + +#if defined(__aarch64__) + +#include +#include + /* xor_salsa8: XORs the 16 words of Bx into B, mixes the result with the Salsa20/8 core (four passes of a column round plus a row round), then adds the mixed words back into B. B is updated in place; Bx is only read. */ +static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) +{ + uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15; + int i; + + x00 = (B[ 0] ^= Bx[ 0]); + x01 = (B[ 1] ^= Bx[ 1]); + x02 = (B[ 2] ^= Bx[ 2]); + x03 = (B[ 3] ^= Bx[ 3]); + x04 = (B[ 4] ^= Bx[ 4]); + x05 = (B[ 5] ^= Bx[ 5]); + x06 = (B[ 6] ^= Bx[ 6]); + x07 = (B[ 7] ^= Bx[ 7]); + x08 = (B[ 8] ^= Bx[ 8]); + x09 = (B[ 9] ^= Bx[ 9]); + x10 = (B[10] ^= Bx[10]); + x11 = (B[11] ^= Bx[11]); + x12 = (B[12] ^= Bx[12]); + x13 = (B[13] ^= Bx[13]); + x14 = (B[14] ^= Bx[14]); + x15 = (B[15] ^= Bx[15]); + for (i = 0; i < 8; i += 2) { /* i steps by 2, so the body runs 4 times; R(a, b) rotates the 32-bit value a left by b bits and is defined and undefined inside the loop body. */ +#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) + /* Operate on columns. */ + x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); + x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7); + + x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); + x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9); + + x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); + x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13); + + x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); + x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18); + + /* Operate on rows.
*/ + x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); + x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7); + + x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); + x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9); + + x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); + x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13); + + x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); + x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18); +#undef R + } /* Feed-forward: add the mixed words to B, which already holds B ^ Bx. */ + B[ 0] += x00; + B[ 1] += x01; + B[ 2] += x02; + B[ 3] += x03; + B[ 4] += x04; + B[ 5] += x05; + B[ 6] += x06; + B[ 7] += x07; + B[ 8] += x08; + B[ 9] += x09; + B[10] += x10; + B[11] += x11; + B[12] += x12; + B[13] += x13; + B[14] += x14; + B[15] += x15; +} + /* xor_salsa8_prefetch: same mixing as xor_salsa8. The extra arguments V and N are used only to prefetch the 32-word row of V selected by the first output word (B[0] masked with N - 1) as soon as that word is final. */ +static inline void xor_salsa8_prefetch(uint32_t B[16], const uint32_t Bx[16], uint32_t* V, uint32_t N) +{ + uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15; + int i; + + x00 = (B[ 0] ^= Bx[ 0]); + x01 = (B[ 1] ^= Bx[ 1]); + x02 = (B[ 2] ^= Bx[ 2]); + x03 = (B[ 3] ^= Bx[ 3]); + x04 = (B[ 4] ^= Bx[ 4]); + x05 = (B[ 5] ^= Bx[ 5]); + x06 = (B[ 6] ^= Bx[ 6]); + x07 = (B[ 7] ^= Bx[ 7]); + x08 = (B[ 8] ^= Bx[ 8]); + x09 = (B[ 9] ^= Bx[ 9]); + x10 = (B[10] ^= Bx[10]); + x11 = (B[11] ^= Bx[11]); + x12 = (B[12] ^= Bx[12]); + x13 = (B[13] ^= Bx[13]); + x14 = (B[14] ^= Bx[14]); + x15 = (B[15] ^= Bx[15]); + for (i = 0; i < 8; i += 2) { +#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) + /* Operate on columns. */ + x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); + x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7); + + x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); + x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9); + + x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); + x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13); + + x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); + x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18); + + /* Operate on rows.
*/ + x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); + x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7); + + x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); + x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9); + + x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); + x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13); + + x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); + x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18); +#undef R + } + B[ 0] += x00; + /* B[0] is final here, so the 32-word row of V it selects is prefetched (at word offsets 0, 8, 16 and 24) while the other 15 words are still being added. The mask assumes N is a power of two - TODO confirm with callers. */ uint32_t one = 32 * (B[0] & (N - 1)); + __builtin_prefetch(&V[one + 0]); + __builtin_prefetch(&V[one + 8]); + __builtin_prefetch(&V[one + 16]); + __builtin_prefetch(&V[one + 24]); + /* Empty asm with a memory clobber: a compiler-level barrier, presumably to keep the prefetches ahead of the stores below - confirm. */ asm("":::"memory"); + B[ 1] += x01; + B[ 2] += x02; + B[ 3] += x03; + B[ 4] += x04; + B[ 5] += x05; + B[ 6] += x06; + B[ 7] += x07; + B[ 8] += x08; + B[ 9] += x09; + B[10] += x10; + B[11] += x11; + B[12] += x12; + B[13] += x13; + B[14] += x14; + B[15] += x15; +} + /* scrypt_core: runs the two-pass scrypt mixing loop on one 32-word (128-byte) state X, using V as scratch space. X is updated in place. V must hold at least 32 * N words, since the first loop writes rows 0 .. N-1. Row selection masks with N - 1, which assumes N is a power of two - TODO confirm with callers. */ +void scrypt_core(uint32_t *X, uint32_t *V, int N) +{ + int i; + + for (i = 0; i < N; i++) { /* Pass 1: save the current state as row i of V, then advance it by mixing each 16-word half with the other. */ + memcpy(&V[i * 32], X, 128); + xor_salsa8(&X[0], &X[16]); + xor_salsa8(&X[16], &X[0]); + } + for (i = 0; i < N; i++) { /* Pass 2: XOR in the row of V selected by X[16], then advance the state again. The second half-mix uses the prefetch variant with B = &X[16], so it prefetches the row that the next iteration will read. */ + uint32_t j = 32 * (X[16] & (N - 1)); + for (uint8_t k = 0; k < 32; k++) + X[k] ^= V[j + k]; + xor_salsa8(&X[0], &X[16]); + xor_salsa8_prefetch(&X[16], &X[0], V, N); + } +} + /* scrypt_shuffle: fixed in-place permutation of 16 words. Viewing B as a 4x4 row-major matrix, each output row is one wrapped diagonal of the input (for example the first four outputs are old B[0], B[5], B[10], B[15]). scrypt_core_3way applies it to every 16-word half of X before loading the data into NEON vectors. */ +static inline void scrypt_shuffle(uint32_t B[16]) +{ + uint32_t x0 = B[0]; + uint32_t x1 = B[1]; + uint32_t x2 = B[2]; + uint32_t x3 = B[3]; + uint32_t x4 = B[4]; + uint32_t x5 = B[5]; + uint32_t x6 = B[6]; + uint32_t x7 = B[7]; + uint32_t x8 = B[8]; + uint32_t x9 = B[9]; + uint32_t x10 = B[10]; + uint32_t x11 = B[11]; + uint32_t x12 = B[12]; + uint32_t x13 = B[13]; + uint32_t x14 = B[14]; + uint32_t x15 = B[15]; + + B[0] = x0; B[1] = x5; B[2] = x10; B[3] = x15; + B[4] = x12; B[5] = x1; B[6] = x6; B[7] = x11; + B[8] = x8; B[9] = x13; B[10] = x2; B[11] = x7; + B[12] = x4; B[13] = x9; B[14] = x14; B[15] = x3; +} + +void scrypt_core_3way(uint32_t *X, uint32_t *V, int N) +{ + uint32_t* W = V; + + scrypt_shuffle(&X[0 + 0]); + scrypt_shuffle(&X[16 +
0]); + scrypt_shuffle(&X[0 + 32]); + scrypt_shuffle(&X[16 + 32]); + scrypt_shuffle(&X[0 + 64]); + scrypt_shuffle(&X[16 + 64]); + + uint32x4x4_t q_a, q_b, q_c, q_tmp; + uint32x4x4_t ba_a, bb_a, bc_a, ba_b, bb_b, bc_b; + + ba_a.val[0] = vld1q_u32(&X[( 0) / 4]); + ba_a.val[1] = vld1q_u32(&X[(16) / 4]); + ba_a.val[2] = vld1q_u32(&X[(32) / 4]); + ba_a.val[3] = vld1q_u32(&X[(48) / 4]); + ba_b.val[0] = vld1q_u32(&X[(0 + 64 + 0) / 4]); + ba_b.val[1] = vld1q_u32(&X[(0 + 64 + 16) / 4]); + ba_b.val[2] = vld1q_u32(&X[(0 + 64 + 32) / 4]); + ba_b.val[3] = vld1q_u32(&X[(0 + 64 + 48) / 4]); + + bb_a.val[0] = vld1q_u32(&X[(128 + 0) / 4]); + bb_a.val[1] = vld1q_u32(&X[(128 + 16) / 4]); + bb_a.val[2] = vld1q_u32(&X[(128 + 32) / 4]); + bb_a.val[3] = vld1q_u32(&X[(128 + 48) / 4]); + bb_b.val[0] = vld1q_u32(&X[(128 + 64 + 0) / 4]); + bb_b.val[1] = vld1q_u32(&X[(128 + 64 + 16) / 4]); + bb_b.val[2] = vld1q_u32(&X[(128 + 64 + 32) / 4]); + bb_b.val[3] = vld1q_u32(&X[(128 + 64 + 48) / 4]); + + bc_a.val[0] = vld1q_u32(&X[(256 + 0) / 4]); + bc_a.val[1] = vld1q_u32(&X[(256 + 16) / 4]); + bc_a.val[2] = vld1q_u32(&X[(256 + 32) / 4]); + bc_a.val[3] = vld1q_u32(&X[(256 + 48) / 4]); + bc_b.val[0] = vld1q_u32(&X[(256 + 64 + 0) / 4]); + bc_b.val[1] = vld1q_u32(&X[(256 + 64 + 16) / 4]); + bc_b.val[2] = vld1q_u32(&X[(256 + 64 + 32) / 4]); + bc_b.val[3] = vld1q_u32(&X[(256 + 64 + 48) / 4]); + + // prep + + vst1q_u32(&V[( 0) / 4], ba_a.val[0]); + vst1q_u32(&V[(16) / 4], ba_a.val[1]); + vst1q_u32(&V[(32) / 4], ba_a.val[2]); + vst1q_u32(&V[(48) / 4], ba_a.val[3]); + vst1q_u32(&V[(64) / 4], ba_b.val[0]); + vst1q_u32(&V[(80) / 4], ba_b.val[1]); + vst1q_u32(&V[(96) / 4], ba_b.val[2]); + vst1q_u32(&V[(112) / 4], ba_b.val[3]); + + vst1q_u32(&V[(128 + 0) / 4], bb_a.val[0]); + vst1q_u32(&V[(128 + 16) / 4], bb_a.val[1]); + vst1q_u32(&V[(128 + 32) / 4], bb_a.val[2]); + vst1q_u32(&V[(128 + 48) / 4], bb_a.val[3]); + vst1q_u32(&V[(128 + 64) / 4], bb_b.val[0]); + vst1q_u32(&V[(128 + 80) / 4], bb_b.val[1]); + 
vst1q_u32(&V[(128 + 96) / 4], bb_b.val[2]); + vst1q_u32(&V[(128 + 112) / 4], bb_b.val[3]); + + vst1q_u32(&V[(256 + 0) / 4], bc_a.val[0]); + vst1q_u32(&V[(256 + 16) / 4], bc_a.val[1]); + vst1q_u32(&V[(256 + 32) / 4], bc_a.val[2]); + vst1q_u32(&V[(256 + 48) / 4], bc_a.val[3]); + vst1q_u32(&V[(256 + 64) / 4], bc_b.val[0]); + vst1q_u32(&V[(256 + 80) / 4], bc_b.val[1]); + vst1q_u32(&V[(256 + 96) / 4], bc_b.val[2]); + vst1q_u32(&V[(256 + 112) / 4],bc_b.val[3]); + + V += 96; + + for (int n = 0; n < N; n++) + { + // loop 1 part a + q_a.val[0] = veorq_u32(ba_b.val[0], ba_a.val[0]); + q_a.val[1] = veorq_u32(ba_b.val[1], ba_a.val[1]); + q_a.val[2] = veorq_u32(ba_b.val[2], ba_a.val[2]); + q_a.val[3] = veorq_u32(ba_b.val[3], ba_a.val[3]); + + q_b.val[0] = veorq_u32(bb_b.val[0], bb_a.val[0]); + q_b.val[1] = veorq_u32(bb_b.val[1], bb_a.val[1]); + q_b.val[2] = veorq_u32(bb_b.val[2], bb_a.val[2]); + q_b.val[3] = veorq_u32(bb_b.val[3], bb_a.val[3]); + + q_c.val[0] = veorq_u32(bc_b.val[0], bc_a.val[0]); + q_c.val[1] = veorq_u32(bc_b.val[1], bc_a.val[1]); + q_c.val[2] = veorq_u32(bc_b.val[2], bc_a.val[2]); + q_c.val[3] = veorq_u32(bc_b.val[3], bc_a.val[3]); + + ba_a = q_a; + bb_a = q_b; + bc_a = q_c; + + for (int i = 0; i < 4; i ++) + { + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = 
vaddq_u32(q_b.val[3], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 3); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 3); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + + q_a.val[2] = 
vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 1); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 1); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 1); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], 
q_tmp.val[2], 19); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 3); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 3); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 1); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 1); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 1); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + } + ba_a.val[0] = vaddq_u32(ba_a.val[0], q_a.val[0]); + ba_a.val[1] = vaddq_u32(ba_a.val[1], q_a.val[1]); + ba_a.val[2] = vaddq_u32(ba_a.val[2], q_a.val[2]); + ba_a.val[3] = vaddq_u32(ba_a.val[3], q_a.val[3]); + + q_a = ba_a; + + bb_a.val[0] = vaddq_u32(bb_a.val[0], q_b.val[0]); + bb_a.val[1] = vaddq_u32(bb_a.val[1], q_b.val[1]); + bb_a.val[2] = vaddq_u32(bb_a.val[2], q_b.val[2]); + bb_a.val[3] = vaddq_u32(bb_a.val[3], q_b.val[3]); + + q_b = bb_a; + + bc_a.val[0] = vaddq_u32(bc_a.val[0], q_c.val[0]); + bc_a.val[1] = vaddq_u32(bc_a.val[1], q_c.val[1]); + bc_a.val[2] = vaddq_u32(bc_a.val[2], q_c.val[2]); + bc_a.val[3] = 
vaddq_u32(bc_a.val[3], q_c.val[3]); + + q_c = bc_a; + + for (int i = 0; i < 4; i++) + { + vst1q_u32(&V[ (i * 4) ], ba_a.val[i]); + vst1q_u32(&V[(32 + (i * 4))], bb_a.val[i]); + vst1q_u32(&V[(64 + (i * 4))], bc_a.val[i]); + } + + // loop 1 part b + + q_a.val[0] = veorq_u32(ba_b.val[0], q_a.val[0]); + q_a.val[1] = veorq_u32(ba_b.val[1], q_a.val[1]); + q_a.val[2] = veorq_u32(ba_b.val[2], q_a.val[2]); + q_a.val[3] = veorq_u32(ba_b.val[3], q_a.val[3]); + ba_b = q_a; + + q_b.val[0] = veorq_u32(bb_b.val[0], q_b.val[0]); + q_b.val[1] = veorq_u32(bb_b.val[1], q_b.val[1]); + q_b.val[2] = veorq_u32(bb_b.val[2], q_b.val[2]); + q_b.val[3] = veorq_u32(bb_b.val[3], q_b.val[3]); + bb_b = q_b; + + q_c.val[0] = veorq_u32(bc_b.val[0], q_c.val[0]); + q_c.val[1] = veorq_u32(bc_b.val[1], q_c.val[1]); + q_c.val[2] = veorq_u32(bc_b.val[2], q_c.val[2]); + q_c.val[3] = veorq_u32(bc_b.val[3], q_c.val[3]); + bc_b = q_c; + + + for (int i = 0; i < 4; i ++) + { + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], 
q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 3); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 3); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[3]); + 
q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 1); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 1); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 1); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 3); + q_b.val[1] = 
vextq_u32(q_b.val[1], q_b.val[1], 3); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 1); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 1); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 1); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + } + + ba_b.val[0] = vaddq_u32(q_a.val[0], ba_b.val[0]); + ba_b.val[1] = vaddq_u32(q_a.val[1], ba_b.val[1]); + ba_b.val[2] = vaddq_u32(q_a.val[2], ba_b.val[2]); + ba_b.val[3] = vaddq_u32(q_a.val[3], ba_b.val[3]); + bb_b.val[0] = vaddq_u32(q_b.val[0], bb_b.val[0]); + bb_b.val[1] = vaddq_u32(q_b.val[1], bb_b.val[1]); + bb_b.val[2] = vaddq_u32(q_b.val[2], bb_b.val[2]); + bb_b.val[3] = vaddq_u32(q_b.val[3], bb_b.val[3]); + bc_b.val[0] = vaddq_u32(q_c.val[0], bc_b.val[0]); + bc_b.val[1] = vaddq_u32(q_c.val[1], bc_b.val[1]); + bc_b.val[2] = vaddq_u32(q_c.val[2], bc_b.val[2]); + bc_b.val[3] = vaddq_u32(q_c.val[3], bc_b.val[3]); + for (int i = 0; i < 4; i++) + { + vst1q_u32(&V[( 16 + (i * 4))], ba_b.val[i]); + vst1q_u32(&V[(32 + 16 + (i * 4))], bb_b.val[i]); + vst1q_u32(&V[(64 + 16 + (i * 4))], bc_b.val[i]); + } + V += 96; + } + V = W; + + // loop 2 + + uint32x4x4_t x; + + uint32_t one = 32 * (3 * (ba_b.val[0][0] & (N - 1)) + 0); + 
uint32_t two = 32 * (3 * (bb_b.val[0][0] & (N - 1)) + 1); + uint32_t three = 32 * (3 * (bc_b.val[0][0] & (N - 1)) + 2); + x.val[0] = vld1q_u32(&W[one + 0]); + x.val[1] = vld1q_u32(&W[one + 4]); + x.val[2] = vld1q_u32(&W[one + 8]); + x.val[3] = vld1q_u32(&W[one + 12]); + + for (int n = 0; n < N; n++) + { + // loop 2 part a + + ba_a.val[0] = veorq_u32(ba_a.val[0], x.val[0]); + x.val[0] = vld1q_u32(&W[one + 16 + 0]); + ba_a.val[1] = veorq_u32(ba_a.val[1], x.val[1]); + x.val[1] = vld1q_u32(&W[one + 16 + 4]); + ba_a.val[2] = veorq_u32(ba_a.val[2], x.val[2]); + x.val[2] = vld1q_u32(&W[one + 16 + 8]); + ba_a.val[3] = veorq_u32(ba_a.val[3], x.val[3]); + + ba_b.val[0] = veorq_u32(ba_b.val[0], x.val[0]); + ba_b.val[1] = veorq_u32(ba_b.val[1], x.val[1]); + x.val[3] = vld1q_u32(&W[one + 16 + 12]); + ba_b.val[2] = veorq_u32(ba_b.val[2], x.val[2]); + ba_b.val[3] = veorq_u32(ba_b.val[3], x.val[3]); + x.val[0] = vld1q_u32(&W[two + 0]); + q_a.val[0] = veorq_u32(ba_b.val[0], ba_a.val[0]); + q_a.val[1] = veorq_u32(ba_b.val[1], ba_a.val[1]); + x.val[1] = vld1q_u32(&W[two + 4]); + q_a.val[2] = veorq_u32(ba_b.val[2], ba_a.val[2]); + q_a.val[3] = veorq_u32(ba_b.val[3], ba_a.val[3]); + x.val[2] = vld1q_u32(&W[two + 8]); + ba_a = q_a; + + x.val[3] = vld1q_u32(&W[two + 12]); + + bb_a.val[0] = veorq_u32(bb_a.val[0], x.val[0]); + x.val[0] = vld1q_u32(&W[two + 16 + 0]); + bb_a.val[1] = veorq_u32(bb_a.val[1], x.val[1]); + x.val[1] = vld1q_u32(&W[two + 16 + 4]); + bb_a.val[2] = veorq_u32(bb_a.val[2], x.val[2]); + x.val[2] = vld1q_u32(&W[two + 16 + 8]); + bb_a.val[3] = veorq_u32(bb_a.val[3], x.val[3]); + bb_b.val[0] = veorq_u32(bb_b.val[0], x.val[0]); + x.val[3] = vld1q_u32(&W[two + 16 + 12]); + bb_b.val[1] = veorq_u32(bb_b.val[1], x.val[1]); + x.val[0] = vld1q_u32(&W[three + 0]); + bb_b.val[2] = veorq_u32(bb_b.val[2], x.val[2]); + bb_b.val[3] = veorq_u32(bb_b.val[3], x.val[3]); + x.val[1] = vld1q_u32(&W[three + 4]); + q_b.val[0] = veorq_u32(bb_b.val[0], bb_a.val[0]); + q_b.val[1] = 
veorq_u32(bb_b.val[1], bb_a.val[1]); + x.val[2] = vld1q_u32(&W[three + 8]); + q_b.val[2] = veorq_u32(bb_b.val[2], bb_a.val[2]); + q_b.val[3] = veorq_u32(bb_b.val[3], bb_a.val[3]); + x.val[3] = vld1q_u32(&W[three + 12]); + bb_a = q_b; + + bc_a.val[0] = veorq_u32(bc_a.val[0], x.val[0]); + x.val[0] = vld1q_u32(&W[three + 16 + 0]); + bc_a.val[1] = veorq_u32(bc_a.val[1], x.val[1]); + x.val[1] = vld1q_u32(&W[three + 16 + 4]); + bc_a.val[2] = veorq_u32(bc_a.val[2], x.val[2]); + x.val[2] = vld1q_u32(&W[three + 16 + 8]); + bc_a.val[3] = veorq_u32(bc_a.val[3], x.val[3]); + bc_b.val[0] = veorq_u32(bc_b.val[0], x.val[0]); + x.val[3] = vld1q_u32(&W[three + 16 + 12]); + bc_b.val[1] = veorq_u32(bc_b.val[1], x.val[1]); + bc_b.val[2] = veorq_u32(bc_b.val[2], x.val[2]); + bc_b.val[3] = veorq_u32(bc_b.val[3], x.val[3]); + q_c.val[0] = veorq_u32(bc_b.val[0], bc_a.val[0]); + q_c.val[1] = veorq_u32(bc_b.val[1], bc_a.val[1]); + q_c.val[2] = veorq_u32(bc_b.val[2], bc_a.val[2]); + q_c.val[3] = veorq_u32(bc_b.val[3], bc_a.val[3]); + bc_a = q_c; + + for (int i = 0; i < 4; i++) + { + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = 
vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 3); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 3); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + + q_tmp.val[0] = 
vaddq_u32(q_a.val[0], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 1); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 1); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 1); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], 
q_tmp.val[3], 19); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 3); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 3); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 1); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 1); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 1); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + } + ba_a.val[0] = vaddq_u32(ba_a.val[0], q_a.val[0]); + ba_a.val[1] = vaddq_u32(ba_a.val[1], q_a.val[1]); + ba_a.val[2] = vaddq_u32(ba_a.val[2], q_a.val[2]); + ba_a.val[3] = vaddq_u32(ba_a.val[3], q_a.val[3]); + + q_a = ba_a; + + bb_a.val[0] = vaddq_u32(bb_a.val[0], q_b.val[0]); + bb_a.val[1] = vaddq_u32(bb_a.val[1], q_b.val[1]); + bb_a.val[2] = vaddq_u32(bb_a.val[2], q_b.val[2]); + bb_a.val[3] = vaddq_u32(bb_a.val[3], q_b.val[3]); + q_b = bb_a; + + bc_a.val[0] = vaddq_u32(bc_a.val[0], q_c.val[0]); + bc_a.val[1] = vaddq_u32(bc_a.val[1], q_c.val[1]); + bc_a.val[2] = vaddq_u32(bc_a.val[2], q_c.val[2]); + bc_a.val[3] = vaddq_u32(bc_a.val[3], q_c.val[3]); + q_c = bc_a; + + // loop 2 b + + q_a.val[0] = veorq_u32(ba_b.val[0], q_a.val[0]); + q_a.val[1] = veorq_u32(ba_b.val[1], q_a.val[1]); + 
q_a.val[2] = veorq_u32(ba_b.val[2], q_a.val[2]); + q_a.val[3] = veorq_u32(ba_b.val[3], q_a.val[3]); + ba_b = q_a; + + q_b.val[0] = veorq_u32(bb_b.val[0], q_b.val[0]); + q_b.val[1] = veorq_u32(bb_b.val[1], q_b.val[1]); + q_b.val[2] = veorq_u32(bb_b.val[2], q_b.val[2]); + q_b.val[3] = veorq_u32(bb_b.val[3], q_b.val[3]); + bb_b = q_b; + + q_c.val[0] = veorq_u32(bc_b.val[0], q_c.val[0]); + q_c.val[1] = veorq_u32(bc_b.val[1], q_c.val[1]); + q_c.val[2] = veorq_u32(bc_b.val[2], q_c.val[2]); + q_c.val[3] = veorq_u32(bc_b.val[3], q_c.val[3]); + bc_b = q_c; + + + for (int i = 0; i < 3; i++) + { + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = 
vaddq_u32(q_b.val[2], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 3); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 3); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 1); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[1] = 
vextq_u32(q_b.val[1], q_b.val[1], 1); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 1); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 3); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 3); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 3); + + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[2] = 
vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 1); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 1); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 1); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + } + { + //1 + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + //2 + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = 
veorq_u32(q_tmp.val[1], q_c.val[2]); + //3 + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 3); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 3); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 3); + //4 + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + //5 + q_tmp.val[0] = vaddq_u32(q_a.val[0], q_a.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 7); + q_tmp.val[2] = vaddq_u32(q_b.val[0], q_b.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 25); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 1); + q_a.val[1] = veorq_u32(q_tmp.val[1], q_a.val[1]); + q_tmp.val[1] = 
vshlq_n_u32(q_tmp.val[2], 7); + q_tmp.val[3] = vaddq_u32(q_c.val[0], q_c.val[3]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 25); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 1); + q_b.val[1] = veorq_u32(q_tmp.val[1], q_b.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 7); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 25); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 1); + q_c.val[1] = veorq_u32(q_tmp.val[1], q_c.val[1]); + //6 + q_tmp.val[0] = vaddq_u32(q_a.val[1], q_a.val[0]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 9); + q_tmp.val[2] = vaddq_u32(q_b.val[1], q_b.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 23); + q_a.val[2] = veorq_u32(q_tmp.val[1], q_a.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 9); + q_tmp.val[3] = vaddq_u32(q_c.val[1], q_c.val[0]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 23); + q_b.val[2] = veorq_u32(q_tmp.val[1], q_b.val[2]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 9); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 23); + q_c.val[2] = veorq_u32(q_tmp.val[1], q_c.val[2]); + //7 + q_tmp.val[0] = vaddq_u32(q_a.val[2], q_a.val[1]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[0], 13); + q_tmp.val[2] = vaddq_u32(q_b.val[2], q_b.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 19); + q_a.val[3] = veorq_u32(q_tmp.val[1], q_a.val[3]); + q_a.val[1] = vextq_u32(q_a.val[1], q_a.val[1], 3); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 13); + q_tmp.val[3] = vaddq_u32(q_c.val[2], q_c.val[1]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 19); + q_b.val[3] = veorq_u32(q_tmp.val[1], q_b.val[3]); + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 13); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 19); + q_c.val[3] = veorq_u32(q_tmp.val[1], q_c.val[3]); + q_b.val[1] = vextq_u32(q_b.val[1], q_b.val[1], 3); + q_c.val[1] = vextq_u32(q_c.val[1], q_c.val[1], 3); + + //8 + q_tmp.val[0] = vaddq_u32(q_a.val[3], q_a.val[2]); + q_tmp.val[1] = 
vshlq_n_u32(q_tmp.val[0], 18); + q_tmp.val[2] = vaddq_u32(q_b.val[3], q_b.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[0], 14); + q_a.val[0] = veorq_u32(q_tmp.val[1], q_a.val[0]); + ba_b.val[0] = vaddq_u32(q_a.val[0], ba_b.val[0]); + one = 32 * (3 * (ba_b.val[0][0] & (N - 1)) + 0); + __builtin_prefetch(&W[one + 0]); + __builtin_prefetch(&W[one + 8]); + __builtin_prefetch(&W[one + 16]); + __builtin_prefetch(&W[one + 24]); + + q_a.val[2] = vextq_u32(q_a.val[2], q_a.val[2], 2); + q_b.val[2] = vextq_u32(q_b.val[2], q_b.val[2], 2); + + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[2], 18); + q_tmp.val[3] = vaddq_u32(q_c.val[3], q_c.val[2]); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[2], 14); + q_c.val[2] = vextq_u32(q_c.val[2], q_c.val[2], 2); + q_b.val[3] = vextq_u32(q_b.val[3], q_b.val[3], 1); + q_b.val[0] = veorq_u32(q_tmp.val[1], q_b.val[0]); + bb_b.val[0] = vaddq_u32(q_b.val[0], bb_b.val[0]); + two = 32 * (3 * (bb_b.val[0][0] & (N - 1)) + 1); + __builtin_prefetch(&W[two + 0]); + __builtin_prefetch(&W[two + 8]); + __builtin_prefetch(&W[two + 16]); + __builtin_prefetch(&W[two + 24]); + + q_tmp.val[1] = vshlq_n_u32(q_tmp.val[3], 18); + q_tmp.val[1] = vsriq_n_u32(q_tmp.val[1], q_tmp.val[3], 14); + q_a.val[3] = vextq_u32(q_a.val[3], q_a.val[3], 1); + q_c.val[3] = vextq_u32(q_c.val[3], q_c.val[3], 1); + q_c.val[0] = veorq_u32(q_tmp.val[1], q_c.val[0]); + bc_b.val[0] = vaddq_u32(q_c.val[0], bc_b.val[0]); + three = 32 * (3 * (bc_b.val[0][0] & (N - 1)) + 2); + __builtin_prefetch(&W[three + 0]); + __builtin_prefetch(&W[three + 8]); + __builtin_prefetch(&W[three + 16]); + __builtin_prefetch(&W[three + 24]); + } + + x.val[0] = vld1q_u32(&W[one + 0]); + ba_b.val[1] = vaddq_u32(q_a.val[1], ba_b.val[1]); + ba_b.val[2] = vaddq_u32(q_a.val[2], ba_b.val[2]); + ba_b.val[3] = vaddq_u32(q_a.val[3], ba_b.val[3]); + x.val[1] = vld1q_u32(&W[one + 4]); + bb_b.val[1] = vaddq_u32(q_b.val[1], bb_b.val[1]); + bb_b.val[2] = vaddq_u32(q_b.val[2], bb_b.val[2]); + bb_b.val[3] = 
vaddq_u32(q_b.val[3], bb_b.val[3]); + x.val[2] = vld1q_u32(&W[one + 8]); + bc_b.val[1] = vaddq_u32(q_c.val[1], bc_b.val[1]); + bc_b.val[2] = vaddq_u32(q_c.val[2], bc_b.val[2]); + bc_b.val[3] = vaddq_u32(q_c.val[3], bc_b.val[3]); + x.val[3] = vld1q_u32(&W[one + 12]); + } + + vst1q_u32(&X[0], ba_a.val[0]); + vst1q_u32(&X[4], ba_a.val[1]); + vst1q_u32(&X[8], ba_a.val[2]); + vst1q_u32(&X[12], ba_a.val[3]); + vst1q_u32(&X[16 + 0], ba_b.val[0]); + vst1q_u32(&X[16 + 4], ba_b.val[1]); + vst1q_u32(&X[16 + 8], ba_b.val[2]); + vst1q_u32(&X[16 + 12], ba_b.val[3]); + + vst1q_u32(&X[32 + 0], bb_a.val[0]); + vst1q_u32(&X[32 + 4], bb_a.val[1]); + vst1q_u32(&X[32 + 8], bb_a.val[2]); + vst1q_u32(&X[32 + 12], bb_a.val[3]); + vst1q_u32(&X[32 + 16 + 0], bb_b.val[0]); + vst1q_u32(&X[32 + 16 + 4], bb_b.val[1]); + vst1q_u32(&X[32 + 16 + 8], bb_b.val[2]); + vst1q_u32(&X[32 + 16 + 12], bb_b.val[3]); + + vst1q_u32(&X[64 + 0], bc_a.val[0]); + vst1q_u32(&X[64 + 4], bc_a.val[1]); + vst1q_u32(&X[64 + 8], bc_a.val[2]); + vst1q_u32(&X[64 + 12], bc_a.val[3]); + vst1q_u32(&X[64 + 16 + 0], bc_b.val[0]); + vst1q_u32(&X[64 + 16 + 4], bc_b.val[1]); + vst1q_u32(&X[64 + 16 + 8], bc_b.val[2]); + vst1q_u32(&X[64 + 16 + 12], bc_b.val[3]); + + scrypt_shuffle(&X[0 + 0]); + scrypt_shuffle(&X[16 + 0]); + scrypt_shuffle(&X[0 + 32]); + scrypt_shuffle(&X[16 + 32]); + scrypt_shuffle(&X[0 + 64]); + scrypt_shuffle(&X[16 + 64]); +} +#endif diff --git a/src/crypto/scrypt-x64.S b/src/crypto/scrypt-x64.S index 11afd15..31dd2e2 100644 --- a/src/crypto/scrypt-x64.S +++ b/src/crypto/scrypt-x64.S @@ -38,7 +38,7 @@ .section .note.GNU-stack,"",%progbits #endif -#if defined(__x86_64__) +#if /*defined(USE_ASM) &&*/ defined(__x86_64__) .text .p2align 6 @@ -2224,9 +2224,9 @@ scrypt_core_xmm_loop2: ret -#if defined(ENABLE_AVX2) +#if defined(ENABLE_AVX) -#endif /* USE_AVX */ +#endif /* ENABLE_AVX */ .text .p2align 6 @@ -2258,7 +2258,7 @@ _scrypt_core_3way: subq $392, %rsp -#if !defined(ENABLE_AVX2) +#if !defined(ENABLE_AVX) jmp 
scrypt_core_3way_xmm #else /* Check for AVX and OSXSAVE support */ @@ -2273,7 +2273,7 @@ _scrypt_core_3way: andl $0x00000006, %eax cmpl $0x00000006, %eax jne scrypt_core_3way_xmm -#if defined(USE_XOP) +#if defined(ENABLE_XOP) /* Check for XOP support */ movl $0x80000001, %eax cpuid @@ -5311,7 +5311,7 @@ scrypt_core_3way_avx_loop2: popq %rbx ret -#if defined(USE_XOP) +#if defined(ENABLE_XOP) .p2align 6 @@ -7575,8 +7575,8 @@ scrypt_core_3way_xop_loop2: popq %rbp popq %rbx ret -#endif /* USE_XOP */ -#endif /* USE_AVX */ +#endif /* ENABLE_XOP */ +#endif /* ENABLE_AVX */ @@ -14401,6 +14401,6 @@ scrypt_core_6way_avx2_loop2: popq %rbx ret -#endif /* USE_AVX2 */ +#endif /* ENABLE_AVX2 */ #endif diff --git a/src/crypto/scrypt-x86.S b/src/crypto/scrypt-x86.S index 9ad7519..4ab86ef 100644 --- a/src/crypto/scrypt-x86.S +++ b/src/crypto/scrypt-x86.S @@ -32,7 +32,7 @@ .section .note.GNU-stack,"",%progbits #endif -#if defined(__i386__) +#if /*defined(USE_ASM) &&*/ defined(__i386__) diff --git a/src/crypto/scrypt.cpp b/src/crypto/scrypt.cpp index 42be66b..0f05068 100644 --- a/src/crypto/scrypt.cpp +++ b/src/crypto/scrypt.cpp @@ -28,6 +28,7 @@ */ #include "crypto/scrypt.h" +#include "crypto/scrypt_opt.h" #include "uint256.h" #include "utilstrencodings.h" #include @@ -363,22 +364,27 @@ void scrypt(const char* pass, unsigned int pLen, const char* salt, unsigned int { //containers void* V0 = malloc(128 * r * N + 63); - void* XY0 = malloc(256 * r + 64 + 63); void* B1 = malloc(128 * r * p + 63); - uint8_t* B = (uint8_t *)(((uintptr_t)(B1) + 63) & ~ (uintptr_t)(63)); - uint32_t* V = (uint32_t *)(((uintptr_t)(V0) + 63) & ~ (uintptr_t)(63)); - uint32_t* XY = (uint32_t *)(((uintptr_t)(XY0) + 63) & ~ (uintptr_t)(63)); + uint8_t* B = (uint8_t *)(((uintptr_t)(B1) + 63) & ~(uintptr_t)(63)); + uint32_t* V = (uint32_t *)(((uintptr_t)(V0) + 63) & ~(uintptr_t)(63)); PBKDF2_SHA256((const uint8_t *)pass, pLen, (const uint8_t *)salt, sLen, 1, B, p * 128 * r); - for(unsigned int i = 0; i < p; i++) - { 
- SMix(&B[i * 128 * r], r, N, V, XY); + if (r == 1 && p == 1) { + scrypt_core((uint32_t*)B, V, N); + } else { + void* XY0 = malloc(256 * r + 64 + 63); + uint32_t* XY = (uint32_t *)(((uintptr_t)(XY0) + 63) & ~(uintptr_t)(63)); + + for (unsigned int i = 0; i < p; i++) { + SMix(&B[i * 128 * r], r, N, V, XY); + } + + free(XY0); } PBKDF2_SHA256((const uint8_t *)pass, pLen, B, p * 128 * r, 1, (uint8_t *)output, dkLen); free(V0); - free(XY0); free(B1); } diff --git a/src/crypto/scrypt2.cpp b/src/crypto/scrypt_opt.cpp similarity index 93% rename from src/crypto/scrypt2.cpp rename to src/crypto/scrypt_opt.cpp index e37fca4..e08badd 100644 --- a/src/crypto/scrypt2.cpp +++ b/src/crypto/scrypt_opt.cpp @@ -27,11 +27,40 @@ * online backup system. */ -#include "crypto/scrypt2.h" +#include "crypto/scrypt_opt.h" #include "compat.h" #include #include #include +#include + +static bool HAVE_AVX2 = false; + +#if defined(__x86_64__) +static inline void __attribute__((constructor)) check_avx2() +{ + int a, b, c, d, AVX_mask = (1<<28) | (1<<26) | (1<<27); + asm volatile("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(1), "c"(0)); //avx + if ((c & AVX_mask) == AVX_mask) { + printf("Have AVX\n"); + } else { + printf("Do not have AVX\n"); + } + + asm volatile("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(7), "c"(0)); //avx2 + if (b & (1<<5)) { + HAVE_AVX2 = true; + printf("Have AVX2\n"); + } else { + printf("Do not have AVX2\n"); + } +} +#elif defined(__ARM_NEON) +static inline void __attribute__((constructor)) display_neon() +{ + printf("Have NEON\n"); +} +#endif static const uint32_t sha256_h[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, @@ -44,7 +73,6 @@ void sha256_init(uint32_t *state) } #if defined(__i386__) - static const uint32_t sha256_k[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, @@ -564,22 +592,22 @@ static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate, #ifndef SCRYPT_MAX_WAYS #define 
SCRYPT_MAX_WAYS 1 -#define scrypt_best_throughput() 1 +#define SCRYPT_BEST_THROUGHPUT 1 #endif -unsigned char *scrypt_buffer_alloc(int N) +unsigned char *scrypt_buffer_alloc(int N, bool multiWay) { - return (unsigned char*)malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63); + return (unsigned char*)malloc((size_t)N * (multiWay ? (HAVE_AVX2 ? 2 * SCRYPT_MAX_WAYS : SCRYPT_MAX_WAYS) : 1) * 128 + 63); } -static void scrypt_N_1_1_256(const uint32_t *input, uint32_t *output, - uint32_t *midstate, unsigned char *scratchpad, int N) +static void scrypt_N_1_1_256(const uint32_t *input, + uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N) { uint32_t tstate[8], ostate[8]; uint32_t X[32] __attribute__((aligned(128))); uint32_t *V; - V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~(uintptr_t)(63)); memcpy(tstate, midstate, 32); HMAC_SHA256_80_init(input, tstate, ostate); @@ -601,7 +629,7 @@ static void scrypt_N_1_1_256_4way(const uint32_t *input, uint32_t *V; int i, k; - V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~(uintptr_t)(63)); for (i = 0; i < 20; i++) for (k = 0; k < 4; k++) @@ -637,7 +665,7 @@ static void scrypt_N_1_1_256_3way(const uint32_t *input, uint32_t X[3 * 32] __attribute__((aligned(64))); uint32_t *V; - V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~(uintptr_t)(63)); memcpy(tstate + 0, midstate, 32); memcpy(tstate + 8, midstate, 32); @@ -667,7 +695,7 @@ static void scrypt_N_1_1_256_12way(const uint32_t *input, uint32_t *V; int i, j, k; - V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~(uintptr_t)(63)); for (j = 0; j < 3; j++) for (i = 0; i < 20; i++) @@ -718,7 +746,7 @@ static void scrypt_N_1_1_256_24way(const uint32_t *input, uint32_t *V; int i, 
j, k; - V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~(uintptr_t)(63)); for (j = 0; j < 3; j++) for (i = 0; i < 20; i++) @@ -763,7 +791,7 @@ bool fulltest(const uint32_t *hash, const uint32_t *target) if (hash[i] > target[i]) { return false; } - if (hash[i] < target[i]) { + if (hash[i] <= target[i]) { return true; } } @@ -774,14 +802,14 @@ bool fulltest(const uint32_t *hash, const uint32_t *target) bool scrypt_N_1_1_256_multi(void *input, uint256 hashTarget, int *nHashesDone, unsigned char *scratchbuf, int N) { uint32_t pdata[20]; - uint32_t data[SCRYPT_MAX_WAYS * 20]; - uint32_t dhash[SCRYPT_MAX_WAYS * 8]; + uint32_t data[(2 * SCRYPT_MAX_WAYS) * 20]; + uint32_t dhash[(2 * SCRYPT_MAX_WAYS) * 8]; uint32_t midstate[8]; uint32_t n; - int throughput = scrypt_best_throughput(); + int throughput = (HAVE_AVX2 ? 2 * SCRYPT_BEST_THROUGHPUT : SCRYPT_BEST_THROUGHPUT); int i; - for (int i = 0; i < 20; i++) + for (i = 0; i < 20; i++) pdata[i] = be32dec(&((const uint32_t *)input)[i]); n = pdata[19]; @@ -836,7 +864,7 @@ bool scryptHash(const void *input, char *output, int N) { uint32_t midstate[8]; uint32_t data[20]; - unsigned char *scratchbuf = scrypt_buffer_alloc(N); + unsigned char *scratchbuf = scrypt_buffer_alloc(N, false); memset(output, 0, 32); if (!scratchbuf) diff --git a/src/crypto/scrypt2.h b/src/crypto/scrypt_opt.h similarity index 51% rename from src/crypto/scrypt2.h rename to src/crypto/scrypt_opt.h index ca1814f..5a370bf 100644 --- a/src/crypto/scrypt2.h +++ b/src/crypto/scrypt_opt.h @@ -1,5 +1,9 @@ -#ifndef SCRYPT2_H -#define SCRYPT2_H +// Copyright (c) 2018-2019 The Simplicity developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef SCRYPT_OPT_H +#define SCRYPT_OPT_H #if defined HAVE_CONFIG_H #include "config/simplicity-config.h" @@ -9,27 +13,27 @@ #include #include #include +#include #include "uint256.h" #include "utilstrencodings.h" -static const int SCRYPT_SCRATCHPAD_SIZE = 134218239; +//static const int SCRYPT_SCRATCHPAD_SIZE = 134218239; //static const int N = 1048576; -int scrypt_best_throughput(); - -bool scrypt_N_1_1_256_multi(void *input, uint256 hashTarget, int *nHashesDone, unsigned char *scratchbuf); +bool scrypt_N_1_1_256_multi(void *input, uint256 hashTarget, int *nHashesDone, unsigned char *scratchbuf, int N); bool scryptHash(const void *input, char *output, int N); -extern unsigned char *scrypt_buffer_alloc(int N); +extern unsigned char *scrypt_buffer_alloc(int N, bool multiWay = true); extern "C" void scrypt_core(uint32_t *X, uint32_t *V, int N); +void sha256_init(uint32_t *state); extern "C" void sha256_transform(uint32_t *state, const uint32_t *block, int swap); #if defined(__x86_64__) && !defined(ENABLE_AVX2) #define SCRYPT_MAX_WAYS 12 #define HAVE_SCRYPT_3WAY 1 #define HAVE_SHA256_4WAY 1 -#define scrypt_best_throughput() 3; +#define SCRYPT_BEST_THROUGHPUT 3 extern "C" int sha256_use_4way(); extern "C" void sha256_init_4way(uint32_t *state); extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); @@ -37,35 +41,63 @@ extern "C" void scrypt_core_3way(uint32_t *X, uint32_t *V, int N); #endif #if defined(__x86_64__) && defined(ENABLE_AVX2) -#define SCRYPT_MAX_WAYS 24 -#define HAVE_SCRYPT_6WAY 1 +#define SCRYPT_MAX_WAYS 12 +#define HAVE_SCRYPT_3WAY 1 #define HAVE_SHA256_4WAY 1 +#define SCRYPT_BEST_THROUGHPUT 3 +extern "C" int sha256_use_4way(); +extern "C" void sha256_init_4way(uint32_t *state); +extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); +extern "C" void scrypt_core_3way(uint32_t *X, uint32_t *V, int N); + +//#define SCRYPT_MAX_WAYS 24 +#define HAVE_SCRYPT_6WAY 1 +//#define 
HAVE_SHA256_4WAY 1 #define HAVE_SHA256_8WAY 1 -#define scrypt_best_throughput() 6; +//#define SCRYPT_BEST_THROUGHPUT 6 extern "C" int sha256_use_8way(); extern "C" void sha256_init_8way(uint32_t *state); extern "C" void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap); -extern "C" int sha256_use_4way(); -extern "C" void sha256_init_4way(uint32_t *state); -extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); +//extern "C" int sha256_use_4way(); +//extern "C" void sha256_init_4way(uint32_t *state); +//extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); extern "C" void scrypt_core_6way(uint32_t *X, uint32_t *V, int N); #endif #if defined(__i386__) #define SCRYPT_MAX_WAYS 4 #define HAVE_SHA256_4WAY 1 -#define scrypt_best_throughput() 1 -extern "C" void scrypt_core(uint32_t *X, uint32_t *V, int N); +#define SCRYPT_BEST_THROUGHPUT 1 +extern "C" int sha256_use_4way(); +extern "C" void sha256_init_4way(uint32_t *state); +extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); +#endif + +#if defined(__arm__) && defined(__APCS_32__) +#if !defined(__ARM_NEON) +#define SCRYPT_MAX_WAYS 1 +#define SCRYPT_BEST_THROUGHPUT 1 +#else +#define SCRYPT_MAX_WAYS 12 +#define HAVE_SCRYPT_3WAY 1 +#define HAVE_SHA256_4WAY 1 +#define SCRYPT_BEST_THROUGHPUT 3 extern "C" int sha256_use_4way(); extern "C" void sha256_init_4way(uint32_t *state); extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); +extern "C" void scrypt_core_3way(uint32_t *X, uint32_t *V, int N); +#endif #endif -#define bswap_32_scrypt(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \ - | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) +#if defined(__aarch64__) +#define SCRYPT_MAX_WAYS 3 +#define HAVE_SCRYPT_3WAY 1 +#define SCRYPT_BEST_THROUGHPUT 3 +extern "C" void scrypt_core_3way(uint32_t *X, uint32_t *V, int N); +#endif static inline 
uint32_t swab32(uint32_t v) { - return bswap_32_scrypt(v); + return bswap_32(v); } #endif diff --git a/src/crypto/sha2-arm.S b/src/crypto/sha2-arm.S index 58e883d..e86f3de 100644 --- a/src/crypto/sha2-arm.S +++ b/src/crypto/sha2-arm.S @@ -11,7 +11,7 @@ #include "config/simplicity-config.h" #endif -#if defined(__arm__) && defined(__APCS_32__) +#if /*defined(USE_ASM) &&*/ defined(__arm__) && defined(__APCS_32__) diff --git a/src/crypto/sha2-armv8.c b/src/crypto/sha2-armv8.c new file mode 100644 index 0000000..99732f7 --- /dev/null +++ b/src/crypto/sha2-armv8.c @@ -0,0 +1,142 @@ +/* + * ARMv8-A Cryptography Extension SHA256 support functions + * + * Copyright (C) 2016, CriticalBlue Limited, All Rights Reserved + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * This file is part of mbed TLS (https://tls.mbed.org) + */ + +#include +#include + +#if defined(__aarch64__) + +#include + +static const uint32_t sha256_k[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define Rx(T0, T1, K, W0, W1, W2, W3) \ + W0 = vsha256su0q_u32( W0, W1 ); \ + d2 = d0; \ + T1 = vaddq_u32( W1, K ); \ + d0 = vsha256hq_u32( d0, d1, T0 ); \ + d1 = vsha256h2q_u32( d1, d2, T0 ); \ + W0 = vsha256su1q_u32( W0, W2, W3 ); + +#define Ry(T0, T1, K, W1) \ + d2 = d0; \ + T1 = vaddq_u32( W1, K ); \ + d0 = vsha256hq_u32( d0, d1, T0 ); \ + d1 = vsha256h2q_u32( d1, d2, T0 ); + +#define Rz(T0) \ + d2 = d0; \ + d0 = vsha256hq_u32( d0, d1, T0 ); \ + d1 = vsha256h2q_u32( d1, d2, T0 ); + +void sha256_transform(uint32_t *state, const uint32_t *block, int swap) +{ + /* declare variables */ + uint32x4_t k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, ka, kb, kc, kd, ke, kf; + uint32x4_t s0, s1; + uint32x4_t w0, w1, w2, w3; + uint32x4_t d0, d1, d2; + uint32x4_t t0, t1; + + /* set K0..Kf constants */ + k0 = vld1q_u32(&sha256_k[0x00]); + k1 = vld1q_u32(&sha256_k[0x04]); + k2 = vld1q_u32(&sha256_k[0x08]); + k3 = vld1q_u32(&sha256_k[0x0c]); + k4 = vld1q_u32(&sha256_k[0x10]); + k5 = vld1q_u32(&sha256_k[0x14]); + k6 = 
vld1q_u32(&sha256_k[0x18]); + k7 = vld1q_u32(&sha256_k[0x1c]); + k8 = vld1q_u32(&sha256_k[0x20]); + k9 = vld1q_u32(&sha256_k[0x24]); + ka = vld1q_u32(&sha256_k[0x28]); + kb = vld1q_u32(&sha256_k[0x2c]); + kc = vld1q_u32(&sha256_k[0x30]); + kd = vld1q_u32(&sha256_k[0x34]); + ke = vld1q_u32(&sha256_k[0x38]); + kf = vld1q_u32(&sha256_k[0x3c]); + + /* load state */ + s0 = vld1q_u32(&state[0]); + s1 = vld1q_u32(&state[4]); + + /* load message */ + w0 = vld1q_u32(block); + w1 = vld1q_u32(block + 4); + w2 = vld1q_u32(block + 8); + w3 = vld1q_u32(block + 12); + + if (swap) { + w0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w0))); + w1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w1))); + w2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w2))); + w3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w3))); + } + + /* initialize t0, d0, d1 */ + t0 = vaddq_u32(w0, k0); + d0 = s0; + d1 = s1; + + /* perform rounds of four */ + Rx(t0, t1, k1, w0, w1, w2, w3); + Rx(t1, t0, k2, w1, w2, w3, w0); + Rx(t0, t1, k3, w2, w3, w0, w1); + Rx(t1, t0, k4, w3, w0, w1, w2); + Rx(t0, t1, k5, w0, w1, w2, w3); + Rx(t1, t0, k6, w1, w2, w3, w0); + Rx(t0, t1, k7, w2, w3, w0, w1); + Rx(t1, t0, k8, w3, w0, w1, w2); + Rx(t0, t1, k9, w0, w1, w2, w3); + Rx(t1, t0, ka, w1, w2, w3, w0); + Rx(t0, t1, kb, w2, w3, w0, w1); + Rx(t1, t0, kc, w3, w0, w1, w2); + Ry(t0, t1, kd, w1); + Ry(t1, t0, ke, w2); + Ry(t0, t1, kf, w3); + Rz(t1); + + /* update state */ + s0 = vaddq_u32(s0, d0); + s1 = vaddq_u32(s1, d1); + + /* save state */ + vst1q_u32(&state[0], s0); + vst1q_u32(&state[4], s1); +} +#endif diff --git a/src/crypto/sha2-x64.S b/src/crypto/sha2-x64.S index 74b2325..90bde71 100644 --- a/src/crypto/sha2-x64.S +++ b/src/crypto/sha2-x64.S @@ -15,7 +15,7 @@ .section .note.GNU-stack,"",%progbits #endif -#if defined(__x86_64__) +#if /*defined(USE_ASM) &&*/ defined(__x86_64__) .data .p2align 4 @@ -1626,7 +1626,7 @@ sha256d_8preext2_24: sha256d_8preext2_30: .long 0x00400022, 0x00400022, 
0x00400022, 0x00400022, 0x00400022, 0x00400022, 0x00400022, 0x00400022 -#endif /* USE_AVX2 */ +#endif /* ENABLE_AVX2 */ .text @@ -1692,7 +1692,7 @@ _sha256_init_8way: popq %rdi #endif ret -#endif /* USE_AVX2 */ +#endif /* ENABLE_AVX2 */ @@ -1700,13 +1700,13 @@ _sha256_init_8way: -#if defined(ENABLE_AVX2) +#if defined(ENABLE_AVX) -#endif /* USE_AVX */ +#endif /* ENABLE_AVX */ #if defined(ENABLE_AVX2) @@ -1715,16 +1715,16 @@ _sha256_init_8way: -#endif /* USE_AVX2 */ +#endif /* ENABLE_AVX2 */ -#if defined(USE_XOP) +#if defined(ENABLE_XOP) -#endif /* USE_XOP */ +#endif /* ENABLE_XOP */ .text @@ -1879,7 +1879,7 @@ sha256_transform_4way_sse2_main_loop: jmp sha256_transform_4way_finish -#if defined(ENABLE_AVX2) +#if defined(ENABLE_AVX) .text .p2align 6 sha256_transform_4way_core_avx: @@ -5745,10 +5745,10 @@ sha256_transform_4way_core_avx: vpxor %xmm2, %xmm7, %xmm7 vpaddd %xmm6, %xmm7, %xmm7 jmp sha256_transform_4way_finish -#endif /* USE_AVX */ +#endif /* ENABLE_AVX */ -#if defined(USE_XOP) +#if defined(ENABLE_XOP) .text .p2align 6 sha256_transform_4way_core_xop: @@ -8462,7 +8462,7 @@ sha256_transform_4way_core_xop: vpxor %xmm2, %xmm7, %xmm7 vpaddd %xmm6, %xmm7, %xmm7 jmp sha256_transform_4way_finish -#endif /* USE_XOP */ +#endif /* ENABLE_XOP */ .data @@ -12795,7 +12795,7 @@ sha256_transform_8way_finish: #endif ret -#endif /* USE_AVX2 */ +#endif /* ENABLE_AVX2 */ .data @@ -18451,7 +18451,7 @@ sha256d_ms_4way_sse2_finish: ret -#if defined(ENABLE_AVX2) +#if defined(ENABLE_AVX) .p2align 6 sha256d_ms_4way_avx: @@ -22566,10 +22566,10 @@ sha256d_ms_4way_avx_finish: #endif ret -#endif /* USE_AVX */ +#endif /* ENABLE_AVX */ -#if defined(USE_XOP) +#if defined(ENABLE_XOP) .p2align 6 sha256d_ms_4way_xop: @@ -25489,7 +25489,7 @@ sha256d_ms_4way_xop_finish: #endif ret -#endif /* USE_XOP */ +#endif /* ENABLE_XOP */ .text @@ -25502,7 +25502,7 @@ _sha256_use_4way: pushq %rcx pushq %rdx -#if defined(ENABLE_AVX2) +#if defined(ENABLE_AVX) /* Check for AVX and OSXSAVE support */ movl $1, 
%eax cpuid @@ -25515,7 +25515,7 @@ _sha256_use_4way: andl $0x00000006, %eax cmpl $0x00000006, %eax jne sha256_use_4way_base -#if defined(USE_XOP) +#if defined(ENABLE_XOP) /* Check for XOP support */ movl $0x80000001, %eax cpuid @@ -25526,13 +25526,13 @@ sha256_use_4way_xop: leaq sha256d_ms_4way_xop(%rip), %rcx leaq sha256_transform_4way_core_xop(%rip), %rdx jmp sha256_use_4way_done -#endif /* USE_XOP */ +#endif /* ENABLE_XOP */ sha256_use_4way_avx: leaq sha256d_ms_4way_avx(%rip), %rcx leaq sha256_transform_4way_core_avx(%rip), %rdx jmp sha256_use_4way_done -#endif /* USE_AVX */ +#endif /* ENABLE_AVX */ sha256_use_4way_base: leaq sha256d_ms_4way_sse2(%rip), %rcx @@ -29713,6 +29713,6 @@ sha256_use_8way_done: popq %rbx ret -#endif /* USE_AVX2 */ +#endif /* ENABLE_AVX2 */ #endif diff --git a/src/crypto/sha2-x86.S b/src/crypto/sha2-x86.S index def2d51..16664c0 100644 --- a/src/crypto/sha2-x86.S +++ b/src/crypto/sha2-x86.S @@ -15,7 +15,7 @@ .section .note.GNU-stack,"",%progbits #endif -#if defined(__i386__) +#if /*defined(USE_ASM) &&*/ defined(__i386__) .data .p2align 7 diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp index 8410e59..53ab277 100644 --- a/src/crypto/sha256.cpp +++ b/src/crypto/sha256.cpp @@ -3,13 +3,14 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "crypto/sha256.h" +#include "crypto/scrypt_opt.h" #include "crypto/common.h" #include // Internal implementation code. -namespace +/*namespace { /// Internal SHA-256 implementation. 
namespace sha256 @@ -19,19 +20,19 @@ uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); } uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); } uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); } -uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); } +uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); }*/ /** One round of SHA-256. */ -void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k, uint32_t w) +/*void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k, uint32_t w) { uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k + w; uint32_t t2 = Sigma0(a) + Maj(a, b, c); d += t1; h = t1 + t2; -} +}*/ /** Initialize SHA-256 state. */ -void inline Initialize(uint32_t* s) +/*void inline Initialize(uint32_t* s) { s[0] = 0x6a09e667ul; s[1] = 0xbb67ae85ul; @@ -41,10 +42,10 @@ void inline Initialize(uint32_t* s) s[5] = 0x9b05688cul; s[6] = 0x1f83d9abul; s[7] = 0x5be0cd19ul; -} +}*/ /** Perform one SHA-256 transformation, processing a 64-byte chunk. 
*/ -void Transform(uint32_t* s, const unsigned char* chunk) +/*void Transform(uint32_t* s, const unsigned char* chunk) { uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; @@ -128,14 +129,15 @@ void Transform(uint32_t* s, const unsigned char* chunk) } } // namespace sha256 -} // namespace +}*/ // namespace ////// SHA-256 CSHA256::CSHA256() : bytes(0) { - sha256::Initialize(s); + //sha256::Initialize(s); + sha256_init(s); } CSHA256& CSHA256::Write(const unsigned char* data, size_t len) @@ -147,12 +149,14 @@ CSHA256& CSHA256::Write(const unsigned char* data, size_t len) memcpy(buf + bufsize, data, 64 - bufsize); bytes += 64 - bufsize; data += 64 - bufsize; - sha256::Transform(s, buf); + //sha256::Transform(s, buf); + sha256_transform(s, (uint32_t*)buf, 1); bufsize = 0; } while (end >= data + 64) { // Process full chunks directly from the source. - sha256::Transform(s, data); + //sha256::Transform(s, data); + sha256_transform(s, (uint32_t*)data, 1); bytes += 64; data += 64; } @@ -184,6 +188,7 @@ void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) CSHA256& CSHA256::Reset() { bytes = 0; - sha256::Initialize(s); + //sha256::Initialize(s); + sha256_init(s); return *this; } diff --git a/src/hash.cpp b/src/hash.cpp index f470c11..58b307e 100644 --- a/src/hash.cpp +++ b/src/hash.cpp @@ -78,46 +78,4 @@ void BIP32Hash(const ChainCode chainCode, unsigned int nChild, unsigned char hea num[2] = (nChild >> 8) & 0xFF; num[3] = (nChild >> 0) & 0xFF; CHMAC_SHA512(chainCode.begin(), chainCode.size()).Write(&header, 1).Write(data, 32).Write(num, 4).Finalize(output); -} - -uint256 scrypt_hash(const void* input, size_t inputlen) -{ - uint256 result = 0; - - scrypt((const char*)input, inputlen, (const char*)input, inputlen, (char*)&result, 1024, 1, 1, 32); - - return result; -} - -uint256 scrypt_salted_hash(const void* input, size_t inputlen, const void* salt, size_t 
saltlen) -{ - uint256 result = 0; - - scrypt((const char*)input, inputlen, (const char*)salt, saltlen, (char*)&result, 1024, 1, 1, 32); - - return result; -} - -uint256 scrypt_salted_multiround_hash(const void* input, size_t inputlen, const void* salt, size_t saltlen, const unsigned int nRounds) -{ - uint256 resultHash = scrypt_salted_hash(input, inputlen, salt, saltlen); - uint256 transitionalHash = resultHash; - - for (unsigned int i = 1; i < nRounds; i++) - { - resultHash = scrypt_salted_hash(input, inputlen, (const void*)&transitionalHash, 32); - transitionalHash = resultHash; - } - - return resultHash; -} - -uint256 scrypt_blockhash(const void* input) -{ - return scrypt_hash(input, 80); -} - -void scrypt_hash(const char* pass, unsigned int pLen, const char* salt, unsigned int sLen, char* output, unsigned int N, unsigned int r, unsigned int p, unsigned int dkLen) -{ - scrypt(pass, pLen, salt, sLen, output, N, r, p, dkLen); } \ No newline at end of file diff --git a/src/hash.h b/src/hash.h index 327fd7a..4cbfca4 100644 --- a/src/hash.h +++ b/src/hash.h @@ -22,9 +22,10 @@ #include "crypto/sph_jh.h" #include "crypto/sph_keccak.h" #include "crypto/sph_skein.h" +#include "crypto/sha1.h" #include "crypto/sha512.h" #include "crypto/scrypt.h" -#include "crypto/scrypt2.h" +#include "crypto/scrypt_opt.h" #include #include @@ -33,6 +34,8 @@ typedef uint256 ChainCode; +static const unsigned char PBLANK[1] = {}; +static const uint256 ZERO = uint256(0); /** A hasher class for Bitcoin's 256-bit hash (double SHA-256). 
*/ class CHash256 @@ -63,6 +66,34 @@ class CHash256 } }; +class CHash1 +{ +private: + CSHA1 sha; + +public: + static const size_t OUTPUT_SIZE = CSHA1::OUTPUT_SIZE; + + void Finalize(unsigned char hash[OUTPUT_SIZE]) + { + unsigned char buf[CSHA1::OUTPUT_SIZE]; + sha.Finalize(buf); + sha.Reset().Write(buf, CSHA1::OUTPUT_SIZE).Finalize(hash); + } + + CHash1& Write(const unsigned char* data, size_t len) + { + sha.Write(data, len); + return *this; + } + + CHash1& Reset() + { + sha.Reset(); + return *this; + } +}; + class CHash512 { private: @@ -179,17 +210,35 @@ inline void Hash(void* in, unsigned int len, unsigned char* out) template inline uint512 Hash512(const T1 pbegin, const T1 pend) { - static const unsigned char pblank[1] = {}; uint512 result; - CHash512().Write(pbegin == pend ? pblank : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0])).Finalize((unsigned char*)&result); + CHash512().Write(pbegin == pend ? PBLANK : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0])).Finalize((unsigned char*)&result); return result; } + +/** Compute the 512-bit hash of the concatenation of two objects. */ template inline uint512 Hash512(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 p2end) { - static const unsigned char pblank[1] = {}; uint512 result; - CHash512().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Finalize((unsigned char*)&result); + CHash512().Write(p1begin == p1end ? PBLANK : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? PBLANK : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Finalize((unsigned char*)&result); + return result; +} + +/** Compute the 160-bit hash of an object. 
*/ +template +inline uint256 Hash1(const T1 pbegin, const T1 pend) +{ + uint256 result; + CHash1().Write(pbegin == pend ? PBLANK : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0])).Finalize((unsigned char*)&result); + return result; +} + +/** Compute the 160-bit hash of the concatenation of two objects. */ +template +inline uint256 Hash1(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 p2end) +{ + uint256 result; + CHash1().Write(p1begin == p1end ? PBLANK : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? PBLANK : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Finalize((unsigned char*)&result); return result; } @@ -197,9 +246,8 @@ inline uint512 Hash512(const T1 p1begin, const T1 p1end, const T2 p2begin, const template inline uint256 Hash(const T1 pbegin, const T1 pend) { - static const unsigned char pblank[1] = {}; uint256 result; - CHash256().Write(pbegin == pend ? pblank : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0])).Finalize((unsigned char*)&result); + CHash256().Write(pbegin == pend ? PBLANK : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0])).Finalize((unsigned char*)&result); return result; } @@ -207,9 +255,8 @@ inline uint256 Hash(const T1 pbegin, const T1 pend) template inline uint256 Hash(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 p2end) { - static const unsigned char pblank[1] = {}; uint256 result; - CHash256().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Finalize((unsigned char*)&result); + CHash256().Write(p1begin == p1end ? PBLANK : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? 
PBLANK : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Finalize((unsigned char*)&result); return result; } @@ -217,49 +264,44 @@ inline uint256 Hash(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 template inline uint256 Hash(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 p2end, const T3 p3begin, const T3 p3end) { - static const unsigned char pblank[1] = {}; uint256 result; - CHash256().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? pblank : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Finalize((unsigned char*)&result); + CHash256().Write(p1begin == p1end ? PBLANK : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? PBLANK : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? PBLANK : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Finalize((unsigned char*)&result); return result; } -/** Compute the 256-bit hash of the concatenation of three objects. */ +/** Compute the 256-bit hash of the concatenation of four objects. */ template inline uint256 Hash(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 p2end, const T3 p3begin, const T3 p3end, const T4 p4begin, const T4 p4end) { - static const unsigned char pblank[1] = {}; uint256 result; - CHash256().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? pblank : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Write(p4begin == p4end ? 
pblank : (const unsigned char*)&p4begin[0], (p4end - p4begin) * sizeof(p4begin[0])).Finalize((unsigned char*)&result); + CHash256().Write(p1begin == p1end ? PBLANK : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? PBLANK : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? PBLANK : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Write(p4begin == p4end ? PBLANK : (const unsigned char*)&p4begin[0], (p4end - p4begin) * sizeof(p4begin[0])).Finalize((unsigned char*)&result); return result; } -/** Compute the 256-bit hash of the concatenation of three objects. */ +/** Compute the 256-bit hash of the concatenation of five objects. */ template inline uint256 Hash(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 p2end, const T3 p3begin, const T3 p3end, const T4 p4begin, const T4 p4end, const T5 p5begin, const T5 p5end) { - static const unsigned char pblank[1] = {}; uint256 result; - CHash256().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? pblank : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Write(p4begin == p4end ? pblank : (const unsigned char*)&p4begin[0], (p4end - p4begin) * sizeof(p4begin[0])).Write(p5begin == p5end ? pblank : (const unsigned char*)&p5begin[0], (p5end - p5begin) * sizeof(p5begin[0])).Finalize((unsigned char*)&result); + CHash256().Write(p1begin == p1end ? PBLANK : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? PBLANK : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? PBLANK : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Write(p4begin == p4end ? 
PBLANK : (const unsigned char*)&p4begin[0], (p4end - p4begin) * sizeof(p4begin[0])).Write(p5begin == p5end ? PBLANK : (const unsigned char*)&p5begin[0], (p5end - p5begin) * sizeof(p5begin[0])).Finalize((unsigned char*)&result); return result; } -/** Compute the 256-bit hash of the concatenation of three objects. */ +/** Compute the 256-bit hash of the concatenation of six objects. */ template inline uint256 Hash(const T1 p1begin, const T1 p1end, const T2 p2begin, const T2 p2end, const T3 p3begin, const T3 p3end, const T4 p4begin, const T4 p4end, const T5 p5begin, const T5 p5end, const T6 p6begin, const T6 p6end) { - static const unsigned char pblank[1] = {}; uint256 result; - CHash256().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? pblank : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Write(p4begin == p4end ? pblank : (const unsigned char*)&p4begin[0], (p4end - p4begin) * sizeof(p4begin[0])).Write(p5begin == p5end ? pblank : (const unsigned char*)&p5begin[0], (p5end - p5begin) * sizeof(p5begin[0])).Write(p6begin == p6end ? pblank : (const unsigned char*)&p6begin[0], (p6end - p6begin) * sizeof(p6begin[0])).Finalize((unsigned char*)&result); + CHash256().Write(p1begin == p1end ? PBLANK : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0])).Write(p2begin == p2end ? PBLANK : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0])).Write(p3begin == p3end ? PBLANK : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0])).Write(p4begin == p4end ? PBLANK : (const unsigned char*)&p4begin[0], (p4end - p4begin) * sizeof(p4begin[0])).Write(p5begin == p5end ? PBLANK : (const unsigned char*)&p5begin[0], (p5end - p5begin) * sizeof(p5begin[0])).Write(p6begin == p6end ? 
PBLANK : (const unsigned char*)&p6begin[0], (p6end - p6begin) * sizeof(p6begin[0])).Finalize((unsigned char*)&result); return result; } -/** Compute the 160-bit hash an object. */ +/** Compute the 160-bit hash of an object. */ template inline uint160 Hash160(const T1 pbegin, const T1 pend) { - static unsigned char pblank[1] = {}; uint160 result; - CHash160().Write(pbegin == pend ? pblank : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0])).Finalize((unsigned char*)&result); + CHash160().Write(pbegin == pend ? PBLANK : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0])).Finalize((unsigned char*)&result); return result; } @@ -317,10 +359,47 @@ unsigned int MurmurHash3(unsigned int nHashSeed, const std::vector(&pbegin[0])), (pend - pbegin) * sizeof(pbegin[0])); + sph_blake512(&ctx_blake, (pbegin == pend ? PBLANK : static_cast(&pbegin[0])), (pend - pbegin) * sizeof(pbegin[0])); sph_blake512_close(&ctx_blake, static_cast(&hash[0])); sph_bmw512_init(&ctx_bmw); @@ -415,22 +493,29 @@ inline uint256 HashQuark(const T1 pbegin, const T1 pend) template inline uint256 HashScrypt(const T1 pbegin, const T1 pend) { - static unsigned char pblank[1]; - return scrypt_hash((pbegin == pend ? pblank : static_cast(&pbegin[0])), (pend - pbegin) * sizeof(pbegin[0])); + uint256 result; + if ((pend - pbegin) * sizeof(pbegin[0]) != 80 || !scryptHash(static_cast(&pbegin[0]), (char*)&result, 1024) || result == ZERO) { + LogPrintf("Falling back to original implementation to generate normal scrypt hash\n"); + return scrypt_hash((pbegin == pend ? PBLANK : static_cast(&pbegin[0])), (pend - pbegin) * sizeof(pbegin[0])); + } + return result; } -/* ----------- Scrypt^2 Hash ------------------------------------------------ */ +/* ----------- Scrypt² Hash ------------------------------------------------ */ template inline uint256 HashScryptSquared(const T1 pbegin, const T1 pend) { - static unsigned char pblank[1]; - //return scrypt_hash((pbegin == pend ? 
pblank : static_cast(&pbegin[0])), (pend - pbegin) * sizeof(pbegin[0]), 1048576); - uint256 result = ~uint256(0); - if (!scryptHash((pbegin == pend ? pblank : static_cast(&pbegin[0])), (char*)&result, 1048576)) - LogPrintf("Failed to generate scrypt² hash!\n"); + uint256 result; + if ((pend - pbegin) * sizeof(pbegin[0]) != 80 || !scryptHash(static_cast(&pbegin[0]), (char*)&result, 1048576) || result == ZERO) { + LogPrintf("Falling back to original implementation to generate scrypt² hash\n"); + return scrypt_hash((pbegin == pend ? PBLANK : static_cast(&pbegin[0])), (pend - pbegin) * sizeof(pbegin[0]), 1048576); + } return result; } -void scrypt_hash(const char* pass, unsigned int pLen, const char* salt, unsigned int sLen, char* output, unsigned int N, unsigned int r, unsigned int p, unsigned int dkLen); +inline void scrypt_hash(const char* pass, unsigned int pLen, const char* salt, unsigned int sLen, char* output, unsigned int N, unsigned int r, unsigned int p, unsigned int dkLen) +{ + scrypt(pass, pLen, salt, sLen, output, N, r, p, dkLen); +} #endif // SIMPLICITY_HASH_H \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 815e996..79dc8e4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2095,7 +2095,7 @@ bool ReadBlockFromDisk(CBlock& block, const CDiskBlockPos& pos) // Check the header // treat PoW and PoS blocks the same - don't waste time on redundant PoW checks that won't catch invalid PoS blocks anyway - if (block.GetHash() != Params().HashGenesisBlock() && block.IsProofOfWork() && CBlockHeader::GetAlgo(block.nVersion) != POW_SCRYPT_SQUARED && !CheckProofOfWork(&block)) + if (block.IsProofOfWork() && CBlockHeader::GetAlgo(block.nVersion) != POW_SCRYPT_SQUARED && !CheckProofOfWork(&block)) return error("ReadBlockFromDisk : Errors in block header"); return true; @@ -3079,7 +3079,7 @@ bool ConnectBlock(const CBlock& block, CValidationState& state, CBlockIndex* pin // return state.DoS(100, error("ConnectBlock() : PoW period ended"), // 
REJECT_INVALID, "PoW-ended"); - if (block.nVersion < Params().WALLET_UPGRADE_VERSION() && /*block.GetHash() != Params().HashGenesisBlock() &&*/ !CheckWork(block, pindex->pprev)) + if ((fVerifyingBlocks || fReindex || block.nVersion < Params().WALLET_UPGRADE_VERSION()) && /*block.GetHash() != Params().HashGenesisBlock() &&*/ !CheckWork(block, pindex->pprev)) return false; if (block.IsProofOfStake()) { @@ -3582,14 +3582,13 @@ void static UpdateTip(CBlockIndex* pindexNew) int nUpgraded = 0; const CBlockIndex* pindex = chainActive.Tip(); for (int i = 0; i < 100 && pindex != NULL; i++) { - if (pindex->nVersion > ALGO_POW_SCRYPT_SQUARED) + if (pindex->nVersion > (uint32_t)ALGO_POW_SCRYPT_SQUARED) ++nUpgraded; pindex = pindex->pprev; } if (nUpgraded > 0) - LogPrintf("%s: %d of last 100 blocks above version %d\n", __func__, nUpgraded, ALGO_POW_SCRYPT_SQUARED); - if (nUpgraded > 100/2) - { + LogPrintf("%s: %i of last 100 blocks above version %u\n", __func__, nUpgraded, ALGO_POW_SCRYPT_SQUARED); + if (nUpgraded > 100/2) { // strMiscWarning is read by GetWarnings(), called by Qt and the JSON-RPC code to warn the user: strMiscWarning = _("Warning: This version is obsolete; upgrade required!"); CAlert::Notify(strMiscWarning, true); @@ -4320,7 +4319,7 @@ bool CheckBlockHeader(const CBlockHeader& block, CValidationState& state, bool f return state.DoS(100, error("%s : block %s has an invalid type", __func__, block.GetHash().GetHex())); // Check proof of work matches claimed amount - if (block.GetHash() != Params().HashGenesisBlock() && (fVerifyingBlocks || fReindex || block.nTime >= nBlockCheckTime || CBlockHeader::GetAlgo(block.nVersion) != POW_SCRYPT_SQUARED) && fCheckPOW && block.IsProofOfWork() && !CheckProofOfWork(&block)) + if ((fVerifyingBlocks || fReindex || block.nTime >= nBlockCheckTime || CBlockHeader::GetAlgo(block.nVersion) != POW_SCRYPT_SQUARED) && fCheckPOW && block.IsProofOfWork() && !CheckProofOfWork(&block)) return state.DoS(50, error("%s : proof of work 
failed", __func__), REJECT_INVALID, "high-hash"); @@ -4544,7 +4543,7 @@ bool ContextualCheckBlockHeader(const CBlockHeader& block, CValidationState& sta if (Params().NetworkID() != CBaseChainParams::REGTEST && nHeight >= 10 + Params().WALLET_UPGRADE_BLOCK() + Params().COINSTAKE_MIN_DEPTH()) { int end = std::max(std::min(nHeight - 9 - Params().WALLET_UPGRADE_BLOCK() - Params().COINSTAKE_MIN_DEPTH(), 10), 0); // start checking one more at a time until we can enforce on all new blocks - int typeCount[ALGO_COUNT] = { }; + int typeCount[ALGO_COUNT] = {}; //int proofOfWorkCount = 0; if (CBlockHeader::GetAlgo(block.nVersion) == -1) return false; @@ -4616,7 +4615,7 @@ bool ContextualCheckBlockHeader(const CBlockHeader& block, CValidationState& sta return state.DoS(0, error("%s : forked chain older than last checkpoint (height %d)", __func__, nHeight)); // Reject block.nVersion=1, ..., CURRENT_VERSION-1 blocks when 95% (75% on testnet) of the network has upgraded: - for (int version = 2; version <= CBlockHeader::CURRENT_VERSION; version++) { + for (unsigned int version = 2; version <= CBlockHeader::CURRENT_VERSION; version++) { if (block.nVersion < version && CBlockIndex::IsSuperMajority(version, pindexPrev, Params().RejectBlockOutdatedMajority())) { return state.Invalid(error("%s : rejected nVersion=%d block", __func__, block.nVersion), REJECT_OBSOLETE, "bad-version"); } @@ -4693,7 +4692,7 @@ static bool AcceptBlockHeader(const CBlockHeader& block, CValidationState& state return true; } - if (block.nVersion >= Params().WALLET_UPGRADE_VERSION() && !CheckBlockHeader(block, state, !fAlreadyCheckedHeader)) { + if (!CheckBlockHeader(block, state, !fAlreadyCheckedHeader && (block.nNonce != 0 || block.nVersion >= Params().WALLET_UPGRADE_VERSION()))) { //nNonce = 0 for PoS blocks LogPrintf("%s : CheckBlockHeader failed\n", __func__); return false; } @@ -4792,7 +4791,7 @@ static bool AcceptBlock(CBlock& block, CValidationState& state, CBlockIndex** pp return true; } -bool 
CBlockIndex::IsSuperMajority(int minVersion, const CBlockIndex* pstart, unsigned int nRequired) +bool CBlockIndex::IsSuperMajority(unsigned int minVersion, const CBlockIndex* pstart, unsigned int nRequired) { unsigned int nToCheck = Params().ToCheckBlockUpgradeMajority(); unsigned int nFound = 0; @@ -4886,7 +4885,7 @@ bool ProcessNewBlock(CValidationState& state, CNode* pfrom, CBlock* pblock, bool if (!ActivateBestChain(state, pblock, checked)) return error("%s : ActivateBestChain failed", __func__); - LogPrintf("%s : ACCEPTED Block %ld in %ld milliseconds with size=%d\n", __func__, GetHeight(), GetTimeMillis() - nStartTime, + LogPrint("net", "%s : ACCEPTED Block %ld in %ld milliseconds with size=%d\n", __func__, GetHeight(), GetTimeMillis() - nStartTime, pblock->GetSerializeSize(SER_DISK, CLIENT_VERSION)); return true; @@ -6616,7 +6615,6 @@ bool static ProcessMessage(CNode* pfrom, std::string strCommand, CDataStream& vR } if (GetBoolArg("-headerspamfilter", DEFAULT_HEADER_SPAM_FILTER) && !IsInitialBlockDownload()) { - LOCK(cs_main); CValidationState state; CNodeState *nodestate = State(pfrom->GetId()); nodestate->headers.addHeaders(nFirst, nLast); diff --git a/src/miner.cpp b/src/miner.cpp index 28f59f0..b6de4bd 100644 --- a/src/miner.cpp +++ b/src/miner.cpp @@ -108,7 +108,7 @@ CBlockTemplate* CreateNewBlock(const CScript& scriptPubKeyIn, CWallet* pwallet, if (!pblocktemplate.get()) return NULL; CBlock* pblock = &pblocktemplate->block; // pointer for convenience - int ver = 0; + uint32_t ver = 0; // Tip CBlockIndex* pindexPrev = nullptr; @@ -475,7 +475,7 @@ CBlockTemplate* CreateNewBlock(const CScript& scriptPubKeyIn, CWallet* pwallet, nLastBlockTx = nBlockTx; nLastBlockSize = nBlockSize; - LogPrintf("CreateNewBlock(): total size %u\n", nBlockSize); + LogPrint("simplicity", "CreateNewBlock(): total size %u\n", nBlockSize); // Compute final coinbase transaction. 
if (!fProofOfStake) { @@ -581,7 +581,7 @@ void IncrementExtraNonce(CBlock* pblock, CBlockIndex* pindexPrev, unsigned int& // // Internal miner // -double dHashesPerSec = 0.0; +double dHashesPerMin = 0.0; int64_t nHPSTimerStart = 0; CBlockTemplate* CreateNewBlockWithKey(CReserveKey& reservekey, CWallet* pwallet) @@ -647,179 +647,224 @@ void BitcoinMiner(CWallet* pwallet, bool fProofOfStake) SetThreadPriority(THREAD_PRIORITY_LOWEST); RenameThread("simplicity-miner"); + // Build buffer and check for memory availability + bool memory = true; + unsigned char *scratchbuf = nullptr; + if (nCreateBlockAlgo == POW_SCRYPT_SQUARED) { + scratchbuf = scrypt_buffer_alloc(1048576); + if (!scratchbuf) { + memory = false; + LogPrintf("Failed to allocate memory for scrypt² mining thread!\n"); + } + } + // Each thread has its own key and counter CReserveKey reservekey(pwallet); unsigned int nExtraNonce = 0; bool fLastLoopOrphan = false; - while (fGenerateBitcoins || fProofOfStake) { - if (fProofOfStake) { - //control the amount of times the client will check for mintable coins - if ((GetTime() - nMintableLastCheck > 5 * 60)) // 5 minute check time - { - nMintableLastCheck = GetTime(); - fMintableCoins = pwallet->MintableCoins(); - } - - if (chainActive.Height() + 1 < Params().WALLET_UPGRADE_BLOCK() && Params().NetworkID() == CBaseChainParams::MAIN) { - MilliSleep(5000); - continue; // Do not stake until the upgrade block - } - - while (pwallet->IsLocked() || !fMintableCoins || (pwallet->GetBalance() > 0 && nReserveBalance >= pwallet->GetBalance()) || - ((vNodes.empty() || masternodeSync.NotCompleted()) && Params().MiningRequiresPeers())) { - nLastCoinStakeSearchInterval = 0; - MilliSleep(5000); - // Do a separate 1 minute check here to ensure fMintableCoins is updated - if (!fMintableCoins && (GetTime() - nMintableLastCheck > 1 * 60)) // 1 minute check time + try { + while ((fGenerateBitcoins && memory) || fProofOfStake) { + if (fProofOfStake) { + //control the amount of times the 
client will check for mintable coins + if ((GetTime() - nMintableLastCheck > 5 * 60)) // 5 minute check time { nMintableLastCheck = GetTime(); fMintableCoins = pwallet->MintableCoins(); } - } - //search our map of hashed blocks, see if bestblock has been hashed yet - if (mapHashedBlocks.count(chainActive.Tip()->nHeight) && !fLastLoopOrphan) - { - // wait half of the nHashDrift with max wait of 3 minutes - if (GetTime() - mapHashedBlocks[chainActive.Tip()->nHeight] < std::max(pwallet->nHashInterval, (unsigned int)1)) - { + if (chainActive.Height() + 1 < Params().WALLET_UPGRADE_BLOCK() && Params().NetworkID() == CBaseChainParams::MAIN) { MilliSleep(5000); - continue; + continue; // Do not stake until the upgrade block + } + + while (pwallet->IsLocked() || !fMintableCoins || (pwallet->GetBalance() > 0 && nReserveBalance >= pwallet->GetBalance()) || + ((vNodes.empty() || masternodeSync.NotCompleted()) && Params().MiningRequiresPeers())) { + nLastCoinStakeSearchInterval = 0; + MilliSleep(5000); + // Do a separate 1 minute check here to ensure fMintableCoins is updated + if (!fMintableCoins && (GetTime() - nMintableLastCheck > 1 * 60)) // 1 minute check time + { + nMintableLastCheck = GetTime(); + fMintableCoins = pwallet->MintableCoins(); + } + } + + //search our map of hashed blocks, see if bestblock has been hashed yet + if (mapHashedBlocks.count(chainActive.Tip()->nHeight) && !fLastLoopOrphan) + { + // wait half of the nHashDrift with max wait of 3 minutes + if (GetTime() - mapHashedBlocks[chainActive.Tip()->nHeight] < std::max(pwallet->nHashInterval, (unsigned int)1)) + { + MilliSleep(5000); + continue; + } } } - } - MilliSleep(1000); + //MilliSleep(1000); - // - // Create new block - // - unsigned int nTransactionsUpdatedLast = mempool.GetTransactionsUpdated(); - CBlockIndex* pindexPrev = chainActive.Tip(); - if (!pindexPrev) - continue; + // + // Create new block + // + unsigned int nTransactionsUpdatedLast = mempool.GetTransactionsUpdated(); + CBlockIndex* 
pindexPrev = chainActive.Tip(); + if (!pindexPrev) + continue; - std::unique_ptr pblocktemplate( - fProofOfStake ? CreateNewBlock(CScript(), pwallet, fProofOfStake) : CreateNewBlockWithKey(reservekey, pwallet) - ); - if (!pblocktemplate.get()) - continue; + std::unique_ptr pblocktemplate( + fProofOfStake ? CreateNewBlock(CScript(), pwallet, fProofOfStake) : CreateNewBlockWithKey(reservekey, pwallet) + ); + if (!pblocktemplate.get()) + continue; - CBlock* pblock = &pblocktemplate->block; - IncrementExtraNonce(pblock, pindexPrev, nExtraNonce); + CBlock* pblock = &pblocktemplate->block; + IncrementExtraNonce(pblock, pindexPrev, nExtraNonce); - //Stake miner main - if (fProofOfStake) { - LogPrintf("CPUMiner : proof-of-stake block found %s\n", pblock->GetHash().ToString().c_str()); - if (pblock->IsZerocoinStake()) { - //Find the key associated with the zerocoin that is being staked - libzerocoin::CoinSpend spend = TxInToZerocoinSpend(pblock->vtx[1].vin[0]); - CBigNum bnSerial = spend.getCoinSerialNumber(); - CKey key; - if (!pwallet->GetZerocoinKey(bnSerial, key)) { - LogPrintf("%s: failed to find zSPL with serial %s, unable to sign block\n", __func__, bnSerial.GetHex()); + //Stake miner main + if (fProofOfStake) { + LogPrintf("CPUMiner : proof-of-stake block found %s\n", pblock->GetHash().ToString().c_str()); + if (pblock->IsZerocoinStake()) { + //Find the key associated with the zerocoin that is being staked + libzerocoin::CoinSpend spend = TxInToZerocoinSpend(pblock->vtx[1].vin[0]); + CBigNum bnSerial = spend.getCoinSerialNumber(); + CKey key; + if (!pwallet->GetZerocoinKey(bnSerial, key)) { + LogPrintf("%s: failed to find zSPL with serial %s, unable to sign block\n", __func__, bnSerial.GetHex()); + continue; + } + + //Sign block with the zSPL key + if (!SignBlockWithKey(*pblock, key)) { + LogPrintf("%s: Signing new block with zSPL key failed\n", __func__); + continue; + } + } else if (!SignBlock(*pblock, *pwallet)) { + LogPrintf("%s: Signing new block with UTXO key 
failed\n", __func__); continue; } - //Sign block with the zSPL key - if (!SignBlockWithKey(*pblock, key)) { - LogPrintf("%s: Signing new block with zSPL key failed\n", __func__); + LogPrintf("CPUMiner : proof-of-stake block was signed %s\n", pblock->GetHash().ToString().c_str()); + SetThreadPriority(THREAD_PRIORITY_NORMAL); + if (!ProcessBlockFound(pblock, *pwallet, reservekey)) { + fLastLoopOrphan = true; continue; } - } else if (!SignBlock(*pblock, *pwallet)) { - LogPrintf("%s: Signing new block with UTXO key failed\n", __func__); - continue; - } + SetThreadPriority(THREAD_PRIORITY_LOWEST); - LogPrintf("CPUMiner : proof-of-stake block was signed %s\n", pblock->GetHash().ToString().c_str()); - SetThreadPriority(THREAD_PRIORITY_NORMAL); - if (!ProcessBlockFound(pblock, *pwallet, reservekey)) { - fLastLoopOrphan = true; continue; } - SetThreadPriority(THREAD_PRIORITY_LOWEST); - - continue; - } - - LogPrintf("Running SimplicityMiner with %u transactions in block (%u bytes)\n", pblock->vtx.size(), - ::GetSerializeSize(*pblock, SER_NETWORK, PROTOCOL_VERSION)); - // - // Search - // - int64_t nStart = GetTime(); - uint256 hashTarget = uint256().SetCompact(pblock->nBits); - while (true) { - unsigned int nHashesDone = 0; + LogPrint("simplicity", "Running SimplicityMiner with %u transactions in block (%u bytes)\n", pblock->vtx.size(), + ::GetSerializeSize(*pblock, SER_NETWORK, PROTOCOL_VERSION)); - uint256 hash; + // + // Search + // + int64_t nStart = GetTime(); + uint256 hashTarget = uint256().SetCompact(pblock->nBits); while (true) { - hash = pblock->GetPoWHash(); - if (hash <= hashTarget) { - // Found a solution - SetThreadPriority(THREAD_PRIORITY_NORMAL); - LogPrintf("%s:\n", __func__); - LogPrintf("proof-of-work found \n hash: %s \ntarget: %s\n", hash.GetHex(), hashTarget.GetHex()); - ProcessBlockFound(pblock, *pwallet, reservekey); - SetThreadPriority(THREAD_PRIORITY_LOWEST); - - // In regression test mode, stop mining after a block is found. 
This - // allows developers to controllably generate a block on demand. - if (Params().MineBlocksOnDemand()) - throw boost::thread_interrupted(); + unsigned int nHashesDone = 0; + + if (nCreateBlockAlgo == POW_SCRYPT_SQUARED) { + unsigned int runs = 0; + while (true) { + int nHashes = 0; + if (scrypt_N_1_1_256_multi(BEGIN(pblock->nVersion), hashTarget, &nHashes, scratchbuf, 1048576)) { + // Found a solution + SetThreadPriority(THREAD_PRIORITY_NORMAL); + LogPrintf("%s:\n", __func__); + LogPrintf("proof-of-work found\n hash: %s\n target: %s\n nonce: %i\n", pblock->GetPoWHash().GetHex(), hashTarget.GetHex(), pblock->nNonce); + ProcessBlockFound(pblock, *pwallet, reservekey); + SetThreadPriority(THREAD_PRIORITY_LOWEST); + + // In regression test mode, stop mining after a block is found. This + // allows developers to controllably generate a block on demand. + if (Params().MineBlocksOnDemand()) + throw boost::thread_interrupted(); - break; + break; + } + pblock->nNonce += nHashes; + nHashesDone += nHashes; + if (runs & 0x1) + break; + runs++; + } + } else { + uint256 hash; + while (true) { + hash = pblock->GetPoWHash(); + if (hash <= hashTarget) { + // Found a solution + SetThreadPriority(THREAD_PRIORITY_NORMAL); + LogPrintf("%s:\n", __func__); + LogPrintf("proof-of-work found\n hash: %s\n target: %s\n nonce: %i\n", hash.GetHex(), hashTarget.GetHex(), pblock->nNonce); + ProcessBlockFound(pblock, *pwallet, reservekey); + SetThreadPriority(THREAD_PRIORITY_LOWEST); + + // In regression test mode, stop mining after a block is found. This + // allows developers to controllably generate a block on demand. 
+ if (Params().MineBlocksOnDemand()) + throw boost::thread_interrupted(); + + break; + } + pblock->nNonce += 1; + nHashesDone += 1; + if ((pblock->nNonce & 0xFF) == 0) + break; + } } - pblock->nNonce += 1; - nHashesDone += 1; - if ((pblock->nNonce & 0xFF) == 0) - break; - } - // Meter hashes/sec - static int64_t nHashCounter; - if (nHPSTimerStart == 0) { - nHPSTimerStart = GetTimeMillis(); - nHashCounter = 0; - } else - nHashCounter += nHashesDone; - if (GetTimeMillis() - nHPSTimerStart > 4000) { - static CCriticalSection cs; + // Meter hashes/sec + static int64_t nHashCounter; { - LOCK(cs); - if (GetTimeMillis() - nHPSTimerStart > 4000) { - dHashesPerSec = 1000.0 * nHashCounter / (GetTimeMillis() - nHPSTimerStart); - nHPSTimerStart = GetTimeMillis(); - nHashCounter = 0; - static int64_t nLogTime; - if (GetTime() - nLogTime > 30 * 60) { - nLogTime = GetTime(); - LogPrintf("hashmeter %6.0f khash/s\n", dHashesPerSec / 1000.0); + static CCriticalSection cs; + { + LOCK(cs); + if (nHPSTimerStart == 0) { + nHPSTimerStart = GetTimeMillis(); + nHashCounter = 0; + } else + nHashCounter += nHashesDone; + + if (GetTimeMillis() - nHPSTimerStart > 30000) { + dHashesPerMin = 60000.0 * nHashCounter / (GetTimeMillis() - nHPSTimerStart); + nHPSTimerStart = GetTimeMillis(); + nHashCounter = 0; + static int64_t nLogTime; + if (GetTime() - nLogTime > 120) { + nLogTime = GetTime(); + LogPrintf("Total local hashrate %6.1f khash/min\n", dHashesPerMin/1000.0); + } } } } - } - // Check for stop or if block needs to be rebuilt - boost::this_thread::interruption_point(); - // Regtest mode doesn't require peers - if (vNodes.empty() && Params().MiningRequiresPeers()) - break; - if (pblock->nNonce >= 0xffff0000) - break; - if (mempool.GetTransactionsUpdated() != nTransactionsUpdatedLast && GetTime() - nStart > 60) - break; - if (pindexPrev != chainActive.Tip()) - break; - - // Update nTime every few seconds - UpdateTime(pblock, pindexPrev, pblock->IsProofOfStake()); - if 
(Params().AllowMinDifficultyBlocks()) { - // Changing pblock->nTime can change work required on testnet: - hashTarget.SetCompact(pblock->nBits); + // Check for stop or if block needs to be rebuilt + boost::this_thread::interruption_point(); + // Regtest mode doesn't require peers + if (vNodes.empty() && Params().MiningRequiresPeers()) + break; + if (pblock->nNonce >= 0xffff0000) + break; + if (mempool.GetTransactionsUpdated() != nTransactionsUpdatedLast && GetTime() - nStart > 60) + break; + if (pindexPrev != chainActive.Tip()) + break; + + // Update nTime every few seconds + UpdateTime(pblock, pindexPrev, fProofOfStake); + if (Params().AllowMinDifficultyBlocks()) { + // Changing pblock->nTime can change work required on testnet: + hashTarget.SetCompact(pblock->nBits); + } } } + } catch (boost::thread_interrupted) { + free(scratchbuf); + //LogPrintf("SimplicityMiner terminated\n"); + throw boost::thread_interrupted(); } } @@ -831,9 +876,9 @@ void static ThreadBitcoinMiner(void* parg) BitcoinMiner(pwallet, false); boost::this_thread::interruption_point(); } catch (std::exception& e) { - LogPrintf("SimplicityMiner exception"); + LogPrintf("SimplicityMiner exception\n"); } catch (...) 
{ - LogPrintf("SimplicityMiner exception"); + LogPrintf("SimplicityMiner exception\n"); } LogPrintf("SimplicityMiner exiting\n"); diff --git a/src/miner.h b/src/miner.h index fe552ba..198dc46 100644 --- a/src/miner.h +++ b/src/miner.h @@ -38,7 +38,7 @@ void UpdateTime(CBlockHeader* block, const CBlockIndex* pindexPrev, bool fProofO void ThreadStakeMinter(); #endif // ENABLE_WALLET -extern double dHashesPerSec; +extern double dHashesPerMin; extern int64_t nHPSTimerStart; #endif // BITCOIN_MINER_H diff --git a/src/pow.cpp b/src/pow.cpp index e8602eb..b881a24 100644 --- a/src/pow.cpp +++ b/src/pow.cpp @@ -26,25 +26,26 @@ const CBlockIndex* GetLastBlockIndex(const CBlockIndex* pindex, bool fProofOfSta const CBlockIndex* GetLastBlockIndex(const CBlockIndex* pindex, int algo) { - bool newDiff = pindex->nTime >= Params().BadScryptDiffTimeEnd(); - while (pindex && pindex->pprev && (CBlockHeader::GetAlgo(pindex->nVersion) != algo || (newDiff && algo == POW_SCRYPT_SQUARED && pindex->nTime < Params().BadScryptDiffTimeEnd() && pindex->nTime >= Params().BadScryptDiffTimeStart()))) + bool newDiff = algo == POW_SCRYPT_SQUARED && pindex->nTime >= Params().BadScryptDiffTimeEnd(); + while (pindex && pindex->pprev && (CBlockHeader::GetAlgo(pindex->nVersion) != algo || (newDiff && pindex->nTime < Params().BadScryptDiffTimeEnd() && pindex->nTime >= Params().BadScryptDiffTimeStart()))) pindex = pindex->pprev; return pindex; } unsigned int GetNextWorkRequired(const CBlockIndex* pindexLast, const CBlockHeader* pblock, bool fProofOfStake) { - uint256 bnTargetLimit = fProofOfStake ? Params().ProofOfStakeLimit() : Params().ProofOfWorkLimit(pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? CBlockHeader::GetAlgo(pblock->nVersion) : POW_QUARK); + int algo = CBlockHeader::GetAlgo(pblock->nVersion); + uint256 bnTargetLimit = fProofOfStake ? Params().ProofOfStakeLimit() : Params().ProofOfWorkLimit(pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? 
algo : POW_QUARK); //if (Params().NetworkID() != CBaseChainParams::MAIN && !fProofOfStake) return bnTargetLimit.GetCompact(); // for testing if (pindexLast == NULL) return bnTargetLimit.GetCompact(); // genesis block - const CBlockIndex* pindexPrev = pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? GetLastBlockIndex(pindexLast, CBlockHeader::GetAlgo(pblock->nVersion)) : GetLastBlockIndex(pindexLast, fProofOfStake); + const CBlockIndex* pindexPrev = pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? GetLastBlockIndex(pindexLast, algo) : GetLastBlockIndex(pindexLast, fProofOfStake); if (pindexPrev->pprev == NULL) return bnTargetLimit.GetCompact(); // first block - const CBlockIndex* pindexPrevPrev = pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? GetLastBlockIndex(pindexPrev->pprev, CBlockHeader::GetAlgo(pblock->nVersion)) : GetLastBlockIndex(pindexPrev->pprev, fProofOfStake); + const CBlockIndex* pindexPrevPrev = pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? GetLastBlockIndex(pindexPrev->pprev, algo) : GetLastBlockIndex(pindexPrev->pprev, fProofOfStake); if (pindexPrevPrev->pprev == NULL) return bnTargetLimit.GetCompact(); // second block @@ -123,22 +124,26 @@ bool CheckProofOfWork(const CBlockHeader* pblock) if (Params().SkipProofOfWorkCheck()) return true; + int algo = CBlockHeader::GetAlgo(pblock->nVersion); bnTarget.SetCompact(pblock->nBits, &fNegative, &fOverflow); // Check range - if (fNegative || bnTarget == 0 || fOverflow || bnTarget > Params().ProofOfWorkLimit(pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? CBlockHeader::GetAlgo(pblock->nVersion) : POW_QUARK)) + if (fNegative || bnTarget == 0 || fOverflow || bnTarget > Params().ProofOfWorkLimit(pblock->nVersion >= Params().WALLET_UPGRADE_VERSION() ? 
algo : POW_QUARK)) return error("CheckProofOfWork() : nBits below minimum work"); - if (CBlockHeader::GetAlgo(pblock->nVersion) == POW_SCRYPT_SQUARED && pblock->nTime < Params().BadScryptDiffTimeEnd() && pblock->nTime >= Params().BadScryptDiffTimeStart()) { - LogPrintf("CheckProofOfWork() : skipping block %s affected by difficulty bug\n", pblock->GetHash().GetHex()); + if (algo == POW_SCRYPT_SQUARED && pblock->nTime < Params().BadScryptDiffTimeEnd() && pblock->nTime >= Params().BadScryptDiffTimeStart()) { + LogPrintf("CheckProofOfWork() : skipping block %s affected by scrypt difficulty bug\n", pblock->GetHash().GetHex()); return true; } - + // Check proof of work matches claimed amount if (pblock->GetPoWHash() > bnTarget) { if (Params().MineBlocksOnDemand()) return false; - else + else if (pblock->GetHash() == Params().HashGenesisBlock() && Params().NetworkID() == CBaseChainParams::MAIN) { + LogPrintf("CheckProofOfWork() : accepting genesis block\n"); + return true; + } else return error("CheckProofOfWork() : hash doesn't match nBits"); } diff --git a/src/primitives/block.h b/src/primitives/block.h index 1a27d9d..b99941b 100644 --- a/src/primitives/block.h +++ b/src/primitives/block.h @@ -44,8 +44,8 @@ class CBlockHeader { public: // header - static const int32_t CURRENT_VERSION=8; - int32_t nVersion; + static const uint32_t CURRENT_VERSION = 8; + uint32_t nVersion; uint256 hashPrevBlock; uint256 hashMerkleRoot; uint32_t nTime; @@ -121,7 +121,7 @@ class CBlockHeader } } - static int GetVer(int algo) + static uint32_t GetVer(int algo) { switch (algo) { case POS: diff --git a/src/qt/splashscreen.cpp b/src/qt/splashscreen.cpp index e543b8e..b29702d 100644 --- a/src/qt/splashscreen.cpp +++ b/src/qt/splashscreen.cpp @@ -26,9 +26,9 @@ SplashScreen::SplashScreen(Qt::WindowFlags f, const NetworkStyle* networkStyle) { // set reference point, paddings int paddingLeft = 14; - int paddingTop = 470; + int paddingTop = 34; int titleVersionVSpace = 17; - int titleCopyrightVSpace 
= 32; + //int titleCopyrightVSpace = 32; float fontFactor = 1.0; @@ -68,10 +68,10 @@ SplashScreen::SplashScreen(Qt::WindowFlags f, const NetworkStyle* networkStyle) // draw copyright stuff pixPaint.setFont(QFont(font, 10 * fontFactor)); - pixPaint.drawText(paddingLeft, paddingTop + titleCopyrightVSpace, copyrightTextBtc); - pixPaint.drawText(paddingLeft, paddingTop + titleCopyrightVSpace + 12, copyrightTextDash); - pixPaint.drawText(paddingLeft, paddingTop + titleCopyrightVSpace + 24, copyrightTextPIVX); - pixPaint.drawText(paddingLeft, paddingTop + titleCopyrightVSpace + 36, copyrightTextSPL); + pixPaint.drawText(paddingLeft + 270, paddingTop-titleVersionVSpace /*+ titleCopyrightVSpace*/, copyrightTextBtc); + pixPaint.drawText(paddingLeft + 270, paddingTop-titleVersionVSpace /*+ titleCopyrightVSpace*/ + 12, copyrightTextDash); + pixPaint.drawText(paddingLeft + 270, paddingTop-titleVersionVSpace /*+ titleCopyrightVSpace*/ + 24, copyrightTextPIVX); + pixPaint.drawText(paddingLeft + 270, paddingTop-titleVersionVSpace /*+ titleCopyrightVSpace*/ + 36, copyrightTextSPL); // draw additional text if special network if (!titleAddText.isEmpty()) { diff --git a/src/rpc/blockchain.cpp b/src/rpc/blockchain.cpp index 84a9067..9911fd5 100644 --- a/src/rpc/blockchain.cpp +++ b/src/rpc/blockchain.cpp @@ -113,7 +113,7 @@ UniValue blockheaderToJSON(const CBlockIndex* blockindex) confirmations = chainActive.Height() - blockindex->nHeight + 1; result.push_back(Pair("confirmations", confirmations)); result.push_back(Pair("height", blockindex->nHeight)); - result.push_back(Pair("version", blockindex->nVersion)); + result.push_back(Pair("version", (uint64_t)blockindex->nVersion)); result.push_back(Pair("merkleroot", blockindex->hashMerkleRoot.GetHex())); result.push_back(Pair("time", (int64_t)blockindex->nTime)); result.push_back(Pair("mediantime", (int64_t)blockindex->GetMedianTimePast())); @@ -142,7 +142,7 @@ UniValue blockToJSON(const CBlock& block, const CBlockIndex* blockindex, bool 
tx result.push_back(Pair("confirmations", confirmations)); result.push_back(Pair("size", (int)::GetSerializeSize(block, SER_NETWORK, PROTOCOL_VERSION))); result.push_back(Pair("height", blockindex->nHeight)); - result.push_back(Pair("version", block.nVersion)); + result.push_back(Pair("version", (uint64_t)block.nVersion)); result.push_back(Pair("merkleroot", block.hashMerkleRoot.GetHex())); //result.push_back(Pair("acc_checkpoint", block.nAccumulatorCheckpoint.GetHex())); UniValue txs(UniValue::VARR); @@ -904,7 +904,7 @@ UniValue verifychain(const UniValue& params, bool fHelp) } /** Implementation of IsSuperMajority with better feedback */ -static UniValue SoftForkMajorityDesc(int minVersion, CBlockIndex* pindex, int nRequired) +/*static UniValue SoftForkMajorityDesc(unsigned int minVersion, CBlockIndex* pindex, int nRequired) { int nFound = 0; CBlockIndex* pstart = pindex; @@ -929,7 +929,7 @@ static UniValue SoftForkDesc(const std::string &name, int version, CBlockIndex* rv.push_back(Pair("enforce", SoftForkMajorityDesc(version, pindex, Params().EnforceBlockUpgradeMajority()))); rv.push_back(Pair("reject", SoftForkMajorityDesc(version, pindex, Params().RejectBlockOutdatedMajority()))); return rv; -} +}*/ UniValue getblockchaininfo(const UniValue& params, bool fHelp) { diff --git a/src/rpc/mining.cpp b/src/rpc/mining.cpp index 7aa1488..7409862 100644 --- a/src/rpc/mining.cpp +++ b/src/rpc/mining.cpp @@ -203,6 +203,7 @@ UniValue setminingalgo(const UniValue& params, bool fHelp) if (algo <= POS || algo >= ALGO_COUNT) throw JSONRPCError(RPC_INVALID_PARAMETER, "Invalid algorithm"); + GenerateBitcoins(false, nullptr, 0); LOCK(cs_main); nCreateBlockAlgo = algo; @@ -270,9 +271,9 @@ UniValue gethashespersec(const UniValue& params, bool fHelp) "\nExamples:\n" + HelpExampleCli("gethashespersec", "") + HelpExampleRpc("gethashespersec", "")); - if (GetTimeMillis() - nHPSTimerStart > 8000) + if (GetTimeMillis() - nHPSTimerStart > 60000) return (int64_t)0; - return 
(int64_t)dHashesPerSec; + return (int64_t)(dHashesPerMin / 60); } #endif @@ -618,7 +619,7 @@ UniValue getblocktemplate(const UniValue& params, bool fHelp) UniValue result(UniValue::VOBJ); result.push_back(Pair("capabilities", aCaps)); - result.push_back(Pair("version", pblock->nVersion)); + result.push_back(Pair("version", (uint64_t)pblock->nVersion)); result.push_back(Pair("previousblockhash", pblock->hashPrevBlock.GetHex())); result.push_back(Pair("transactions", transactions)); result.push_back(Pair("coinbaseaux", aux)); diff --git a/src/txdb.cpp b/src/txdb.cpp index 04fe87a..33b20cd 100644 --- a/src/txdb.cpp +++ b/src/txdb.cpp @@ -267,8 +267,8 @@ bool CBlockTreeDB::LoadBlockIndexGuts() //pindexNew->hashProofOfStake = diskindex.hashProofOfStake; //pindexNew->hashProofOfWork = diskindex.hashProofOfWork; - // treat PoW and PoS blocks the same - don't waste time on redundant PoW checks that won't catch invalid PoS blocks anyway - if (pindexNew->GetBlockHash() != Params().HashGenesisBlock() && pindexNew->IsProofOfWork() && CBlockHeader::GetAlgo(pindexNew->nVersion) != POW_SCRYPT_SQUARED) { + // treat PoW and PoS blocks the same - don't waste time on redundant PoW checks that won't catch invalid PoS blocks anyway - nNonce = 0 for PoS blocks + if ((pindexNew->nNonce != 0 || pindexNew->nVersion >= Params().WALLET_UPGRADE_VERSION()) && pindexNew->IsProofOfWork() && CBlockHeader::GetAlgo(pindexNew->nVersion) != POW_SCRYPT_SQUARED) { CBlockHeader header = pindexNew->GetBlockHeader(); if (!CheckProofOfWork(&header)) return error("LoadBlockIndex() : CheckProofOfWork failed: %s", pindexNew->ToString()); diff --git a/src/wallet/rpcdump.cpp b/src/wallet/rpcdump.cpp index 9fbd543..65dfafb 100644 --- a/src/wallet/rpcdump.cpp +++ b/src/wallet/rpcdump.cpp @@ -25,8 +25,6 @@ #include -void EnsureWalletIsUnlocked(bool fAllowAnonOnly); - std::string static EncodeDumpTime(int64_t nTime) { return DateTimeStrFormat("%Y-%m-%dT%H:%M:%SZ", nTime); diff --git a/src/wallet/wallet.cpp 
b/src/wallet/wallet.cpp index 85ac87b..a4598e7 100644 --- a/src/wallet/wallet.cpp +++ b/src/wallet/wallet.cpp @@ -2448,7 +2448,7 @@ bool CWallet::CreateCoinStake( if (outputs > 1) { // Split the stake across the outputs CAmount nShare = nRemaining / outputs; - for (int i = 1; i < outputs; i++) { + for (unsigned int i = 1; i < outputs; i++) { // loop through all but the last one. txNew.vout[i].nValue = nShare; nRemaining -= nShare;