From ab4b0bf1bfc4aea53a18b5cc4667219fba2fe15f Mon Sep 17 00:00:00 2001
From: Georgi Valkov
Date: Fri, 3 May 2024 08:42:10 +0300
Subject: [PATCH] threads_win: fix build error with VS2010 x86

InterlockedAnd64 and InterlockedAdd64 are not available on VS2010 x86.
We have already implemented replacements for other functions, such as
InterlockedOr64. Apply the same approach to fix the errors.

Add a CRYPTO_RWLOCK rw_lock to rcu_lock_st, and replace InterlockedOr64
and InterlockedOr with CRYPTO_atomic_load and CRYPTO_atomic_load_int,
following the existing design pattern.

Add documentation and tests for the new atomic functions
CRYPTO_atomic_add64 and CRYPTO_atomic_and.

Fixes:
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedAdd64 referenced in function _get_hold_current_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedOr referenced in function _get_hold_current_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedAnd64 referenced in function _update_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedOr64 referenced in function _ossl_synchronize_rcu

Signed-off-by: Georgi Valkov
---
 crypto/threads_none.c               | 18 ++++++++
 crypto/threads_pthread.c            | 52 +++++++++++++++++++++
 crypto/threads_win.c                | 72 +++++++++++++++++++++++++----
 doc/man3/CRYPTO_THREAD_run_once.pod | 27 ++++++++++-
 include/openssl/crypto.h.in         |  4 ++
 test/threadstest.c                  | 46 ++++++++++++++++++
 util/libcrypto.num                  |  2 +
 7 files changed, 210 insertions(+), 11 deletions(-)

diff --git a/crypto/threads_none.c b/crypto/threads_none.c
index e0387650fece80..b3c4442264240f 100644
--- a/crypto/threads_none.c
+++ b/crypto/threads_none.c
@@ -210,6 +210,24 @@ int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
     return 1;
 }
 
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock)
+{
+    *val += op;
+    *ret = *val;
+
+    return 1;
+}
+
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock)
+{
+    *val &= op;
+    *ret = *val;
+
+    return 1;
+}
+
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
 {
diff --git a/crypto/threads_pthread.c b/crypto/threads_pthread.c
index 8e411671d9f76c..bcf57df4fa6047 100644
--- a/crypto/threads_pthread.c
+++ b/crypto/threads_pthread.c
@@ -870,6 +870,58 @@ int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
     return 1;
 }
 
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock)
+{
+# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
+    if (__atomic_is_lock_free(sizeof(*val), val)) {
+        *ret = __atomic_add_fetch(val, op, __ATOMIC_ACQ_REL);
+        return 1;
+    }
+# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
+    /* This will work for all future Solaris versions. */
+    if (ret != NULL) {
+        *ret = atomic_add_64_nv(val, op);
+        return 1;
+    }
+# endif
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val += op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+}
+
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock)
+{
+# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
+    if (__atomic_is_lock_free(sizeof(*val), val)) {
+        *ret = __atomic_and_fetch(val, op, __ATOMIC_ACQ_REL);
+        return 1;
+    }
+# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
+    /* This will work for all future Solaris versions. */
+    if (ret != NULL) {
+        *ret = atomic_and_64_nv(val, op);
+        return 1;
+    }
+# endif
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val &= op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+}
+
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
 {
diff --git a/crypto/threads_win.c b/crypto/threads_win.c
index a36e3752f655fb..a6d3ce2db517e2 100644
--- a/crypto/threads_win.c
+++ b/crypto/threads_win.c
@@ -103,6 +103,7 @@ struct rcu_lock_st {
     CRYPTO_CONDVAR *alloc_signal;
     CRYPTO_MUTEX *prior_lock;
     CRYPTO_CONDVAR *prior_signal;
+    CRYPTO_RWLOCK *rw_lock;
 };
 
 static struct rcu_qp *allocate_new_qp_group(struct rcu_lock_st *lock,
@@ -132,6 +133,7 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
         return NULL;
 
     new->ctx = ctx;
+    new->rw_lock = CRYPTO_THREAD_lock_new();
     new->write_lock = ossl_crypto_mutex_new();
     new->alloc_signal = ossl_crypto_condvar_new();
     new->prior_signal = ossl_crypto_condvar_new();
@@ -143,7 +145,9 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
         || new->prior_signal == NULL
         || new->write_lock == NULL
         || new->alloc_lock == NULL
-        || new->prior_lock == NULL) {
+        || new->prior_lock == NULL
+        || new->rw_lock == NULL) {
+        CRYPTO_THREAD_lock_free(new->rw_lock);
         OPENSSL_free(new->qp_group);
         ossl_crypto_condvar_free(&new->alloc_signal);
         ossl_crypto_condvar_free(&new->prior_signal);
@@ -159,6 +163,7 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
 
 void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
 {
+    CRYPTO_THREAD_lock_free(lock->rw_lock);
     OPENSSL_free(lock->qp_group);
     ossl_crypto_condvar_free(&lock->alloc_signal);
     ossl_crypto_condvar_free(&lock->prior_signal);
@@ -171,14 +176,20 @@ void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
 static ossl_inline struct rcu_qp *get_hold_current_qp(CRYPTO_RCU_LOCK *lock)
 {
     uint32_t qp_idx;
+    uint32_t tmp;
+    uint64_t tmp64;
 
     /* get the current qp index */
     for (;;) {
-        qp_idx = InterlockedOr(&lock->reader_idx, 0);
-        InterlockedAdd64(&lock->qp_group[qp_idx].users, VAL_READER);
-        if (qp_idx == InterlockedOr(&lock->reader_idx, 0))
+        CRYPTO_atomic_load_int(&lock->reader_idx, (int *)&qp_idx,
+                               lock->rw_lock);
+        CRYPTO_atomic_add64(&lock->qp_group[qp_idx].users, VAL_READER, &tmp64,
+                            lock->rw_lock);
+        CRYPTO_atomic_load_int(&lock->reader_idx, (int *)&tmp, lock->rw_lock);
+        if (qp_idx == tmp)
             break;
-        InterlockedAdd64(&lock->qp_group[qp_idx].users, -VAL_READER);
+        CRYPTO_atomic_add64(&lock->qp_group[qp_idx].users, -VAL_READER, &tmp64,
+                            lock->rw_lock);
     }
 
     return &lock->qp_group[qp_idx];
@@ -253,7 +264,9 @@ void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
         if (data->thread_qps[i].lock == lock) {
             data->thread_qps[i].depth--;
             if (data->thread_qps[i].depth == 0) {
-                ret = InterlockedAdd64(&data->thread_qps[i].qp->users, -VAL_READER);
+                CRYPTO_atomic_add64(&data->thread_qps[i].qp->users,
+                                    -VAL_READER, (uint64_t *)&ret,
+                                    lock->rw_lock);
                 OPENSSL_assert(ret >= 0);
                 data->thread_qps[i].qp = NULL;
                 data->thread_qps[i].lock = NULL;
@@ -268,6 +281,7 @@ static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
     uint64_t new_id;
     uint32_t current_idx;
     uint32_t tmp;
+    uint64_t tmp64;
 
     ossl_crypto_mutex_lock(lock->alloc_lock);
     /*
@@ -291,8 +305,10 @@ static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
     lock->id_ctr++;
 
     new_id = VAL_ID(new_id);
-    InterlockedAnd64(&lock->qp_group[current_idx].users, ID_MASK);
-    InterlockedAdd64(&lock->qp_group[current_idx].users, new_id);
+    CRYPTO_atomic_and(&lock->qp_group[current_idx].users, ID_MASK, &tmp64,
+                      lock->rw_lock);
+    CRYPTO_atomic_add64(&lock->qp_group[current_idx].users, new_id, &tmp64,
+                        lock->rw_lock);
 
     /* update the reader index to be the prior qp */
     tmp = lock->current_alloc_idx;
@@ -327,7 +343,7 @@ void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
 
     /* wait for the reader count to reach zero */
     do {
-        count = InterlockedOr64(&qp->users, 0);
+        CRYPTO_atomic_load(&qp->users, &count, lock->rw_lock);
     } while (READER_COUNT(count) != 0);
 
     /* retire in order */
@@ -558,6 +574,44 @@ int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
     return 1;
 }
 
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock)
+{
+#if (defined(NO_INTERLOCKEDOR64))
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val += op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+#else
+    *ret = (uint64_t)InterlockedAdd64((LONG64 volatile *)val, (LONG64)op);
+    return 1;
+#endif
+}
+
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock)
+{
+#if (defined(NO_INTERLOCKEDOR64))
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val &= op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+#else
+    *ret = (uint64_t)InterlockedAnd64((LONG64 volatile *)val, (LONG64)op) & op;
+    return 1;
+#endif
+}
+
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
 {
diff --git a/doc/man3/CRYPTO_THREAD_run_once.pod b/doc/man3/CRYPTO_THREAD_run_once.pod
index 4fb3774484c3b9..4c1d9d701faf6f 100644
--- a/doc/man3/CRYPTO_THREAD_run_once.pod
+++ b/doc/man3/CRYPTO_THREAD_run_once.pod
@@ -5,8 +5,8 @@
 CRYPTO_THREAD_run_once,
 CRYPTO_THREAD_lock_new, CRYPTO_THREAD_read_lock, CRYPTO_THREAD_write_lock,
 CRYPTO_THREAD_unlock, CRYPTO_THREAD_lock_free,
-CRYPTO_atomic_add, CRYPTO_atomic_or, CRYPTO_atomic_load, CRYPTO_atomic_store,
-CRYPTO_atomic_load_int,
+CRYPTO_atomic_add, CRYPTO_atomic_add64, CRYPTO_atomic_and, CRYPTO_atomic_or,
+CRYPTO_atomic_load, CRYPTO_atomic_store, CRYPTO_atomic_load_int,
 OSSL_set_max_threads, OSSL_get_max_threads,
 OSSL_get_thread_support_flags, OSSL_THREAD_SUPPORT_FLAG_THREAD_POOL,
 OSSL_THREAD_SUPPORT_FLAG_DEFAULT_SPAWN - OpenSSL thread support
@@ -25,6 +25,10 @@ OSSL_THREAD_SUPPORT_FLAG_DEFAULT_SPAWN - OpenSSL thread support
     void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock);
 
     int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock);
+    int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                            CRYPTO_RWLOCK *lock);
+    int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                          CRYPTO_RWLOCK *lock);
     int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                          CRYPTO_RWLOCK *lock);
     int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock);
@@ -94,6 +98,25 @@ supported and I<lock> is NULL, then the function will fail.
 
 =item *
 
+CRYPTO_atomic_add64() atomically adds I<op> to I<*val> and returns the
+result of the operation in I<*ret>. I<lock> will be locked, unless atomic
+operations are supported on the specific platform. Because of this, if a
+variable is modified by CRYPTO_atomic_add64() then CRYPTO_atomic_add64() must
+be the only way that the variable is modified. If atomic operations are not
+supported and I<lock> is NULL, then the function will fail.
+
+=item *
+
+CRYPTO_atomic_and() performs an atomic bitwise and of I<op> and I<*val> and stores
+the result back in I<*val>. It also returns the result of the operation in
+I<*ret>. I<lock> will be locked, unless atomic operations are supported on the
+specific platform. Because of this, if a variable is modified by
+CRYPTO_atomic_and() or read by CRYPTO_atomic_load() then CRYPTO_atomic_and() must
+be the only way that the variable is modified. If atomic operations are not
+supported and I<lock> is NULL, then the function will fail.
+
+=item *
+
 CRYPTO_atomic_or() performs an atomic bitwise or of I<op> and I<*val> and stores
 the result back in I<*val>. It also returns the result of the operation in
 I<*ret>. I<lock> will be locked, unless atomic operations are supported on the
diff --git a/include/openssl/crypto.h.in b/include/openssl/crypto.h.in
index 034f150cb65cec..c492b9c7c25c52 100644
--- a/include/openssl/crypto.h.in
+++ b/include/openssl/crypto.h.in
@@ -86,6 +86,10 @@ int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock);
 void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock);
 
 int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock);
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock);
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock);
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock);
 int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock);
diff --git a/test/threadstest.c b/test/threadstest.c
index 2d05255132d3b6..58ea025339b258 100644
--- a/test/threadstest.c
+++ b/test/threadstest.c
@@ -654,6 +654,52 @@ static int test_atomic(void)
             || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
         goto err;
 
+    ret64 = 0;
+
+    if (CRYPTO_atomic_and(&val64, 5, &ret64, NULL)) {
+        /* This succeeds therefore we're on a platform with lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 1)
+                || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+            goto err;
+    } else {
+        /* This failed therefore we're on a platform without lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 3)
+                || !TEST_int_eq((unsigned int)ret64, 0))
+            goto err;
+    }
+    val64 = 3;
+    ret64 = 0;
+
+    if (!TEST_true(CRYPTO_atomic_and(&val64, 5, &ret64, lock)))
+        goto err;
+
+    if (!TEST_uint_eq((unsigned int)val64, 1)
+            || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+        goto err;
+
+    ret64 = 0;
+
+    if (CRYPTO_atomic_add64(&val64, 2, &ret64, NULL)) {
+        /* This succeeds therefore we're on a platform with lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 3)
+                || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+            goto err;
+    } else {
+        /* This failed therefore we're on a platform without lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 1)
+                || !TEST_int_eq((unsigned int)ret64, 0))
+            goto err;
+    }
+    val64 = 1;
+    ret64 = 0;
+
+    if (!TEST_true(CRYPTO_atomic_add64(&val64, 2, &ret64, lock)))
+        goto err;
+
+    if (!TEST_uint_eq((unsigned int)val64, 3)
+            || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+        goto err;
+
     testresult = 1;
 err:
     CRYPTO_THREAD_lock_free(lock);
diff --git a/util/libcrypto.num b/util/libcrypto.num
index a5f9bb32cb17fa..08c7fd46b431ff 100644
--- a/util/libcrypto.num
+++ b/util/libcrypto.num
@@ -5646,3 +5646,5 @@ OSSL_IETF_ATTR_SYNTAX_print             ?	3_4_0	EXIST::FUNCTION:
 X509_ACERT_add_attr_nconf               ?	3_4_0	EXIST::FUNCTION:
 OSSL_LIB_CTX_get_conf_diagnostics       ?	3_4_0	EXIST::FUNCTION:
 OSSL_LIB_CTX_set_conf_diagnostics       ?	3_4_0	EXIST::FUNCTION:
+CRYPTO_atomic_add64                     ?	3_4_0	EXIST::FUNCTION:
+CRYPTO_atomic_and                       ?	3_4_0	EXIST::FUNCTION:
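
Review note, not part of the patch: the "same approach" the commit message
refers to is the lock-protected fallback already used for InterlockedOr64 when
NO_INTERLOCKEDOR64 is defined. A simplified sketch of that shape follows; the
helper name is illustrative, not from the patch.

    #include <stdint.h>
    #include <openssl/crypto.h>

    /*
     * Simplified sketch of the NO_INTERLOCKEDOR64 fallback: when no 64-bit
     * interlocked intrinsic exists, serialise the read-modify-write under a
     * CRYPTO_RWLOCK instead.
     */
    static int atomic64_fallback_add(uint64_t *val, uint64_t op, uint64_t *ret,
                                     CRYPTO_RWLOCK *lock)
    {
        /* without lockless atomics, a lock is mandatory */
        if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
            return 0;
        *val += op;             /* the operation varies: +=, &=, |= */
        *ret = *val;
        if (!CRYPTO_THREAD_unlock(lock))
            return 0;
        return 1;
    }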
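Review note, not part of the patch: the trailing "& op" in the
InterlockedAnd64() path exists because that intrinsic returns the value held
*before* the AND, while CRYPTO_atomic_and() must report the value after it.
Since new == old & op, masking the intrinsic's return value recovers the
stored result without a second read; InterlockedAdd64() already returns the
post-add value, so the add path needs no such fixup. A standalone
illustration, with a stand-in helper in place of the intrinsic:

    #include <stdint.h>
    #include <assert.h>

    /* Stand-in for InterlockedAnd64(): performs the AND, returns the OLD value. */
    static uint64_t and_returning_old(uint64_t *val, uint64_t op)
    {
        uint64_t old = *val;

        *val &= op;
        return old;
    }

    int main(void)
    {
        uint64_t v = 3;
        /* mask the pre-image with op to recover the stored result, as the patch does */
        uint64_t ret = and_returning_old(&v, 5) & 5;

        assert(v == 1 && ret == 1);
        return 0;
    }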
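Review note, not part of the patch: a minimal caller's-eye sketch of the two
new public functions, assuming a build that includes this patch. The values
mirror the threadstest.c cases (3 & 5 == 1, then 1 + 2 == 3); with a non-NULL
lock the calls succeed even on platforms without lockless atomics.

    #include <stdint.h>
    #include <stdio.h>
    #include <openssl/crypto.h>

    int main(void)
    {
        CRYPTO_RWLOCK *lock = CRYPTO_THREAD_lock_new();
        uint64_t val = 3, ret = 0;

        if (lock == NULL)
            return 1;

        /* ret receives the post-operation value in both calls */
        if (!CRYPTO_atomic_and(&val, 5, &ret, lock))    /* val = 3 & 5 = 1 */
            return 1;
        if (!CRYPTO_atomic_add64(&val, 2, &ret, lock))  /* val = 1 + 2 = 3 */
            return 1;

        printf("val=%llu ret=%llu\n", (unsigned long long)val,
               (unsigned long long)ret);                /* prints val=3 ret=3 */

        CRYPTO_THREAD_lock_free(lock);
        return 0;
    }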