Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race condition issues between the main thread and module threads #12817

Merged
merged 41 commits into from Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
ecdf99c
Fix race condition issues between the main thread and module threads
sundb Nov 28, 2023
716ef44
Let RM_Reply*() and RM_UnblockClient() inside GIL
sundb Dec 4, 2023
6e4de39
Add `from_module` parameter to `moduleBlockedClientTimeOut()` to make …
sundb Dec 5, 2023
c09ff05
cleanup
sundb Dec 5, 2023
152e7f2
Make sure not to touch anything before acquiring the GIL
sundb Dec 6, 2023
e6a08af
cleanup
sundb Dec 6, 2023
49937df
Move the free of bg upward
sundb Dec 8, 2023
761f5dd
Merge branch 'unstable' into thread-safe-freeclient
sundb Dec 8, 2023
15db3fc
Fix adding fake client to server.clients_pending_write
sundb Dec 12, 2023
14c0aa9
Add c->conn null assertion for updateClientMemoryUsage() and updateCl…
sundb Dec 12, 2023
1bad868
improve comment
sundb Dec 12, 2023
4b48f42
lock the ae poll block and cache modules size
sundb Dec 16, 2023
dfb5665
Added __thread specifier for ProcessingEventsWhileBlocked
sundb Dec 18, 2023
f9c27e9
Optimize el mutex variable names, and add comments
sundb Dec 19, 2023
0f1e787
Add comment for modules_count
sundb Dec 19, 2023
d5ded67
Remove unnecessary line and update comment
sundb Dec 19, 2023
07f8de1
update comment
sundb Dec 19, 2023
0741699
change lock_ae to lock_el
sundb Dec 19, 2023
1ed674f
update comment
sundb Dec 19, 2023
eeda5e6
Add comment about freeing argvs in module thread
sundb Dec 20, 2023
eaa21da
Merge branch 'unstable' into thread-safe-freeclient
sundb Jan 7, 2024
e26eeb9
Revert some codes after #12905
sundb Jan 7, 2024
c660f6e
Remove unnecessary change
sundb Jan 7, 2024
d272368
Call RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTi…
sundb Jan 9, 2024
70b78ae
Add comment about thread safe for RM_FreeString, RM_RetainString and …
sundb Jan 9, 2024
8bc235f
cleanup
sundb Jan 9, 2024
496d0db
Merge branch 'unstable' into thread-safe-freeclient
sundb Jan 9, 2024
a47cba0
Revert "Call RM_BlockedClientMeasureTimeStart() and RM_BlockedClientM…
sundb Jan 10, 2024
7be865f
Terminate module block thread first in timeout callback
sundb Jan 10, 2024
54524ca
Revert "Terminate module block thread first in timeout callback"
sundb Jan 10, 2024
17efc14
Avoid calling RM_BlockedClientMeasureTimeEnd in the main thread
sundb Jan 11, 2024
54cdf01
Indent
sundb Jan 11, 2024
588eff4
Revert "Indent"
sundb Jan 12, 2024
cd8b6a3
Revert "Avoid calling RM_BlockedClientMeasureTimeEnd in the main thread"
sundb Jan 12, 2024
0bc0796
Using local lock instead of GIL to protect RM_BlockedClientMeasureTim…
sundb Jan 12, 2024
a06d7dd
Add comments
sundb Jan 12, 2024
d9c18af
fix crash
sundb Jan 12, 2024
42de713
Add new method blockClientPrivdataInit
sundb Jan 14, 2024
393af9e
Add api comment for RM_BlockedClientMeasureTimeStart() and RM_Blocked…
sundb Jan 14, 2024
70d5348
Update src/module.c
sundb Jan 19, 2024
1359fad
Update comments about retained string
sundb Jan 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/blocked.c
Expand Up @@ -239,7 +239,7 @@ void replyToBlockedClientTimedOut(client *c) {
addReplyLongLong(c,server.fsynced_reploff >= c->bstate.reploffset);
addReplyLongLong(c,replicationCountAOFAcksByOffset(c->bstate.reploffset));
} else if (c->bstate.btype == BLOCKED_MODULE) {
moduleBlockedClientTimedOut(c);
moduleBlockedClientTimedOut(c, 0);
} else {
serverPanic("Unknown btype in replyToBlockedClientTimedOut().");
}
Expand Down
61 changes: 46 additions & 15 deletions src/module.c
Expand Up @@ -306,7 +306,6 @@ static size_t moduleTempClientMinCount = 0; /* Min client count in pool since
* allow thread safe contexts to execute commands at a safe moment. */
static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER;


/* Function pointer type for keyspace event notification subscriptions from modules. */
typedef int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key);

Expand Down Expand Up @@ -2338,7 +2337,10 @@ ustime_t RM_CachedMicroseconds(void) {
* Within the same command, you can call multiple times
* RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTimeEnd()
* to accumulate independent time intervals to the background duration.
* This method always return REDISMODULE_OK. */
* This method always return REDISMODULE_OK.
*
* This function is not thread safe, If used in module thread and blocked callback (possibly main thread)
* simultaneously, it's recommended to protect them with lock owned by caller instead of GIL. */
int RM_BlockedClientMeasureTimeStart(RedisModuleBlockedClient *bc) {
elapsedStart(&(bc->background_timer));
return REDISMODULE_OK;
Expand All @@ -2348,7 +2350,10 @@ int RM_BlockedClientMeasureTimeStart(RedisModuleBlockedClient *bc) {
* to calculate the elapsed execution time.
* On success REDISMODULE_OK is returned.
* This method only returns REDISMODULE_ERR if no start time was
* previously defined ( meaning RM_BlockedClientMeasureTimeStart was not called ). */
* previously defined ( meaning RM_BlockedClientMeasureTimeStart was not called ).
*
* This function is not thread safe, If used in module thread and blocked callback (possibly main thread)
* simultaneously, it's recommended to protect them with lock owned by caller instead of GIL. */
int RM_BlockedClientMeasureTimeEnd(RedisModuleBlockedClient *bc) {
// If the counter is 0 then we haven't called RM_BlockedClientMeasureTimeStart
if (!bc->background_timer)
Expand Down Expand Up @@ -2717,7 +2722,10 @@ RedisModuleString *RM_CreateStringFromStreamID(RedisModuleCtx *ctx, const RedisM
* pass ctx as NULL when releasing the string (but passing a context will not
* create any issue). Strings created with a context should be freed also passing
* the context, so if you want to free a string out of context later, make sure
* to create it using a NULL context. */
* to create it using a NULL context.
*
* This API is not thread safe, access to these retained strings (if they originated
* from a client command arguments) must be done with GIL locked. */
void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) {
decrRefCount(str);
if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str);
Expand Down Expand Up @@ -2754,7 +2762,10 @@ void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) {
*
* Threaded modules that reference retained strings from other threads *must*
* explicitly trim the allocation as soon as the string is retained. Not doing
* so may result with automatic trimming which is not thread safe. */
* so may result with automatic trimming which is not thread safe.
*
* This API is not thread safe, access to these retained strings (if they originated
* from a client command arguments) must be done with GIL locked. */
void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) {
if (ctx == NULL || !autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str)) {
/* Increment the string reference counting only if we can't
Expand Down Expand Up @@ -2796,7 +2807,10 @@ void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) {
*
* Threaded modules that reference held strings from other threads *must*
* explicitly trim the allocation as soon as the string is held. Not doing
* so may result with automatic trimming which is not thread safe. */
* so may result with automatic trimming which is not thread safe.
*
* This API is not thread safe, access to these retained strings (if they originated
* from a client command arguments) must be done with GIL locked. */
RedisModuleString* RM_HoldString(RedisModuleCtx *ctx, RedisModuleString *str) {
if (str->refcount == OBJ_STATIC_REFCOUNT) {
return RM_CreateStringFromString(ctx, str);
Expand Down Expand Up @@ -8228,7 +8242,7 @@ int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) {
* argument, but better to be safe than sorry. */
if (bc->timeout_callback == NULL) return REDISMODULE_ERR;
if (bc->unblocked) return REDISMODULE_OK;
if (bc->client) moduleBlockedClientTimedOut(bc->client);
if (bc->client) moduleBlockedClientTimedOut(bc->client, 1);
}
moduleUnblockClientByHandle(bc,privdata);
return REDISMODULE_OK;
Expand Down Expand Up @@ -8327,8 +8341,10 @@ void moduleHandleBlockedClients(void) {
* This needs to be out of the reply callback above given that a
* module might not define any callback and still do blocking ops.
*/
if (c && !clientHasModuleAuthInProgress(c) && !bc->blocked_on_keys) {
updateStatsOnUnblock(c, bc->background_duration, reply_us, server.stat_total_error_replies != prev_error_replies);
if (c && !clientHasModuleAuthInProgress(c)) {
int had_errors = c->deferred_reply_errors ? !!listLength(c->deferred_reply_errors) :
(server.stat_total_error_replies != prev_error_replies);
updateStatsOnUnblock(c, bc->background_duration, reply_us, had_errors);
Comment on lines +8344 to +8347
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

following #12817 (comment)
we call updateStatsOnUnblock() here when from RM_UnblockClient().

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it mean that before this PR it was called twice?
maybe we should add a comment in moduleBlockedClientTimedOut, explaining the if statement by referring to this call.

Copy link
Collaborator Author

@sundb sundb Dec 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before this PR, moduleHandleBlockedClients() ignored updating the block status when the client was blocked on keys.

origin code:

if (c && !clientHasModuleAuthInProgress(c) && !bc->blocked_on_keys) {
    updateStatsOnUnblock();
}

}

if (c != NULL) {
Expand All @@ -8346,7 +8362,7 @@ void moduleHandleBlockedClients(void) {
* if there are pending replies here. This is needed since
* during a non blocking command the client may receive output. */
if (!clientHasModuleAuthInProgress(c) && clientHasPendingReplies(c) &&
!(c->flags & CLIENT_PENDING_WRITE))
!(c->flags & CLIENT_PENDING_WRITE) && c->conn)
{
c->flags |= CLIENT_PENDING_WRITE;
listLinkNodeHead(server.clients_pending_write, &c->clients_pending_write_node);
Expand Down Expand Up @@ -8381,8 +8397,15 @@ int moduleBlockedClientMayTimeout(client *c) {
/* Called when our client timed out. After this function unblockClient()
* is called, and it will invalidate the blocked client. So this function
* does not need to do any cleanup. Eventually the module will call the
* API to unblock the client and the memory will be released. */
void moduleBlockedClientTimedOut(client *c) {
* API to unblock the client and the memory will be released.
*
* If this function is called from a module, we handle the timeout callback
* and the update of the unblock status in a thread-safe manner to avoid race
* conditions with the main thread.
* If this function is called from the main thread, we must handle the unblocking
* of the client synchronously. This ensures that we can reply to the client before
* resetClient() is called. */
void moduleBlockedClientTimedOut(client *c, int from_module) {
RedisModuleBlockedClient *bc = c->bstate.module_blocked_handle;

/* Protect against re-processing: don't serve clients that are already
Expand All @@ -8391,14 +8414,22 @@ void moduleBlockedClientTimedOut(client *c) {
if (bc->unblocked) return;

RedisModuleCtx ctx;
moduleCreateContext(&ctx, bc->module, REDISMODULE_CTX_BLOCKED_TIMEOUT);
int flags = REDISMODULE_CTX_BLOCKED_TIMEOUT;
if (from_module) flags |= REDISMODULE_CTX_THREAD_SAFE;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm trying to figure out this change.
in the top comment i see this change (an addition of REDISMODULE_CTX_THREAD_SAFE) is listed together with the fact we don't call updateStatsOnUnblock (which is a different change in this function).

  1. can you please help me understand why it was needed.
  2. what are the other side effects of this change

Copy link
Collaborator Author

@sundb sundb Dec 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is because both of them are fixed to ensure that moduleBlockedClientTimedOut() is thread-safe.

The reason is:

When an error reply is issued in timeout_callback() and the ctx is not thread-safe, it eventually leads to race conditions in afterErrorReply().

If the ctx is not REDISMODULE_CTX_THREAD_SAFE, afterErrorReply() will be triggered when replying with an error.
If REDISMODULE_CTX_THREAD_SAFE is used, all replies will be deferred.

redis/src/networking.c

Lines 501 to 508 in 27a8e3b

if (c->flags & CLIENT_MODULE) {
if (!c->deferred_reply_errors) {
c->deferred_reply_errors = listCreate();
listSetFreeMethod(c->deferred_reply_errors, (void (*)(void*))sdsfree);
}
listAddNodeTail(c->deferred_reply_errors, sdsnewlen(s, len));
return;
}

moduleCreateContext(&ctx, bc->module, flags);
ctx.client = bc->client;
ctx.blocked_client = bc;
ctx.blocked_privdata = bc->privdata;
long long prev_error_replies = server.stat_total_error_replies;

long long prev_error_replies;
if (!from_module)
prev_error_replies = server.stat_total_error_replies;

bc->timeout_callback(&ctx,(void**)c->argv,c->argc);
moduleFreeContext(&ctx);
updateStatsOnUnblock(c, bc->background_duration, 0, server.stat_total_error_replies != prev_error_replies);

if (!from_module)
updateStatsOnUnblock(c, bc->background_duration, 0, server.stat_total_error_replies != prev_error_replies);

/* For timeout events, we do not want to call the disconnect callback,
* because the blocked client will be automatically disconnected in
Expand Down
21 changes: 12 additions & 9 deletions src/networking.c
Expand Up @@ -414,8 +414,9 @@ void _addReplyToBufferOrList(client *c, const char *s, size_t len) {
* to a channel which we are subscribed to, then we wanna postpone that message to be added
* after the command's reply (specifically important during multi-exec). the exception is
* the SUBSCRIBE command family, which (currently) have a push message instead of a proper reply.
* The check for executing_client also avoids affecting push messages that are part of eviction. */
if (c == server.current_client && (c->flags & CLIENT_PUSHING) &&
* The check for executing_client also avoids affecting push messages that are part of eviction.
* Check CLIENT_PUSHING first to avoid race conditions, as it's absent in module's fake client. */
if ((c->flags & CLIENT_PUSHING) && c == server.current_client &&
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

start version: 7.2.0
introduced: #12326
reason: RM_Reply* is not thread-safe

WARNING: ThreadSanitizer: data race (pid=1969717)
  Write of size 8 at 0x558d01162be0 by main thread (mutexes: write M86, write M82, write M84, write M64):
    #0 processCommandAndResetClient /data/redis_fork/src/networking.c:2466 (redis-server+0xe1db4)
    #1 processInputBuffer /data/redis_fork/src/networking.c:2575 (redis-server+0xe1db4)
    #2 readQueryFromClient /data/redis_fork/src/networking.c:2715 (redis-server+0xe28da)
    #3 callHandler /data/redis_fork/src/connhelpers.h:79 (redis-server+0x2943f2)
    #4 connSocketEventHandler /data/redis_fork/src/socket.c:298 (redis-server+0x2943f2)
    #5 aeProcessEvents /data/redis_fork/src/ae.c:436 (redis-server+0x97245)
    #6 aeMain /data/redis_fork/src/ae.c:496 (redis-server+0x97245)
    #7 main /data/redis_fork/src/server.c:7212 (redis-server+0x846d5)

  Previous read of size 8 at 0x558d01162be0 by thread T14:
    #0 _addReplyToBufferOrList /data/redis_fork/src/networking.c:418 (redis-server+0xdc0d6)
    #1 addReplyProto /data/redis_fork/src/networking.c:474 (redis-server+0xdd0a7)
    #2 RM_ReplyWithCallReply /data/redis_fork/src/module.c:3424 (redis-server+0x212732)
    #3 bg_call_worker /data/redis_fork/tests/modules/blockedclient.c:145 (blockedclient.so+0x9c0d)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

like #12817 (comment), this is not about the use of RM_AddReply, it's about using the argv strings (changing their refcount).
p.s. how did you conclude it's related to the above mentioned PR?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they're not related.
this is because the main thread may modify server.current_client while a module thread reads it.
however, c->flags & CLIENT_PUSHING is always false for module threads, so this is harmless.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sundb this one at the top comment (number 2), says:

Harm Level: Low

but if that's just an access to a variable and then ignoring what we read from it, isn't that "Harm Level: None"?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, it should be None.

server.executing_client && !cmdHasPushAsReply(server.executing_client->cmd))
{
_addReplyProtoToList(c,server.pending_push_messages,s,len);
Expand Down Expand Up @@ -1450,7 +1451,7 @@ void unlinkClient(client *c) {
listNode *ln;

/* If this is marked as current client unset it. */
if (server.current_client == c) server.current_client = NULL;
if (c->conn && server.current_client == c) server.current_client = NULL;

/* Certain operations must be done only if the client has an active connection.
* If the client was already unlinked or if it's a "fake client" the
Expand Down Expand Up @@ -1494,7 +1495,7 @@ void unlinkClient(client *c) {
}

/* Remove from the list of pending reads if needed. */
serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
serverAssert(!c->conn || io_threads_op == IO_THREADS_OP_IDLE);
if (c->pending_read_list_node != NULL) {
listDelNode(server.clients_pending_read,c->pending_read_list_node);
c->pending_read_list_node = NULL;
Expand Down Expand Up @@ -1649,6 +1650,12 @@ void freeClient(client *c) {
reqresReset(c, 1);
#endif

/* Remove the contribution that this client gave to our
* incrementally computed memory usage. */
if (c->conn)
server.stat_clients_type_memory[c->last_memory_type] -=
c->last_memory_usage;
Comment on lines +1653 to +1657
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

start version: 7.0.0
introduced: #8687
reason: touch server.stat_clients_type_memory without GIL

WARNING: ThreadSanitizer: data race (pid=90167)
  Write of size 8 at 0x0001006f41e0 by main thread (mutexes: write M0, write M1, write M2, write M3):
    #0 updateClientMemoryUsage server.c:956 (redis-server:arm64+0x10001f760)
    #1 clientsCron server.c:1116 (redis-server:arm64+0x1000201fc)
    #2 serverCron server.c:1451 (redis-server:arm64+0x100021b10)
    #3 processTimeEvents ae.c:331 (redis-server:arm64+0x1000100b8)
    #4 aeProcessEvents ae.c:466 (redis-server:arm64+0x10000f614)
    #5 aeMain ae.c:496 (redis-server:arm64+0x1000103bc)
    #6 main server.c:7212 (redis-server:arm64+0x10003e76c)

  Previous write of size 8 at 0x0001006f41e0 by thread T7:
    #0 freeClient networking.c:1684 (redis-server:arm64+0x10005f30c)
    #1 moduleFreeContext module.c:834 (redis-server:arm64+0x1001821ac)
    #2 RM_FreeThreadSafeContext module.c:8494 (redis-server:arm64+0x10019dcf8)
    #3 worker <null>:17631300 (blockedclient.so:arm64+0x828)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one is resolved by modifying the freeClient code, right?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and it also makes updateClientMemoryUsage() and clientEvictionAllowed() no longer track the memory usage or eviction eligibility of clients that have no connection.
However, this has the side effect that server.stat_clients_type_memory[CLIENT_TYPE_NORMAL] will be lower than it was before this fix was made.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can't / shouldn't evict them anyway.
if we tracked them, it was wrong to do that.
we can list this as a fix (not about thread race) in the top comment.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've never been able to remember how I reproduced it.
I remember using RM_Call to reproduce it, but I forget which command, not the following client no-evict command.

Config:

maxmemory-clients 1g

Command:

RedisModule_Call(ctx,"client","cc","no-evict","off");

Patch:

int clientEvictionAllowed(client *c) {
    serverAssert(c->conn);
    if (server.maxmemory_clients == 0 || c->flags & CLIENT_NO_EVICT) {
        return 0;
    }
    int type = getClientType(c);
    return (type == CLIENT_TYPE_NORMAL || type == CLIENT_TYPE_PUBSUB);
}

serverAssert(c->conn); will be triggered.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please also have a look at #12817 (comment) and top comment(7).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think using RM_Call to call the CLIENT command is valid.
specifically the ones manipulating the current client, like enabling tracking, and so on.
the user may be wanting to operate on the calling client, not the fake client, but that's not currently supported, and i think we should just disallow or disregard this case.


/* Unlink the client: this will close the socket, remove the I/O
* handlers, and remove references of the client from different
* places where active clients may be referenced. */
Expand Down Expand Up @@ -1697,10 +1704,6 @@ void freeClient(client *c) {
* we lost the connection with the master. */
if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection();

/* Remove the contribution that this client gave to our
* incrementally computed memory usage. */
server.stat_clients_type_memory[c->last_memory_type] -=
c->last_memory_usage;
/* Remove client from memory usage buckets */
if (c->mem_usage_bucket) {
c->mem_usage_bucket->mem_usage_sum -= c->last_memory_usage;
Expand Down Expand Up @@ -2487,7 +2490,7 @@ int processCommandAndResetClient(client *c) {
commandProcessed(c);
/* Update the client's memory to include output buffer growth following the
* processed command. */
updateClientMemUsageAndBucket(c);
if (c->conn) updateClientMemUsageAndBucket(c);
Copy link
Collaborator Author

@sundb sundb Dec 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@oranagra this is another place to record the memory usage of fake clients.
when unblocking a fake client that blocks on keys, it triggers.

=== REDIS BUG REPORT START: Cut & paste starting from here ===
21845:M 12 Dec 2023 13:25:36.347 # === ASSERTION FAILED ===
21845:M 12 Dec 2023 13:25:36.347 # ==> server.c:1023 'io_threads_op == IO_THREADS_OP_IDLE && c->conn' is not true

------ STACK TRACE ------

Backtrace:
0   redis-server                        0x0000000102d73374 updateClientMemUsageAndBucket.cold.1 + 32
1   redis-server                        0x0000000102c425dc updateClientMemUsageAndBucket + 412
2   redis-server                        0x0000000102c6b604 processCommandAndResetClient + 60
3   redis-server                        0x0000000102ceed08 handleClientsBlockedOnKeys + 792
4   redis-server                        0x0000000102c48f60 processCommand + 2816
5   redis-server                        0x0000000102c6b864 processInputBuffer + 312
6   redis-server                        0x0000000102c635d4 readQueryFromClient + 1364
7   redis-server                        0x0000000102d36380 connSocketEventHandler + 220
8   redis-server                        0x0000000102c3b720 aeProcessEvents + 1100
9   redis-server                        0x0000000102c3b840 aeMain + 32
10  redis-server                        0x0000000102c4f998 main + 2036
11  dyld                                0x00000001908a5058 start + 2224

failed test:

    test "Blpop on async RM_Call fire and forget" {
        assert_equal {Blocked} [r do_rm_call_fire_and_forget blpop l 0]
        r lpush l a
        assert_equal {0} [r llen l]
    }

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think that's a bug, they're not "evictable", and the user (application) doesn't control them (and their amount).
we don't show them in CLIENT LIST, and i think we should not count their memory in that mechanism (maybe we should count them elsewhere)

}

if (server.current_client == NULL) deadclient = 1;
Expand Down
5 changes: 3 additions & 2 deletions src/server.c
Expand Up @@ -994,6 +994,7 @@ static inline clientMemUsageBucket *getMemUsageBucket(size_t mem) {
* usage bucket.
*/
void updateClientMemoryUsage(client *c) {
serverAssert(c->conn);
size_t mem = getClientMemoryUsage(c, NULL);
int type = getClientType(c);
/* Now that we have the memory used by the client, remove the old
Expand All @@ -1006,7 +1007,7 @@ void updateClientMemoryUsage(client *c) {
}

int clientEvictionAllowed(client *c) {
if (server.maxmemory_clients == 0 || c->flags & CLIENT_NO_EVICT) {
if (server.maxmemory_clients == 0 || c->flags & CLIENT_NO_EVICT || !c->conn) {
return 0;
}
int type = getClientType(c);
Expand Down Expand Up @@ -1046,7 +1047,7 @@ void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
* returns 1 if client eviction for this client is allowed, 0 otherwise.
*/
int updateClientMemUsageAndBucket(client *c) {
serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
serverAssert(io_threads_op == IO_THREADS_OP_IDLE && c->conn);
int allow_eviction = clientEvictionAllowed(c);
removeClientFromMemUsageBucket(c, allow_eviction);

Expand Down
2 changes: 1 addition & 1 deletion src/server.h
Expand Up @@ -2533,7 +2533,7 @@ void moduleFreeContext(struct RedisModuleCtx *ctx);
void moduleCallCommandUnblockedHandler(client *c);
void unblockClientFromModule(client *c);
void moduleHandleBlockedClients(void);
void moduleBlockedClientTimedOut(client *c);
void moduleBlockedClientTimedOut(client *c, int from_module);
void modulePipeReadable(aeEventLoop *el, int fd, void *privdata, int mask);
size_t moduleCount(void);
void moduleAcquireGIL(void);
Expand Down
15 changes: 8 additions & 7 deletions tests/modules/blockedclient.c
Expand Up @@ -102,6 +102,7 @@ typedef struct {

void *bg_call_worker(void *arg) {
bg_call_data *bg = arg;
RedisModuleBlockedClient *bc = bg->bc;

// Get Redis module context
RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(bg->bc);
Expand Down Expand Up @@ -135,6 +136,12 @@ void *bg_call_worker(void *arg) {
RedisModuleCallReply *rep = RedisModule_Call(ctx, cmd, format, bg->argv + cmd_pos + 1, bg->argc - cmd_pos - 1);
RedisModule_FreeString(NULL, format_redis_str);

/* Free the arguments within GIL to prevent simultaneous freeing in main thread. */
for (int i=0; i<bg->argc; i++)
RedisModule_FreeString(ctx, bg->argv[i]);
RedisModule_Free(bg->argv);
RedisModule_Free(bg);

// Release GIL
RedisModule_ThreadSafeContextUnlock(ctx);

Expand All @@ -147,13 +154,7 @@ void *bg_call_worker(void *arg) {
}

// Unblock client
RedisModule_UnblockClient(bg->bc, NULL);

/* Free the arguments */
for (int i=0; i<bg->argc; i++)
RedisModule_FreeString(ctx, bg->argv[i]);
RedisModule_Free(bg->argv);
RedisModule_Free(bg);
RedisModule_UnblockClient(bc, NULL);

// Free the Redis module context
RedisModule_FreeThreadSafeContext(ctx);
Expand Down