From 3956fb1bf32d00d922cada6aa2c3ae224beb774d Mon Sep 17 00:00:00 2001 From: Oleg Babin Date: Sat, 30 Oct 2021 03:28:47 +0300 Subject: [PATCH] fix use-after-free in producer/consumer destructors Before this patch it was assumed that between event queues destroying and producer/consumer close there were no any event. However it's not so and tests catched it: ``` Process 46864 stopped * thread #4, name = 'coio', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT) frame #0: 0x00007fff203fed86 libsystem_pthread.dylib`pthread_mutex_lock + 4 libsystem_pthread.dylib`pthread_mutex_lock: -> 0x7fff203fed86 <+4>: cmpq $0x4d55545a, (%rdi) ; imm = 0x4D55545A 0x7fff203fed8d <+11>: jne 0x7fff203fee02 ; <+128> 0x7fff203fed8f <+13>: movl 0xc(%rdi), %eax 0x7fff203fed92 <+16>: movl %eax, %ecx Target 0: (tarantool) stopped. (lldb) bt * thread #4, name = 'coio', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT) * frame #0: 0x00007fff203fed86 libsystem_pthread.dylib`pthread_mutex_lock + 4 frame #1: 0x0000000102fdbba5 tntkafka.dylib`queue_push(queue=0x6d726554203a5d70, value=0x0000000102c1e0c0) at queue.c:92:5 [opt] frame #2: 0x0000000102fd7f9a tntkafka.dylib`log_callback(rd_kafka=, level=7, fac="DESTROY", buf=) at callbacks.c:56:17 [opt] frame #3: 0x000000011e5456f3 librdkafka.1.dylib`rd_kafka_log0 + 563 frame #4: 0x000000011e5469a0 librdkafka.1.dylib`rd_kafka_destroy_app + 832 frame #5: 0x0000000102fdb3be tntkafka.dylib`wait_producer_destroy(args=) at producer.c:414:5 [opt] frame #6: 0x000000010015d81e tarantool`coio_on_call + 23 frame #7: 0x00000001003019a7 tarantool`etp_proc + 395 frame #8: 0x00007fff204038fc libsystem_pthread.dylib`_pthread_start + 224 frame #9: 0x00007fff203ff443 libsystem_pthread.dylib`thread_start + 15 ``` To fix this issue let's reoreder code a bit and start to close producer/consumer before event queues destruction. This patch seems to help to solve an issue. Also I'd want to highlight that not all destructors should be placed after wait_producer_destroy/wait_consumer_destroy it seems topics they owns should be destroyed before. Otherwise it leads to an assertion. Also small code changes were performed to adjust code to Tarantool code-style and to make consumer_destroy and producer_destroy more consistent in sence of return values. Closes #44 --- kafka/consumer.c | 29 ++++++++++++++--------------- kafka/producer.c | 27 +++++++++++++-------------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/kafka/consumer.c b/kafka/consumer.c index 5f68b4c..8c6ed10 100644 --- a/kafka/consumer.c +++ b/kafka/consumer.c @@ -532,31 +532,30 @@ wait_consumer_destroy(va_list args) { return 0; } -static rd_kafka_resp_err_t +void consumer_destroy(struct lua_State *L, consumer_t *consumer) { - rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; - - if (consumer->topics != NULL) { + if (consumer->topics != NULL) rd_kafka_topic_partition_list_destroy(consumer->topics); - } - - if (consumer->poller != NULL) { - destroy_consumer_poller(consumer->poller); - } - - if (consumer->event_queues != NULL) { - destroy_event_queues(L, consumer->event_queues); - } + /* + * Here we close consumer and only then destroys other stuff. + * Otherwise raise condition is possible when e.g. + * event queue is destroyed but consumer still receives logs, errors, etc. + * Only topics should be destroyed. + */ if (consumer->rd_consumer != NULL) { /* Destroy handle */ // FIXME: kafka_destroy hangs forever coio_call(wait_consumer_destroy, consumer->rd_consumer); } - free(consumer); + if (consumer->poller != NULL) + destroy_consumer_poller(consumer->poller); - return err; + if (consumer->event_queues != NULL) + destroy_event_queues(L, consumer->event_queues); + + free(consumer); } int diff --git a/kafka/producer.c b/kafka/producer.c index 4334f0f..bcd1a67 100644 --- a/kafka/producer.c +++ b/kafka/producer.c @@ -417,27 +417,26 @@ wait_producer_destroy(va_list args) { void destroy_producer(struct lua_State *L, producer_t *producer) { - if (producer->rd_producer != NULL) { - coio_call(producer_flush, producer->rd_producer); - } - - if (producer->poller != NULL) { - destroy_producer_poller(producer->poller); - } - - if (producer->topics != NULL) { + if (producer->topics != NULL) destroy_producer_topics(producer->topics); - } - - if (producer->event_queues != NULL) { - destroy_event_queues(L, producer->event_queues); - } + /* + * Here we close producer and only then destroys other stuff. + * Otherwise raise condition is possible when e.g. + * event queue is destroyed but producer still receives logs, errors, etc. + * Only topics should be destroyed. + */ if (producer->rd_producer != NULL) { /* Destroy handle */ coio_call(wait_producer_destroy, producer->rd_producer); } + if (producer->poller != NULL) + destroy_producer_poller(producer->poller); + + if (producer->event_queues != NULL) + destroy_event_queues(L, producer->event_queues); + free(producer); }