Skip to content

Commit

Permalink
Event loop shutdown stuck due to uncompleted connect (Fixes #192)
Browse files Browse the repository at this point in the history
Motivation:

An uncompleted CONNECT_SCHEDULED operation prevents force closing the channel and issuing IORING_OP_CLOSE. As a result, a graceful shutdown
completes its quiet period and, because the executor is already shut down, ignores any later CONNECT completion (whose arrival is at the mercy of the OS configuration for the connect timeout).
The executor is then unable to complete its cleanup because of the unclosed channels, and it hangs awaiting
a CQE that has already happened.

Modification:

Allow force close to initiate a close despite an uncompleted CONNECT_SCHEDULED, allowing it to issue an IORING_OP_CLOSE.
Once the FD close completes, a later CQE for the CONNECT_SCHEDULED operation on the closed FD is ignored because its channel is already deregistered.

Result:

Prompt channel close on a short timeout (regardless of peer presence)
  • Loading branch information
franz1981 committed Feb 18, 2023
1 parent 6cbdf78 commit 018c251
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 1 deletion.
Expand Up @@ -336,7 +336,8 @@ abstract class AbstractUringUnsafe extends AbstractUnsafe {

@Override
public void close(ChannelPromise promise) {
if ((ioState & (WRITE_SCHEDULED | READ_SCHEDULED | CONNECT_SCHEDULED)) == 0) {
if ((ioState & (WRITE_SCHEDULED | READ_SCHEDULED)) == 0) {
// force close can ignore any CONNECT_SCHEDULED
forceClose(promise);
} else {
if (delayedClose == null || delayedClose.isVoid()) {
Expand Down
@@ -0,0 +1,86 @@
/*
* Copyright 2021 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.incubator.channel.uring;

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;

import io.netty.bootstrap.Bootstrap;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelOption;
import io.netty.channel.ConnectTimeoutException;
import io.netty.channel.DefaultEventLoopGroup;
import io.netty.channel.SimpleChannelInboundHandler;
import io.netty.channel.socket.nio.NioSocketChannel;
import io.netty.testsuite.transport.TestsuitePermutation;
import io.netty.testsuite.transport.socket.AbstractClientSocketTest;
import org.hamcrest.MatcherAssert;
import org.hamcrest.core.IsInstanceOf;
import org.junit.Assert;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.junit.jupiter.api.Timeout;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.IsInstanceOf.instanceOf;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

/**
 * Checks that an {@link IOUringEventLoopGroup} can be shut down gracefully after a connect attempt
 * has failed with a {@link ConnectTimeoutException} caused by a very short connect timeout.
 */
public class IOUringClientSocketConnectionShortTimeoutTest extends AbstractClientSocketTest {

    /**
     * Aborts the tests of this class (via a JUnit assumption) when io_uring is not available.
     */
    @BeforeAll
    public static void loadJNI() {
        assumeTrue(IOUring.isAvailable());
    }

    /**
     * Runs a connect that is expected to time out and then shuts the event loop group down.
     * The {@link Timeout} on this method is what turns a hanging shutdown into a test failure.
     *
     * @param testInfo the JUnit test descriptor forwarded to {@code run}
     * @throws Throwable if the connect expectations are not met or the shutdown fails
     */
    @Test
    @Timeout(value = 30000, unit = TimeUnit.MILLISECONDS)
    public void testConnectTimeoutAndClose(TestInfo testInfo) throws Throwable {
        run(testInfo, bootstrap -> {
            testFailedConnectWithSuperShortTimeout(bootstrap);
            // Let a failed shutdown propagate and fail the test; swallowing it (and only printing
            // the stack trace) would report success even though the shutdown did not complete.
            bootstrap.config().group().shutdownGracefully().sync();
        });
    }

    /**
     * Connects with a 1 ms connect timeout and asserts that the attempt fails with a
     * {@link ConnectTimeoutException}.
     *
     * @param cb the bootstrap to connect with; its handler and connect timeout are overwritten
     * @throws Throwable if waiting for the connect future is interrupted or an assertion fails
     */
    public void testFailedConnectWithSuperShortTimeout(Bootstrap cb) throws Throwable {
        cb.handler(new SimpleChannelInboundHandler<Object>() {
            @Override
            public void channelRead0(ChannelHandlerContext ctx, Object msgs) {
                // Nothing will be sent.
            }
        });
        cb.option(ChannelOption.CONNECT_TIMEOUT_MILLIS, 1);

        // 198.51.100.254 is inside the 198.51.100.0/24 documentation range (TEST-NET-2);
        // presumably no peer ever answers there, so the connect cannot complete within 1 ms.
        ChannelFuture future = cb.connect("198.51.100.254", 65535);

        // Assert sequentially instead of inside a finally block: an exception thrown by await()
        // (e.g. an interrupt) must surface as-is rather than be replaced by an assertion error.
        assertThat(future.await(Integer.MAX_VALUE), is(true));
        // instanceOf also rejects a null cause, so no separate not-null check is required.
        assertThat(future.cause(), instanceOf(ConnectTimeoutException.class));
    }

    /**
     * Supplies a single bootstrap factory backed by a one-threaded {@link IOUringEventLoopGroup}
     * and {@link IOUringSocketChannel}.
     */
    @Override
    protected List<TestsuitePermutation.BootstrapFactory<Bootstrap>> newFactories() {
        return Arrays.asList(
                () -> new Bootstrap().group(new IOUringEventLoopGroup(1)).channel(IOUringSocketChannel.class));
    }
}

0 comments on commit 018c251

Please sign in to comment.