Skip to content

Commit

Permalink
Make tls_conn_timeout test more reliable in CI (apache#7018)
Browse files Browse the repository at this point in the history
  • Loading branch information
shinrich committed Jul 24, 2020
1 parent 9f15e77 commit 549c626
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 118 deletions.
37 changes: 0 additions & 37 deletions tests/gold_tests/timeout/case2.sh

This file was deleted.

49 changes: 5 additions & 44 deletions tests/gold_tests/timeout/ssl-delay-server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <openssl/err.h>
#include <sys/time.h>
#include <sys/select.h>
#include <errno.h>

char req_buf[10000];
char post_buf[1000];
Expand Down Expand Up @@ -81,7 +82,7 @@ run_session(void *arg)
SSL *ssl = SSL_new(svr_ctx);
if (ssl == NULL) {
fprintf(stderr, "Failed to create ssl\n");
exit(1);
return nullptr;
}

#if OPENSSL_VERSION_NUMBER >= 0x10100000
Expand All @@ -99,55 +100,19 @@ run_session(void *arg)
sleep(connect_delay);
}
int did_accept = 0;
struct timeval timeout;
fd_set read_fd, write_fd, except_fd;

int ret = SSL_accept(ssl);
while (ret < 0) {
timeout.tv_sec = 1;
timeout.tv_usec = 0;
FD_ZERO(&read_fd);
FD_ZERO(&write_fd);
FD_ZERO(&except_fd);
FD_SET(sfd, &read_fd);
FD_SET(sfd, &write_fd);
FD_SET(sfd, &except_fd);
int select_r;
if ((select_r = select(sfd + 1, &read_fd, &write_fd, &except_fd, &timeout)) < 0) {
perror("Select failed");
exit(1);
}
if (select_r > 0) {
if (FD_ISSET(sfd, &except_fd)) {
fprintf(stderr, "Socket failed\n");
break;
}
if (FD_ISSET(sfd, &read_fd)) {
ret = SSL_accept(ssl);
} else if (FD_ISSET(sfd, &write_fd)) {
ret = SSL_connect(ssl);
}
} else {
fprintf(stderr, "Select timeout %ld\n", time(NULL));
}
}

if (!did_accept && ret == 1) {
did_accept = 1;
fprintf(stderr, "Done accept\n");
} else {
fprintf(stderr, "Failed accept\n");
exit(1);
int ssl_err = SSL_get_error(ssl, ret);
fprintf(stderr, "Failed accept ssl_error=%d errno=%d\n", ssl_err, errno);
return nullptr;
}

if (did_accept) {
int opt = fcntl(sfd, F_GETFL);
opt = opt & ~O_NONBLOCK;
if (fcntl(sfd, F_SETFL, opt) < 0) {
perror("Failed to set block");
exit(1);
}

ret = SSL_read(ssl, input_buf, sizeof(input_buf));
if (ret < 0) {
// Failure
Expand Down Expand Up @@ -243,10 +208,6 @@ main(int argc, char *argv[])

fprintf(stderr, "Spawn off new sesson thread %d\n", sfd);

if (fcntl(sfd, F_SETFL, O_NONBLOCK) < 0) {
perror("Failed to set non-blocking");
}

// Spawn off new thread
pthread_t thread_id;
pthread_create(&thread_id, NULL, run_session, (void *)((intptr_t)sfd));
Expand Down
86 changes: 49 additions & 37 deletions tests/gold_tests/timeout/tls_conn_timeout.test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@

ts = Test.MakeATSProcess("ts")

server2 = Test.MakeOriginServer("server2")

Test.Setup.Copy('ssl-delay-server')

Test.ContinueOnFail = True
Expand All @@ -30,75 +28,89 @@
Test.GetTcpPort("get_block_connect_port")
Test.GetTcpPort("get_block_ttfb_port")

delay_post_connect = Test.Processes.Process("delay post connect", './ssl-delay-server {0} 3 0 server.pem'.format(Test.Variables.block_connect_port))
delay_post_ttfb = Test.Processes.Process("delay post ttfb", './ssl-delay-server {0} 0 6 server.pem'.format(Test.Variables.block_ttfb_port))

delay_get_connect = Test.Processes.Process("delay get connect", './ssl-delay-server {0} 3 0 server.pem'.format(Test.Variables.get_block_connect_port))
delay_get_ttfb = Test.Processes.Process("delay get ttfb", './ssl-delay-server {0} 0 6 server.pem'.format(Test.Variables.get_block_ttfb_port))

ts.Disk.records_config.update({
'proxy.config.url_remap.remap_required': 1,
'proxy.config.http.connect_attempts_timeout': 1,
'proxy.config.http.post_connect_attempts_timeout': 1,
'proxy.config.http.connect_attempts_max_retries': 1,
'proxy.config.http.transaction_no_activity_timeout_out': 4,
'proxy.config.diags.debug.enabled': 0,
'proxy.config.diags.debug.tags': 'http|ssl',
'proxy.config.diags.debug.tags': 'http|ssl'
})

ts.Disk.remap_config.AddLine('map /connect_blocked https://127.0.0.1:{0}'.format(Test.Variables.block_connect_port))
ts.Disk.remap_config.AddLine('map /ttfb_blocked https://127.0.0.1:{0}'.format(Test.Variables.block_ttfb_port))
ts.Disk.remap_config.AddLine('map /get_connect_blocked https://127.0.0.1:{0}'.format(Test.Variables.get_block_connect_port))
ts.Disk.remap_config.AddLine('map /get_ttfb_blocked https://127.0.0.1:{0}'.format(Test.Variables.get_block_ttfb_port))

# Commenting out the per test case timeouts. In the CI, there is too big of a risk that we hit those timeouts. Had hoped to use
# The test case timeouts to differentiate between a good origin timeout and a too long origin timeout

# Run the connection and TTFB timeout tests first with POST.

# Request the port that should timeout in the handshake
# Should retry once
tr = Test.AddTestRun("tr-blocking")
tr.Setup.Copy("case2.sh")
tr.Processes.Default.StartBefore(Test.Processes.ts, ready=When.PortOpen(ts.Variables.port))
tr = Test.AddTestRun("tr-blocking-post")
tr.Setup.Copy("../chunked_encoding/ssl/server.pem")
tr.Processes.Default.Command = 'sh ./case2.sh {0} 3 0 {1} connect_blocked'.format(Test.Variables.block_connect_port, ts.Variables.port)
tr.Processes.Default.TimeOut = 6
tr.Processes.Default.StartBefore(Test.Processes.ts)
tr.Processes.Default.StartBefore(delay_post_connect, ready=When.PortOpen(Test.Variables.block_connect_port))
tr.Processes.Default.Command = 'curl -H"Connection:close" -d "bob" -i http://127.0.0.1:{0}/connect_blocked --tlsv1.2'.format(ts.Variables.port)
#tr.Processes.Default.TimeOut = 6
tr.Processes.Default.Streams.All = Testers.ContainsExpression("HTTP/1.1 502 internal error - server connection terminated", "Connect failed")
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = delay_post_connect
tr.StillRunningAfter = Test.Processes.ts

# Should not catch the connect timeout. Even though the first bytes are not sent until after the 2 second connect timeout
# Shoudl not retry the connection
tr = Test.AddTestRun("tr-delayed")
tr.Processes.Default.Command = 'sh ./case2.sh {0} 0 6 {1} ttfb_blocked'.format(Test.Variables.block_ttfb_port, ts.Variables.port)
tr.Processes.Default.TimeOut = 15
tr = Test.AddTestRun("tr-delayed-post")
tr.Processes.Default.StartBefore(delay_post_ttfb, ready=When.PortOpen(Test.Variables.block_ttfb_port))
tr.Processes.Default.Command = 'curl -H"Connection:close" -d "bob" -i http://127.0.0.1:{0}/ttfb_blocked --tlsv1.2'.format(ts.Variables.port)
#tr.Processes.Default.TimeOut = 15
tr.Processes.Default.Streams.All = Testers.ContainsExpression("504 Connection Timed Out", "Conntect timeout")
tr.Processes.Default.ReturnCode = 0

tr = Test.AddTestRun("tr-connect-retry")
tr.Processes.Default.Command = 'grep "Accept try" server{0}post.log | wc -l'.format(Test.Variables.block_connect_port)
tr.Processes.Default.Streams.All = Testers.ContainsExpression("^2$", "Two accept tries")
tr.Processes.Default.ReturnCode = 0

tr = Test.AddTestRun("tr-post-ttfb-retry")
tr.Processes.Default.Command = 'grep "Accept try" server{0}post.log | wc -l'.format(Test.Variables.block_ttfb_port)
tr.Processes.Default.Streams.All = Testers.ContainsExpression("^1$", "Only 1 accept try")
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = delay_post_ttfb

# Run the connection and TTFB timeout tests again with GET

# Request the port that should timeout in the handshake
# Should retry once
tr = Test.AddTestRun("tr-blocking")
tr.Processes.Default.Command = 'sh ./case2.sh {0} 3 0 {1} get_connect_blocked get'.format(Test.Variables.get_block_connect_port, ts.Variables.port)
tr.Processes.Default.TimeOut = 6
tr = Test.AddTestRun("tr-blocking-get")
tr.Processes.Default.StartBefore(delay_get_connect, ready=When.PortOpen(Test.Variables.get_block_connect_port))
tr.Processes.Default.Command = 'curl -H"Connection:close" -i http://127.0.0.1:{0}/get_connect_blocked --tlsv1.2'.format(ts.Variables.port)
#tr.Processes.Default.TimeOut = 6
tr.Processes.Default.Streams.All = Testers.ContainsExpression("HTTP/1.1 502 internal error - server connection terminated", "Connect failed")
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = delay_get_connect

# Should not catch the connect timeout. Even though the first bytes are not sent until after the 2 second connect timeout
# Since get is idempotent, It will try to connect again even though the GET request had been sent
tr = Test.AddTestRun("tr-delayed")
tr.Processes.Default.Command = 'sh ./case2.sh {0} 0 6 {1} get_ttfb_blocked get'.format(Test.Variables.get_block_ttfb_port, ts.Variables.port)
tr.Processes.Default.TimeOut = 15
tr = Test.AddTestRun("tr-delayed-get")
tr.Processes.Default.StartBefore(delay_get_ttfb, ready=When.PortOpen(Test.Variables.get_block_ttfb_port))
tr.Processes.Default.Command = 'curl -H"Connection:close" -i http://127.0.0.1:{0}/get_ttfb_blocked --tlsv1.2'.format(ts.Variables.port)
#tr.Processes.Default.TimeOut = 15
tr.Processes.Default.Streams.All = Testers.ContainsExpression("504 Connection Timed Out", "Conntect timeout")
tr.Processes.Default.ReturnCode = 0

tr = Test.AddTestRun("tr-connect-retry")
tr.Processes.Default.Command = 'grep "Accept try" server{0}get.log | wc -l'.format(Test.Variables.get_block_connect_port)
tr.Processes.Default.Streams.All = Testers.ContainsExpression("^2$", "Two accept tries")
tr.Processes.Default.ReturnCode = 0

tr = Test.AddTestRun("tr-get-ttfb-retry")
tr.Processes.Default.Command = 'grep "Accept try" server{0}get.log | wc -l'.format(Test.Variables.get_block_ttfb_port)
tr.Processes.Default.Streams.All = Testers.ContainsExpression("^2$", "Two accept tries")
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = delay_get_ttfb

delay_post_connect.Streams.All = Testers.ContainsExpression("Accept try", "Should appear at least two times (may be an extra one due to port ready test)")
delay_post_connect.Streams.All += Testers.ExcludesExpression("TTFB delay", "Should not reach the TTFB delay logic")
delay_post_ttfb.Streams.All = Testers.ContainsExpression("Accept try", "Should appear one time")
delay_post_ttfb.Streams.All += Testers.ContainsExpression("TTFB delay", "Should reach the TTFB delay logic")
# May fail due to port ready test
#delay_post_ttfb.Streams.All += Testers.ExcludesExpression("Failed accept", "Accept should have succeeded")


delay_get_connect.Streams.All = Testers.ContainsExpression("Accept try", "Should appear at least two times (may be an extra one due to port ready test)")
delay_get_connect.Streams.All += Testers.ExcludesExpression("TTFB delay", "Should not reach the TTFB delay logic")
delay_get_ttfb.Streams.All = Testers.ContainsExpression("Accept try", "Should appear at least two times (may be an extra one due to the port ready test)")
delay_get_ttfb.Streams.All += Testers.ContainsExpression("TTFB delay", "Should reach the TTFB delay logic")
# May fail due to port ready test
#delay_get_ttfb.Streams.All += Testers.ExcludesExpression("Failed accept", "Accept should have succeeded")

1 change: 1 addition & 0 deletions tests/gold_tests/tls/tls_tunnel.test.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@
tr.Processes.Default.Streams.All += Testers.ContainsExpression("Not Found on Accelerato", "Terminates on on Traffic Server")
tr.Processes.Default.Streams.All += Testers.ContainsExpression("ATS", "Terminate on Traffic Server")
tr.Processes.Default.Streams.All += Testers.ExcludesExpression("Could Not Connect", "Curl attempt should have succeeded")
tr.TimeOut = 30

# Should tunnel to server_bar
tr = Test.AddTestRun("bar.com Tunnel-test")
Expand Down

0 comments on commit 549c626

Please sign in to comment.