Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix connection retries #2247

Merged
merged 23 commits into from
Nov 7, 2023
Merged
3 changes: 1 addition & 2 deletions src/main/java/com/microsoft/sqlserver/jdbc/IOBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -725,8 +725,7 @@ final InetSocketAddress open(String host, int port, int timeoutMillis, boolean u
setSocketOptions(tcpSocket, this);

// set SO_TIMEOUT
int socketTimeout = con.getSocketTimeoutMilliseconds();
tcpSocket.setSoTimeout(socketTimeout);
tcpSocket.setSoTimeout(con.getSocketTimeoutMilliseconds());

inputStream = tcpInputStream = new ProxyInputStream(tcpSocket.getInputStream());
outputStream = tcpOutputStream = tcpSocket.getOutputStream();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import java.lang.Thread.State;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;

Expand Down Expand Up @@ -164,7 +165,10 @@ int getLoginTimeoutSeconds() {

void reconnect(TDSCommand cmd) throws InterruptedException {
reconnectErrorReceived = null;
reconnectThread = new ReconnectThread(this.connection, cmd);
connectRetryCount = this.connection.getRetryCount();
if (connectRetryCount > 0) {
reconnectThread = new ReconnectThread(this.connection, cmd);
}
reconnectThread.start();
reconnectThread.join();
reconnectErrorReceived = reconnectThread.getException();
Expand Down Expand Up @@ -450,8 +454,9 @@ public void run() {
}

boolean keepRetrying = true;
long connectRetryInterval = TimeUnit.SECONDS.toMillis(con.getRetryInterval());

while ((connectRetryCount > 0) && (!stopRequested) && keepRetrying) {
while ((connectRetryCount >= 0) && (!stopRequested) && keepRetrying) {
if (loggerExternal.isLoggable(Level.FINER)) {
loggerExternal.finer("Running reconnect for command: " + command.toString() + " ; ConnectRetryCount = "
+ connectRetryCount);
Expand All @@ -468,7 +473,7 @@ public void run() {
} else {
try {
if (connectRetryCount > 1) {
Thread.sleep((long) (con.getRetryInterval()) * 1000);
Thread.sleep(connectRetryInterval);
}
} catch (InterruptedException ie) {
// re-interrupt thread
Expand All @@ -492,7 +497,7 @@ public void run() {
}
}

if ((connectRetryCount == 0) && (keepRetrying)) {
if ((connectRetryCount <= 0) && (keepRetrying)) {
eReceived = new SQLServerException(SQLServerException.getErrString("R_crClientAllRecoveryAttemptsFailed"),
eReceived);
}
Expand Down
289 changes: 125 additions & 164 deletions src/main/java/com/microsoft/sqlserver/jdbc/SQLServerConnection.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ static String generateStateCode(SQLServerConnection con, int errNum, Integer dat
* original error string.
*/
static String checkAndAppendClientConnId(String errMsg, SQLServerConnection conn) {
if (null != conn && conn.attachConnId()) {
if (null != conn && conn.isConnected()) {
UUID clientConnId = conn.getClientConIdInternal();
assert null != clientConnId;
StringBuilder sb = new StringBuilder(errMsg);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ public void testConnectCountInLoginAndCorrectRetryCount() {

int connectRetryCount = 3;
int connectRetryInterval = 1;
int longLoginTimeout = loginTimeOutInSeconds * 4; // 120 seconds
int longLoginTimeout = loginTimeOutInSeconds * 4;

try {
SQLServerDataSource ds = new SQLServerDataSource();
Expand All @@ -474,17 +474,53 @@ public void testConnectCountInLoginAndCorrectRetryCount() {
assertTrue(con == null, TestResource.getResource("R_shouldNotConnect"));
}
} catch (Exception e) {
assertTrue(e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")), e.getMessage());
long totalTime = System.currentTimeMillis() - timerStart;

assertTrue(e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")), e.getMessage());
int expectedMinimumTimeInMillis = (connectRetryCount * connectRetryInterval) * 1000; // 3 seconds

// Minimum time is 0 seconds per attempt and connectRetryInterval * connectRetryCount seconds of interval.
// Maximum is unknown, but is needs to be less than longLoginTimeout or else this is an issue.
assertTrue(totalTime > expectedMinimumTimeInMillis, TestResource.getResource("R_executionNotLong"));
assertTrue(totalTime < (longLoginTimeout * 1000L), TestResource.getResource("R_executionTooLong"));
assertTrue(totalTime > expectedMinimumTimeInMillis, TestResource.getResource("R_executionNotLong")
+ " totalTime: " + totalTime + " expectedTime: " + expectedMinimumTimeInMillis);
assertTrue( totalTime < (longLoginTimeout * 1000L), TestResource.getResource("R_executionTooLong")
+ "totalTime: " + totalTime + " expectedTime: " + expectedMinimumTimeInMillis);
}
}

/**
* Tests whether connectRetryCount and connectRetryInterval are properly respected in the login loop. As well, tests
* that connection is retried the proper number of times. This is for cases with zero retries.
*/
@Test
public void testConnectCountInLoginAndCorrectRetryCountWithZeroRetry() {
long timerStart = 0;

int connectRetryCount = 0;
int connectRetryInterval = 60;
int longLoginTimeout = loginTimeOutInSeconds * 3; // 90 seconds

try {
SQLServerDataSource ds = new SQLServerDataSource();
ds.setURL(connectionString);
ds.setLoginTimeout(longLoginTimeout);
ds.setConnectRetryCount(connectRetryCount);
ds.setConnectRetryInterval(connectRetryInterval);
ds.setDatabaseName(RandomUtil.getIdentifier("DataBase"));
timerStart = System.currentTimeMillis();

try (Connection con = ds.getConnection()) {
assertTrue(con == null, TestResource.getResource("R_shouldNotConnect"));
}
} catch (Exception e) {
assertTrue(e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")), e.getMessage());
long totalTime = System.currentTimeMillis() - timerStart;

// Maximum is unknown, but is needs to be less than longLoginTimeout or else this is an issue.
assertTrue(totalTime < (longLoginTimeout * 1000L), TestResource.getResource("R_executionTooLong"));
}
}

@Test
@Tag(Constants.xAzureSQLDW)
@Tag(Constants.xAzureSQLDB)
Expand Down Expand Up @@ -974,10 +1010,9 @@ public void testThreadInterruptedStatus() throws InterruptedException {
Runnable runnable = new Runnable() {
public void run() {
SQLServerDataSource ds = new SQLServerDataSource();

ds.setURL(connectionString);
ds.setServerName("invalidServerName" + UUID.randomUUID());
ds.setLoginTimeout(5);
ds.setDatabaseName("invalidDatabase" + UUID.randomUUID());
ds.setLoginTimeout(30);
try (Connection con = ds.getConnection()) {} catch (SQLException e) {}
}
};
Expand All @@ -992,7 +1027,8 @@ public void run() {
Thread.sleep(8000);
executor.shutdownNow();

assertTrue(status && future.isCancelled(), TestResource.getResource("R_threadInterruptNotSet"));
assertTrue(status && future.isCancelled(), TestResource.getResource("R_threadInterruptNotSet") + " status: "
+ status + " isCancelled: " + future.isCancelled());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class TimeoutTest extends AbstractTest {
static String randomServer = RandomUtil.getIdentifier("Server");
static String waitForDelaySPName = RandomUtil.getIdentifier("waitForDelaySP");
static final int waitForDelaySeconds = 10;
static final int defaultTimeout = 15; // loginTimeout default value
static final int defaultTimeout = 60; // loginTimeout default value

@BeforeAll
public static void setupTests() throws Exception {
Expand All @@ -55,13 +55,13 @@ public void testDefaultLoginTimeout() {
try (Connection con = PrepUtil.getConnection("jdbc:sqlserver://" + randomServer + "connectRetryCount=0")) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, defaultTimeout);
Expand All @@ -77,13 +77,13 @@ public void testURLLoginTimeout() {
try (Connection con = PrepUtil.getConnection("jdbc:sqlserver://" + randomServer + ";logintimeout=" + timeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, timeout);
Expand All @@ -100,13 +100,13 @@ public void testDMLoginTimeoutApplied() {
try (Connection con = PrepUtil.getConnection("jdbc:sqlserver://" + randomServer)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, timeout);
Expand All @@ -124,6 +124,7 @@ public void testDMLoginTimeoutNotApplied() {
.getConnection("jdbc:sqlserver://" + randomServer + ";loginTimeout=" + timeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue(
(e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
Expand All @@ -132,7 +133,6 @@ public void testDMLoginTimeoutNotApplied() {
.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}
verifyTimeout(timerEnd - timerStart, timeout);
} finally {
Expand All @@ -152,13 +152,13 @@ public void testConnectRetryBadServer() {
.getConnection("jdbc:sqlserver://" + randomServer + ";loginTimeout=" + loginTimeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, loginTimeout);
Expand All @@ -179,13 +179,13 @@ public void testConnectRetryServerError() {
+ ";connectRetryInterval=" + connectRetryInterval)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

// connect + all retries should always be <= loginTimeout
Expand All @@ -211,13 +211,13 @@ public void testConnectRetryServerErrorDS() {
try (Connection con = PrepUtil.getConnection(connectStr)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

// connect + all retries should always be <= loginTimeout
Expand All @@ -228,8 +228,8 @@ public void testConnectRetryServerErrorDS() {
@Test
public void testConnectRetryTimeout() {
long timerEnd = 0;
long timerStart = System.currentTimeMillis();
int loginTimeout = 2;
long timerStart = System.currentTimeMillis();

// non existent server with very short loginTimeout so there is no time to do all retries
try (Connection con = PrepUtil.getConnection(
Expand All @@ -238,13 +238,13 @@ public void testConnectRetryTimeout() {
+ (new Random().nextInt(defaultTimeout - 1) + 1) + ";loginTimeout=" + loginTimeout)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, loginTimeout);
Expand All @@ -260,13 +260,13 @@ public void testFailoverInstanceResolution() throws SQLException {
+ ";databaseName=FailoverDB_abc;failoverPartner=" + randomServer + "\\foo;user=sa;password=pwd;")) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
timerEnd = System.currentTimeMillis();
assertTrue((e.getMessage().contains(TestResource.getResource("R_tcpipConnectionToHost")))
|| ((isSqlAzure() || isSqlAzureDW())
? e.getMessage().contains(
TestResource.getResource("R_connectTimedOut"))
: false),
e.getMessage());
timerEnd = System.currentTimeMillis();
}

verifyTimeout(timerEnd - timerStart, defaultTimeout * 2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void testBasicConnectionAAD() throws Exception {
basicReconnect("jdbc:sqlserver://" + azureServer + ";database=" + azureDatabase + ";user="
+ azureUserName + ";password=" + azurePassword
+ ";loginTimeout=90;Authentication=ActiveDirectoryPassword");
retry = THROTTLE_RETRY_COUNT + 1;
retry = THROTTLE_RETRY_COUNT + 1;
} catch (Exception e) {
if (e.getMessage().matches(TestUtils.formatErrorMsg("R_crClientAllRecoveryAttemptsFailed"))) {
System.out.println(e.getMessage() + ". Recovery failed, retry #" + retry + " in "
Expand Down Expand Up @@ -265,7 +265,8 @@ public void testPooledConnectionDB() throws SQLException {
@Test
public void testPooledConnectionLang() throws SQLException {
SQLServerConnectionPoolDataSource mds = new SQLServerConnectionPoolDataSource();
mds.setURL(connectionString);
mds.setURL(connectionString + ";connectRetryCount=1");

PooledConnection pooledConnection = mds.getPooledConnection();
String lang0 = null, lang1 = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ private void timeoutVariations(Map<String, String> props, long expectedDuration,
fail("Successfully executed query on a blocked connection.");
} catch (SQLException e) {
double elapsedTime = System.currentTimeMillis() - startTime;

// Timeout should occur after query timeout and not login timeout
assertTrue("Query did not timeout in " + expectedDuration + "ms, elapsed time(ms): " + elapsedTime,
elapsedTime < expectedDuration);
assertTrue("Exception: " + e.getMessage() + ": Query did not timeout in " + expectedDuration
+ "ms, elapsed time(ms): " + elapsedTime, elapsedTime < expectedDuration);
if (expectedErrMsg.isPresent()) {
assertTrue(TestResource.getResource("R_unexpectedErrorMessage") + e.getMessage(),
e.getMessage().matches(TestUtils.formatErrorMsg(expectedErrMsg.get())));
Expand Down Expand Up @@ -82,6 +83,9 @@ public void testDefaultTimeout() throws SQLException {
public void testDefaultRetry() throws SQLException {
Map<String, String> m = new HashMap<>();
m.put("loginTimeout", "5");

// ensure count is not set to something else as this test assumes exactly just 1 retry
m.put("connectRetryCount", "1");
timeoutVariations(m, 6000, Optional.empty());
}

Expand Down
Loading