Skip to content
This repository has been archived by the owner on Sep 23, 2020. It is now read-only.

Commit

Permalink
Merge branch 'master' of github.com:nimbusproject/nimbus
Browse files Browse the repository at this point in the history
  • Loading branch information
buzztroll committed Jun 14, 2011
2 parents 3586fd0 + 275e114 commit 4d0b6d7
Show file tree
Hide file tree
Showing 8 changed files with 372 additions and 7 deletions.
2 changes: 1 addition & 1 deletion scripts/integration-suites.sh
@@ -1,7 +1,7 @@
#!/bin/bash


ALL_TEST_SUITES="basic01 basic02 basic03 basic04 basic05 basic06 failure01 spot01 spot02 spot03 spot04 spot05 spot06"
ALL_TEST_SUITES="basic01 basic02 basic03 basic04 basic05 basic06 failure01 failure02 spot01 spot02 spot03 spot04 spot05 spot06"


if [ "X" == "X$1" ]; then
Expand Down
Expand Up @@ -27,9 +27,11 @@
import org.globus.workspace.Lager;

import java.util.Calendar;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
* Used to sweep for terminated instances.
Expand All @@ -53,6 +55,9 @@ public class ResourceSweeper implements Runnable {
protected final ExecutorService executor;
protected final WorkspaceHome home;
protected final Lager lager;

// instance ID : attempt count
protected final Map<Integer,Integer> zombieBackoffs = new Hashtable<Integer,Integer>();


// -------------------------------------------------------------------------
Expand Down Expand Up @@ -124,7 +129,7 @@ protected void findAndDestroy() {
this.executor.submit(task);
}

// Log any unexpected errors. Wait two minutes (normal destroy time
// Log any unexpected errors. Wait 30s (normal destroy time
// should be a matter of seconds even if there is high congestion).

// todo: make timeout configurable when this class comes via IoC
Expand All @@ -133,7 +138,7 @@ protected void findAndDestroy() {
while (iter2.hasNext()) {
final FutureTask task = (FutureTask) iter2.next();
try {
final String msg = (String) task.get(120L, TimeUnit.SECONDS);
final String msg = (String) task.get(30L, TimeUnit.SECONDS);
if (msg != null) {
logger.debug(msg);
}
Expand All @@ -149,7 +154,7 @@ protected void findAndDestroy() {
}
}

protected List getDestroyTasks(Sweepable[] toSweep) {
protected synchronized List getDestroyTasks(Sweepable[] toSweep) {

final Calendar currentTime = Calendar.getInstance();
final LinkedList killList = new LinkedList();
Expand All @@ -166,9 +171,56 @@ protected List getDestroyTasks(Sweepable[] toSweep) {
if (expired) {
logger.debug("Sweep found that " + Lager.id(sw.getID()) + " is expired.");
}

if (sw.isZombie()) {

// Only attempt destruction on the following sweeper runs for a given zombie:
// 1st, 2nd, 3rd, 6th, 10th, 15th, 25th, and then every 20th run from the 40th on.

final Integer exists = this.zombieBackoffs.get(sw.getID());

final Integer attemptCount;
if (exists == null) {
attemptCount = 1;
} else {
attemptCount = exists + 1;
}
this.zombieBackoffs.put(sw.getID(), attemptCount);

int actualRetryNumber = attemptCount;
if (attemptCount < 40) {
switch (attemptCount) {
case 1:
break;
case 2:
break;
case 3:
break;
case 6:
actualRetryNumber = 4;
break;
case 10:
actualRetryNumber = 5;
break;
case 15:
actualRetryNumber = 6;
break;
case 25:
actualRetryNumber = 7;
break;
default:
continue;
}
} else {
if (attemptCount % 20 != 0) {
continue;
} else {
actualRetryNumber = 6 + attemptCount / 20;
}
}

logger.warn(Lager.ev(sw.getID()) + "Node that could not be destroyed " +
"previously, attempting again.");
"previously, attempting again. Retry #" + actualRetryNumber);
}

if (expired || sw.isZombie()) {
Expand Down
Expand Up @@ -35,6 +35,7 @@ public class MockShutdownTrash extends XenTask {

private static boolean fail = false;
private static int failCount = 0;
private static long msAtLastAttempt = 0;

// Point of control from tests, ANY created task object will respect this static field
// when the init() method is called -- so creating a number of instances simultaneously
Expand All @@ -45,9 +46,13 @@ public static void setFail(boolean doFail) {
/**
 * Reports the current value of the shared (static) failure counter.
 *
 * The counter is incremented in init() when a task is forced to fail and
 * is zeroed by resetFailCount().
 *
 * @return the current failure count
 */
public static int getFailCount() {
    return MockShutdownTrash.failCount;
}
/**
 * Reports when init() last ran on any task instance.
 *
 * @return the System.currentTimeMillis() value recorded by the most recent
 *         init() call, or 0 if none has been recorded since the field was
 *         initialized or resetFailCount() was last called
 */
public static long getMsAtLastAttempt() {
    return MockShutdownTrash.msAtLastAttempt;
}

/**
 * Clears the shared (static) bookkeeping: zeroes both the failure counter
 * and the timestamp of the last attempt.
 */
public static void resetFailCount() {
    MockShutdownTrash.failCount = 0;
    MockShutdownTrash.msAtLastAttempt = 0L;
}

protected void init() throws WorkspaceException {
Expand All @@ -69,7 +74,7 @@ protected void init() throws WorkspaceException {
logger.warn(this.name + " forced to fail.");
failCount += 1;
}

msAtLastAttempt = System.currentTimeMillis();
this.cmd = (String[]) ssh.toArray(new String[ssh.size()]);
}
}
1 change: 1 addition & 0 deletions service/service/java/tests/suites/build.properties
Expand Up @@ -32,6 +32,7 @@ st.basic06=ParallelIdempotentCreationSuite
# FAILURE SUITES
st.failure.dir=${nimbus.suitesdir}/failure
st.failure01=TerminateSuite
st.failure02=BackoffSuite

# SPOT INSTANCE SUITES
st.spot.dir=${nimbus.suitesdir}/spotinstances
Expand Down
8 changes: 7 additions & 1 deletion service/service/java/tests/suites/build.xml
Expand Up @@ -215,8 +215,14 @@
<param name="runone.suite.report.subdir" value="basic-${st.failure01}"/>
</antcall>
</target>
<target name="failure02">
<!-- Delegates to the shared "runone" target, pointing it at the suite file
     named by st.failure02 inside st.failure.dir; the report goes to the
     "basic-" prefixed subdirectory, matching the failure01 target. -->
<antcall target="runone">
<param name="runone.suite.file" value="${st.failure.dir}/${st.failure02}.xml"/>
<param name="runone.suite.report.subdir" value="basic-${st.failure02}"/>
</antcall>
</target>

<target name="allfailure" depends="failure01" />
<target name="allfailure" depends="failure01,failure02" />


<!-- SPOT INSTANCE SUITES -->
Expand Down
9 changes: 9 additions & 0 deletions service/service/java/tests/suites/failure/BackoffSuite.xml
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
<!-- TestNG suite definition for the failure-category BackoffSuite: runs the
     single test class listed below with parallel execution disabled. -->
<suite name="BackoffSuite" parallel="none">
<test verbose="1" name="nimbus" annotations="JDK">
<classes>
<class name="org.globus.workspace.testing.suites.failure.BackoffSuite"/>
</classes>
</test>
</suite>
Expand Up @@ -339,6 +339,7 @@
<property name="sshAccount" value="$SSH{control.ssh.user}" />
<property name="sshIdentityFile" value="$SSH{use.identity}" />

<!-- The test suites assume this exact value; tests will fail if you change it. -->
<property name="sweeperDelay" value="3000" /> <!-- ms -->
<property name="threadPoolInitialSize" value="5" />
<property name="threadPoolMaxSize" value="50" />
Expand Down

0 comments on commit 4d0b6d7

Please sign in to comment.