Browse files

Test suite to examine destruction errors during backfill preemption

  • Loading branch information...
1 parent cf52f1e commit a1fac0b2935393931108a1484c47b1ef4d3766e2 @timf timf committed Jun 13, 2011
View
2 scripts/integration-suites.sh
@@ -1,7 +1,7 @@
#!/bin/bash
-ALL_TEST_SUITES="basic01 basic02 basic03 basic04 basic05 basic06 failure01 spot01 spot02 spot03 spot04 spot05"
+ALL_TEST_SUITES="basic01 basic02 basic03 basic04 basic05 basic06 failure01 spot01 spot02 spot03 spot04 spot05 spot06"
if [ "X" == "X$1" ]; then
View
6 ...ervice/java/source/src/org/globus/workspace/scheduler/defaults/DefaultSlotManagement.java
@@ -410,9 +410,14 @@ private Reservation _reserveCoscheduledSpace(NodeRequest[] requests,
final String[] nodes = new String[vmids.length];
int bailed = -1;
Throwable failure = null;
+ int maxAttempts = vmids.length + 2;
for (int i = 0; i < vmids.length; i++) {
+ if (maxAttempts == 0) {
+ throw new NotEnoughMemoryException("Could not reclaim enough memory");
+ }
+
try {
nodes[i] = ResourcepoolUtil.getResourcePoolEntry(memory,
assocs,
@@ -429,6 +434,7 @@ private Reservation _reserveCoscheduledSpace(NodeRequest[] requests,
throw e;
} catch (NotEnoughMemoryException e) {
if(!preemptable){
+ maxAttempts -= 1;
try {
//If there isn't available memory
//for a non-preemptable reservation
View
4 service/service/java/source/src/org/globus/workspace/service/WorkspaceHome.java
@@ -97,11 +97,11 @@ public void destroy(String id)
* including if the workspace was not found, etc. This does not cut out
* early if there is any kind of problem.
*
- * Allow parameter to set to block until work is complete (up to thirty seconds).
+ * Set the block parameter to wait until the work is complete (up to twenty seconds).
*
* @param workspaces list of workspace IDs
* @param sourceStr string for log msgs
- * @param block set true if you want to block until complete (up to thirty seconds)
+ * @param block set true if you want to block until complete (up to twenty seconds)
* @return string report on what happened
*/
public String destroyMultiple(int[] workspaces, String sourceStr, boolean block);
View
4 service/service/java/source/src/org/globus/workspace/service/impls/WorkspaceHomeImpl.java
@@ -465,12 +465,12 @@ public String destroyMultiple(int[] workspaces, String sourceStr, boolean block)
final StringBuilder buf = new StringBuilder(tasks.length * 256);
- // Log any unexpected errors. Wait thirty seconds (normal destroy time
+ // Log any unexpected errors. Wait twenty seconds (normal destroy time
// should be a matter of seconds even if there is high congestion).
// todo: make timeout configurable
for (int i = 0; i < tasks.length; i++) {
try {
- final String msg = (String) tasks[i].get(30L, TimeUnit.SECONDS);
+ final String msg = (String) tasks[i].get(20L, TimeUnit.SECONDS);
if (msg != null) {
buf.append(msg);
}
View
1 service/service/java/tests/suites/build.properties
@@ -40,3 +40,4 @@ st.spot02=NoResourcesSISuite
st.spot03=SimpleSISuite
st.spot04=SimplestSISuite
st.spot05=SingleResourcePoolSISuite
+st.spot06=BackfillTerminationSuite
View
8 service/service/java/tests/suites/build.xml
@@ -251,8 +251,14 @@
<param name="runone.suite.report.subdir" value="basic-${st.spot05}"/>
</antcall>
</target>
+ <!-- spot06: delegates to the shared "runone" target, passing the suite file
+      ${st.spot.dir}/${st.spot06}.xml and the report subdirectory to use.
+      NOTE(review): the report subdir uses the "basic-" prefix, same as the
+      spot05 target; confirm that is intended for spot suites. -->
+ <target name="spot06">
+ <antcall target="runone">
+ <param name="runone.suite.file" value="${st.spot.dir}/${st.spot06}.xml"/>
+ <param name="runone.suite.report.subdir" value="basic-${st.spot06}"/>
+ </antcall>
+ </target>
- <target name="allspot" depends="spot01,spot02,spot03,spot04,spot05" />
+ <target name="allspot" depends="spot01,spot02,spot03,spot04,spot05,spot06" />
<!-- *******************************************************************
View
6 ...ice/service/java/tests/suites/common/src/org/globus/workspace/testing/NimbusTestBase.java
@@ -29,6 +29,8 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
import com.google.gson.Gson;
import org.apache.commons.dbcp.BasicDataSource;
@@ -40,7 +42,6 @@
import org.globus.workspace.WorkspaceException;
import org.globus.workspace.WorkspaceUtil;
import org.globus.workspace.remoting.admin.VmmNode;
-import org.globus.workspace.service.impls.WorkspaceHomeImpl;
import org.globus.workspace.testing.utils.ReprPopulator;
import org.nimbustools.api.brain.ModuleLocator;
import org.nimbustools.api.brain.NimbusHomePathResolver;
@@ -87,6 +88,8 @@
// 'logger' should be used only after suite setup, this class prevents NPEs beforehand
protected Log logger = new FakeLog();
+ protected final ExecutorService suiteExecutor = Executors.newCachedThreadPool();
+
// -----------------------------------------------------------------------------------------
// ABSTRACT METHODS
@@ -223,6 +226,7 @@ protected void suiteTeardown() throws Exception {
logger.debug(LOG_SEP + "\n*** TESTS DONE (beginning teardown): " +
this.getClass().getSimpleName() + LOG_SEP);
+ this.suiteExecutor.shutdownNow();
final String nh = System.getProperty(NimbusHomePathResolver.NIMBUS_HOME_ENV_NAME);
if (nh == null) {
View
9 service/service/java/tests/suites/spotinstances/BackfillTerminationSuite.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<!-- TestNG suite definition: a single test group ("nimbus") containing only the
+     BackfillTerminationSuite class, with parallel execution disabled. -->
+<suite name="BackfillTerminationSuite" parallel="none">
+ <test verbose="1" name="nimbus" annotations="JDK">
+ <classes>
+ <class name="org.globus.workspace.testing.suites.spotinstances.BackfillTerminationSuite"/>
+ </classes>
+ </test>
+</suite>
View
12 ...java/tests/suites/spotinstances/home/services/etc/nimbus/workspace-service/other/main.xml
@@ -502,17 +502,9 @@
<constructor-arg ref="nimbus-rm.loglevels" />
- <!-- Current choices: xenlocal, xenssh -->
- <property name="commandSet" value="xenssh" />
+ <!-- Mock command set for failure scenarios -->
+ <property name="commandSet" value="failure_commands" />
- <!--
- Not exposing the distinction between xenlocal and xenssh anymore,
- the user should just set up localhost ssh logins if there is one
- VMM node and it happens to be colocated. workspace-control needs
- to send notifications back over sshd anyhow, so this login has to
- work regardless until reliable messaging system is introduced.
- -->
-
</bean>
<bean id="nimbus-rm.service.async.ResourceMessage"
View
200 ...ances/src/org/globus/workspace/testing/suites/spotinstances/BackfillTerminationSuite.java
@@ -0,0 +1,200 @@
+/*
+ * Copyright 1999-2011 University of Chicago
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy
+ * of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.globus.workspace.testing.suites.spotinstances;
+
+import com.google.gson.Gson;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.globus.workspace.remoting.admin.VmmNode;
+import org.globus.workspace.testing.NimbusTestBase;
+import org.globus.workspace.testing.NimbusTestContextLoader;
+import org.globus.workspace.xen.xenssh.MockShutdownTrash;
+import org.nimbustools.api.repr.AsyncCreateRequest;
+import org.nimbustools.api.repr.Caller;
+import org.nimbustools.api.repr.CreateRequest;
+import org.nimbustools.api.repr.CreateResult;
+import org.nimbustools.api.repr.RequestInfo;
+import org.nimbustools.api.repr.vm.VM;
+import org.nimbustools.api.services.admin.RemoteNodeManagement;
+import org.nimbustools.api.services.rm.Manager;
+import org.nimbustools.api.services.rm.NotEnoughMemoryException;
+import org.springframework.test.annotation.DirtiesContext;
+import org.springframework.test.context.ContextConfiguration;
+import org.testng.annotations.AfterSuite;
+import org.testng.annotations.Listeners;
+import org.testng.annotations.Test;
+
+import java.rmi.RemoteException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertTrue;
+
+/**
+ * Examines what happens to an incoming non-preemptable request when the
+ * backfill VM it needs to preempt cannot be destroyed.  The shutdown failure
+ * is simulated through the static switches on {@link MockShutdownTrash}.
+ *
+ * Because those switches are static they outlive any single test method, so
+ * every test restores the non-failing mode in a <code>finally</code> block.
+ */
+@Listeners({ org.globus.workspace.testing.suites.spotinstances.TestListener.class })
+@ContextConfiguration(locations={"file:./service/service/java/tests/suites/spotinstances/" +
+        "home/services/etc/nimbus/workspace-service/other/main.xml"},
+        loader= NimbusTestContextLoader.class)
+public class BackfillTerminationSuite extends NimbusTestBase {
+
+    // -----------------------------------------------------------------------------------------
+    // STATIC VARIABLES
+    // -----------------------------------------------------------------------------------------
+
+    private static final Log logger =
+            LogFactory.getLog(BackfillTerminationSuite.class.getName());
+
+
+    // -----------------------------------------------------------------------------------------
+    // extends NimbusTestBase
+    // -----------------------------------------------------------------------------------------
+
+    /**
+     * This is how you coordinate your Java test suite code with the conf files to use.
+     * @return absolute path to the value that should be set for $NIMBUS_HOME
+     * @throws Exception if $NIMBUS_HOME cannot be determined
+     */
+    protected String getNimbusHome() throws Exception {
+        return this.determineSuitesPath() + "/spotinstances/home";
+    }
+
+    /**
+     * Runs the base class teardown once the whole suite has finished
+     * (alwaysRun, so it also happens after failures).
+     * @throws Exception whatever the base teardown throws
+     */
+    @AfterSuite(alwaysRun=true)
+    public void suiteTeardown() throws Exception {
+        super.suiteTeardown();
+    }
+
+    /**
+     * Registers a single VMM node ("fakehost1") with 512 MB of memory, so one
+     * 512 MB request needs the entire node.
+     * @throws RemoteException if the node management call fails
+     */
+    protected void setUpVmms() throws RemoteException {
+        logger.info("Before test method: overriden setUpVmms()");
+        Gson gson = new Gson();
+        List<VmmNode> nodes = new ArrayList<VmmNode>(1);
+        nodes.add(new VmmNode("fakehost1", true, "default", 512, "*", true));
+        final String nodesJson = gson.toJson(nodes);
+        RemoteNodeManagement rnm = this.locator.getNodeManagement();
+        rnm.addNodes(nodesJson);
+    }
+
+    // -----------------------------------------------------------------------------------------
+    // TESTS
+    // -----------------------------------------------------------------------------------------
+
+    /**
+     * The backfill VM can never be shut down: the incoming request is expected
+     * to wait roughly twenty seconds and then be rejected with
+     * NotEnoughMemoryException.
+     * @throws Exception on any unexpected problem
+     */
+    @Test
+    @DirtiesContext
+    public void backfillWontDie() throws Exception {
+        final Manager rm = this.locator.getManager();
+        final Caller superuser = this.populator().getSuperuserCaller();
+
+        try {
+            this.startBackfillThenBreakShutdown(rm, superuser);
+
+            // One regular VM that needs all the 512 RAM will preempt
+            final Caller caller = this.populator().getCaller();
+            final CreateRequest req =
+                    this.populator().getCreateRequest("regular", 1200, 512 , 1);
+
+            final long startMs = System.currentTimeMillis();
+
+            boolean notEnoughMemory = false;
+            try {
+                rm.create(req, caller);
+            } catch (NotEnoughMemoryException e) {
+                notEnoughMemory = true;
+            }
+
+            final long endMs = System.currentTimeMillis();
+            final long totalSeconds = (endMs - startMs) / 1000;
+            logger.info("Total seconds: " + totalSeconds);
+
+            // That should have waited up to ~20 seconds before giving up on the incoming request
+            assertTrue(totalSeconds > 18);
+
+            // backfill wouldn't die, and the service correctly denies the incoming request
+            assertTrue(notEnoughMemory);
+        } finally {
+            // The failure switch is static: do not leak it into teardown or later tests
+            MockShutdownTrash.setFail(false);
+        }
+    }
+
+
+    /**
+     * The backfill VM refuses to shut down for the first ten seconds and then
+     * starts cooperating: the incoming request is expected to succeed after
+     * roughly that long.
+     * @throws Exception on any unexpected problem
+     */
+    @Test
+    @DirtiesContext
+    public void backfillEventuallyDies() throws Exception {
+        final Manager rm = this.locator.getManager();
+        final Caller superuser = this.populator().getSuperuserCaller();
+
+        try {
+            this.startBackfillThenBreakShutdown(rm, superuser);
+
+            // One regular VM that needs all the 512 RAM will preempt
+            final Caller caller = this.populator().getCaller();
+            final CreateRequest req =
+                    this.populator().getCreateRequest("regular", 1200, 512 , 1);
+
+            // In 10 seconds, trigger the shutdown task to start succeeding
+            this.suiteExecutor.submit(new DestroyEnableFutureTask(10));
+
+            final long startMs = System.currentTimeMillis();
+            final CreateResult result = rm.create(req, caller);
+
+            final long endMs = System.currentTimeMillis();
+            final long totalSeconds = (endMs - startMs) / 1000;
+            logger.info("Total seconds: " + totalSeconds);
+
+            final VM[] vms = result.getVMs();
+            assertEquals(1, vms.length);
+            assertNotNull(vms[0]);
+            logger.info("Leased vm '" + vms[0].getID() + '\'');
+
+            assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));
+
+            // That should have only waited around ~10 seconds (+/- 2 seconds for sweeper)
+            assertTrue(totalSeconds > 9);
+            assertTrue(totalSeconds < 14);
+        } finally {
+            // Covers the case where create() fails before the background task has fired
+            MockShutdownTrash.setFail(false);
+        }
+    }
+
+
+    // -----------------------------------------------------------------------------------------
+    // HELPERS
+    // -----------------------------------------------------------------------------------------
+
+    /**
+     * Shared setup for both tests: submits one backfill request, gives it two
+     * seconds to be allocated, checks that exactly one backfill request is
+     * registered for the caller, then puts the mock shutdown task into
+     * failure mode.
+     *
+     * @param rm manager to submit the backfill request to
+     * @param superuser caller that owns the backfill request
+     * @throws Exception on any unexpected problem
+     */
+    private void startBackfillThenBreakShutdown(Manager rm, Caller superuser)
+            throws Exception {
+
+        logger.info(rm.getVMMReport());
+
+        logger.debug("Submitting backfill request");
+
+        final AsyncCreateRequest backfill1 =
+                this.populator().getBackfillRequest("backfill1", 1);
+        rm.addBackfillRequest(backfill1, superuser);
+
+        logger.debug("Waiting 2 seconds for resources to be allocated.");
+        Thread.sleep(2000);
+
+        // Check backfill request state
+        final RequestInfo[] backfillRequestsByCaller =
+                rm.getBackfillRequestsByCaller(superuser);
+        assertEquals(1, backfillRequestsByCaller.length);
+
+        logger.info(rm.getVMMReport());
+
+        // Set the shutdown task to not work
+        MockShutdownTrash.resetFailCount();
+        MockShutdownTrash.setFail(true);
+        logger.warn("Set to fail.");
+    }
+
+}
View
23 ...tances/src/org/globus/workspace/testing/suites/spotinstances/DestroyEnableFutureTask.java
@@ -0,0 +1,23 @@
+package org.globus.workspace.testing.suites.spotinstances;
+
+import org.globus.workspace.xen.xenssh.MockShutdownTrash;
+
+import java.util.concurrent.Callable;
+
+/**
+ * Background task that sleeps for a fixed number of seconds and then switches
+ * {@link MockShutdownTrash} out of failure mode via <code>setFail(false)</code>.
+ */
+public class DestroyEnableFutureTask implements Callable<Object> {
+
+    private final int secondsToWait;
+
+    /**
+     * @param secondsToWait delay before failure mode is turned off, must be at least 1
+     * @throws IllegalArgumentException if secondsToWait is less than 1
+     */
+    public DestroyEnableFutureTask(int secondsToWait) {
+        if (secondsToWait < 1) {
+            throw new IllegalArgumentException("secondsToWait is too low");
+        }
+        this.secondsToWait = secondsToWait;
+    }
+
+    /**
+     * Waits for the configured delay, then turns failure mode off.
+     *
+     * @return always null
+     * @throws Exception InterruptedException if the sleep is interrupted
+     */
+    public Object call() throws Exception {
+        // 1000L forces long arithmetic; int multiplication would overflow for large waits
+        Thread.sleep(this.secondsToWait * 1000L);
+        MockShutdownTrash.setFail(false);
+        return null;
+    }
+}
View
1 ...tes/spotinstances/src/org/globus/workspace/testing/suites/spotinstances/Issue26Suite.java
@@ -28,7 +28,6 @@
import org.nimbustools.api.services.admin.RemoteNodeManagement;
import org.nimbustools.api.services.rm.ImpossibleAmountOfMemoryException;
import org.nimbustools.api.services.rm.Manager;
-import org.nimbustools.api.services.rm.NotEnoughMemoryException;
import org.springframework.test.annotation.DirtiesContext;
import org.springframework.test.context.ContextConfiguration;
import org.testng.annotations.AfterSuite;
View
2 .../spotinstances/src/org/globus/workspace/testing/suites/spotinstances/SimplestSISuite.java
@@ -75,7 +75,7 @@ public void suiteTeardown() throws Exception {
protected void setUpVmms() throws RemoteException {
logger.info("Before test method: overriden setUpVmms(), one unique instance");
Gson gson = new Gson();
- List<VmmNode> nodes = new ArrayList<VmmNode>(4);
+ List<VmmNode> nodes = new ArrayList<VmmNode>(1);
nodes.add(new VmmNode("fakehost1", true, "default", 512, "*", true));
final String nodesJson = gson.toJson(nodes);
RemoteNodeManagement rnm = this.locator.getNodeManagement();

0 comments on commit a1fac0b

Please sign in to comment.