Permalink
Browse files

Add cleanup support to nimbus-admin

A VMM failure can sometimes leave VMs in a corrupted or cancelled state,
with the service node repeatedly trying to destroy them even though they
have already disappeared from the VMM.

The --cleanup option in nimbus-admin now allows an administrator to
remove VMs from the service without performing any action on the VMMs.
Allocated IPs are released, memory is given back to the VMM node, and VM
information is removed from the persistence database.

WARNING: administrators should make sure all VMM resources are actually
freed (VM is terminated, images are removed, ebtables rules are
deleted...) before doing a cleanup, as the service will not perform
these operations.
  • Loading branch information...
1 parent 2652487 commit b317d4c81e19c4da73c1441dadf96c3140be2941 @priteau priteau committed Aug 21, 2012
Showing with 245 additions and 12 deletions.
  1. +7 −0 service-api/java/source/src/org/nimbustools/api/defaults/services/rm/DefaultManager.java
  2. +9 −0 service-api/java/source/src/org/nimbustools/api/services/admin/RemoteAdminToolsManagement.java
  3. +3 −0 service-api/java/source/src/org/nimbustools/api/services/rm/Manager.java
  4. +4 −0 ...ce/service/java/source/src/org/globus/workspace/creation/defaults/IdempotentInstanceResource.java
  5. +10 −0 service/service/java/source/src/org/globus/workspace/manager/DelegatingManager.java
  6. +6 −1 service/service/java/source/src/org/globus/workspace/remoting/admin/client/Opts.java
  7. +79 −4 service/service/java/source/src/org/globus/workspace/remoting/admin/client/RemoteAdminToolsMain.java
  8. +17 −1 service/service/java/source/src/org/globus/workspace/remoting/admin/client/adminHelp.txt
  9. +38 −6 ...ice/java/source/src/org/globus/workspace/remoting/admin/defaults/DefaultRemoteAdminToolsMgmt.java
  10. +9 −0 service/service/java/source/src/org/globus/workspace/scheduler/Scheduler.java
  11. +9 −0 service/service/java/source/src/org/globus/workspace/scheduler/defaults/DefaultSchedulerAdapter.java
  12. +2 −0 service/service/java/source/src/org/globus/workspace/service/InstanceResource.java
  13. +7 −0 service/service/java/source/src/org/globus/workspace/service/WorkspaceHome.java
  14. +4 −0 service/service/java/source/src/org/globus/workspace/service/impls/InstanceResourceImpl.java
  15. +41 −0 service/service/java/source/src/org/globus/workspace/service/impls/WorkspaceHomeImpl.java
@@ -169,6 +169,13 @@ public void shutdownSave(String id, int type, ShutdownTasks tasks,
"', caller '" + caller + "'");
}
+ public void cleanup(String id, int type, Caller caller)
+ throws DoesNotExistException, ManageException,
+ OperationDisabledException {
+ Logging.debug("Manager.cleanup() -- id '" + id + "', type '" + type +
+ "', caller '" + caller + "'");
+ }
+
public void pause(String id, int type, ShutdownTasks tasks,
Caller caller) throws DoesNotExistException,
ManageException,
@@ -35,6 +35,14 @@
public static final int SHUTDOWN_GID = 5;
public static final int SHUTDOWN_GNAME = 6;
+ public static final int CLEANUP_ALL = 0;
+ public static final int CLEANUP_ID = 1;
+ public static final int CLEANUP_HOST = 2;
+ public static final int CLEANUP_UNAME = 3;
+ public static final int CLEANUP_DN = 4;
+ public static final int CLEANUP_GID = 5;
+ public static final int CLEANUP_GNAME = 6;
+
public String getAllRunningVMs() throws RemoteException;
public String getVMsByDN(String userDN) throws RemoteException;
public String getVMsByUser(String user) throws RemoteException;
@@ -44,4 +52,5 @@
public String getVMsByState(String state) throws RemoteException;
public Hashtable<String, String[]> showVMsForAllHosts() throws RemoteException;
public String shutdown(int type, String typeID, String seconds) throws RemoteException;
+ public String cleanup(int type, String typeID) throws RemoteException;
}
@@ -120,6 +120,9 @@ public void shutdownSave(String id, int type,
ShutdownTasks tasks, Caller caller)
throws DoesNotExistException, ManageException, OperationDisabledException;
+ public void cleanup(String id, int type, Caller caller)
+ throws DoesNotExistException, ManageException, OperationDisabledException;
+
public void pause(String id, int type,
ShutdownTasks tasks, Caller caller)
throws DoesNotExistException, ManageException, OperationDisabledException;
@@ -225,6 +225,10 @@ public boolean remove() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
+ public void cleanup() {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
+
public void setState(int state, Throwable throwable) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@@ -324,6 +324,16 @@ public void shutdownSave(String id, int type,
}
}
+ public void cleanup(String id, int type, Caller caller)
+ throws DoesNotExistException,
+ ManageException,
+ OperationDisabledException {
+
+ this.opIntake("CLEANUP", id, type, caller);
+
+ this.home.cleanup(id);
+ }
+
public void pause(String id, int type,
ShutdownTasks tasks, Caller caller)
throws DoesNotExistException,
@@ -152,6 +152,11 @@ public Options getOptions() {
// NIMBUS-ADMIN
//*************************************************************************
+ public static final String CLEANUP_VMS = "x";
+ public static final String CLEANUP_VMS_LONG = "cleanup";
+ public final Option CLEANUP_VMS_OPT =
+ OptionBuilder.withLongOpt(CLEANUP_VMS_LONG).hasOptionalArg().create(CLEANUP_VMS);
+
public static final String LIST_VMS = "l";
public static final String LIST_VMS_LONG = "list";
public final Option LIST_VMS_OPT =
@@ -216,7 +221,7 @@ public Options getOptions() {
HELP_OPT, DEBUG_OPT, CONFIG_OPT, BATCH_OPT, DELIMITER_OPT,
REPORT_OPT, JSON_OPT, OUTPUT_OPT, ADD_NODES_OPT, LIST_NODES_OPT,
REMOVE_NODES_OPT, UPDATE_NODES_OPT, POOL_AVAILABILITY_OPT, NETWORKS_OPT, MEMORY_OPT, POOL_OPT,
- ACTIVE_OPT, INACTIVE_OPT, LIST_VMS_OPT, SHUTDOWN_VMS_OPT, USER_OPT, ID_OPT,
+ ACTIVE_OPT, INACTIVE_OPT, CLEANUP_VMS_OPT, LIST_VMS_OPT, SHUTDOWN_VMS_OPT, USER_OPT, ID_OPT,
SECONDS_OPT, ALL_VMS_OPT, HOST_OPT, DN_OPT, GROUP_ID_OPT,
GROUP_NAME_OPT, FREE_OPT, USED_OPT, NODE_LIST_OPT, STATE_OPT
};
@@ -132,6 +132,9 @@ public void run(String[] args) throws ExecutionProblem, ParameterProblem {
super.loadConfig(PROP_RMI_BINDING_ADMINTOOLS_DIR);
this.remoteAdminToolsManagement = (RemoteAdminToolsManagement) super.setupRemoting();
switch (this.action) {
+ case CleanupVMs:
+ cleanupVMs();
+ break;
case ListVMs:
listVMs();
break;
@@ -232,7 +235,8 @@ private void loadArgs(String[] args) throws ParameterProblem {
numOpts++;
}
}
- else if(this.action == ToolAction.ShutdownVMs) {
+ else if(this.action == ToolAction.ShutdownVMs ||
+ this.action == ToolAction.CleanupVMs) {
if(line.hasOption(Opts.ALL_VMS)) {
allVMs = true;
numOpts++;
@@ -510,7 +514,79 @@ else if(hostList != null) {
result = "Shutdown requires either --all, --id, --user, --dn, --gid, --gname, or --host option";
}
if(result != null && !result.isEmpty())
- System.err.print(result);
+ System.err.println(result);
+ }
+ catch (RemoteException e) {
+ System.err.println(e.getMessage());
+ }
+ }
+
+ private void cleanupVMs() {
+ try {
+ String result = "";
+ String feedback;
+ if(numOpts > 1) {
+ result = "You must select only one of --all, --id, --user, --dn, --gid, --gname, or --host";
+ System.err.println(result);
+ return;
+ }
+ if(allVMs) {
+ result = this.remoteAdminToolsManagement.cleanup(
+ RemoteAdminToolsManagement.CLEANUP_ALL, null);
+ }
+ else if(vmIDs != null) {
+ for(int i = 0; i < vmIDs.size(); i++) {
+ feedback = this.remoteAdminToolsManagement.cleanup(
+ RemoteAdminToolsManagement.CLEANUP_ID, vmIDs.get(i));
+ if(feedback != null)
+ result += feedback + "\n";
+ }
+ }
+ else if(userList != null) {
+ for(int i = 0; i < userList.size(); i++) {
+ feedback = this.remoteAdminToolsManagement.cleanup(
+ RemoteAdminToolsManagement.CLEANUP_UNAME, userList.get(i));
+ if(feedback != null)
+ result += feedback + "\n";
+ }
+ }
+ else if(DNList != null) {
+ for(int i = 0; i < DNList.size(); i++) {
+ feedback = this.remoteAdminToolsManagement.cleanup(
+ RemoteAdminToolsManagement.CLEANUP_DN, DNList.get(i));
+ if(feedback != null)
+ result += feedback + "\n";
+ }
+ }
+ else if(gidList != null) {
+ for(int i = 0; i < gidList.size(); i++) {
+ feedback = this.remoteAdminToolsManagement.cleanup(
+ RemoteAdminToolsManagement.CLEANUP_GID, gidList.get(i));
+ if(feedback != null)
+ result += feedback + "\n";
+ }
+ }
+ else if(gnameList != null) {
+ for(int i = 0; i < gnameList.size(); i++) {
+ feedback = this.remoteAdminToolsManagement.cleanup(
+ RemoteAdminToolsManagement.CLEANUP_GNAME, gnameList.get(i));
+ if(feedback != null)
+ result += feedback + "\n";
+ }
+ }
+ else if(hostList != null) {
+ for(int i = 0; i < hostList.size(); i++) {
+ feedback = this.remoteAdminToolsManagement.cleanup(
+ RemoteAdminToolsManagement.CLEANUP_HOST, hostList.get(i));
+ if(feedback != null)
+ result += feedback + "\n";
+ }
+ }
+ else {
+ result = "Cleanup requires either --all, --id, --user, --dn, --gid, --gname, or --host option";
+ }
+ if(result != null && !result.isEmpty())
+ System.err.println(result);
}
catch (RemoteException e) {
System.err.println(e.getMessage());
@@ -582,6 +658,7 @@ else if(hostList != null) {
}
enum ToolAction implements AdminEnum {
+ CleanupVMs(Opts.CLEANUP_VMS, null),
ListVMs(Opts.LIST_VMS, RemoteAdminToolsMain.ADMIN_FIELDS),
ListNodes(Opts.NODE_LIST, RemoteAdminToolsMain.NODE_LIST_FIELDS),
ShutdownVMs(Opts.SHUTDOWN_VMS, null),
@@ -603,5 +680,3 @@ public String option() {
return fields;
}
}
-
-
@@ -15,6 +15,22 @@ General
-----------------------------------------------------------------------------
Actions
+ --cleanup (-x) Cleanup VMs known to the IaaS service. Can
+ cleanup by all, id, user, DN, group or hostname
+ All options except --all can accept a comma separated
+ list of VMs to cleanup
+ ie. --cleanup --id 3,5,2,33
+ Careful: this only removes VMs from the
+ service; if resources are still allocated on
+ a VMM they will stay in use.
+ --all (-a) -Option to cleanup all running VMs on service
+ --id (-i) -Option to cleanup VM by VM id
+ --user (-u) -Option to cleanup VM by user display name
+ --dn (-d) -Option to cleanup VM by user DN name
+ --gid (-g) -Option to cleanup VM by group id
+ --gname (-gn) -Option to cleanup VM by group name
+ --host (-hn) -Option to cleanup all VMs with specified host
+
--list (-l) List the VMs known to the IaaS service
--user (-u) -Option to list VMs by user display name
--dn (-d) -Option to list VMs by user DN name
@@ -29,7 +45,7 @@ Actions
--shutdown (-s) Shutdown VMs known to the IaaS service. Can
shutdown by all, id, user, DN, group or hostname
All options except --all can accept a comma separated
- list of VM's to shutdown
+ list of VMs to shutdown
ie. --shutdown --id 3,5,2,33
--all (-a) -Option to shutdown all running VMs on service
--id (-i) -Option to shutdown VM by VM id
@@ -281,29 +281,61 @@ public String shutdown(int type, String typeID, String seconds) throws RemoteExc
}
}
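+ /*
+ * This method handles cleanup by all, id, host, user name, DN, group id and group name.
+ * The CLEANUP_* constants for int type are in the interface for this class
+ * typeID is the value to match for the given type (e.g. the id or hostname), or null if cleaning up all
+ * Returns null on success, or an error message if no matching VMs were found
+ */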
+ public String cleanup(int type, String typeID) throws RemoteException {
+ try {
+ VM[] vms;
+ vms = typeSet(type, typeID);
+
+ if(vms == null || vms.length == 0)
+ return errorMsg;
+
+ for(int i = 0; i < vms.length; i++) {
+ String id = vms[i].getID();
+ Caller caller = vms[i].getCreator();
+ manager.cleanup(id, manager.INSTANCE, caller);
+ }
+
+ return null;
+ }
+ catch (ManageException e) {
+ throw new RemoteException(e.getMessage());
+ }
+ catch (DoesNotExistException e) {
+ throw new RemoteException(e.getMessage());
+ }
+ catch (OperationDisabledException e) {
+ throw new RemoteException(e.getMessage());
+ }
+ }
+
private VM[] typeSet(int type, String typeID) throws RemoteException {
try {
- if(type == SHUTDOWN_HOST)
+ if(type == SHUTDOWN_HOST || type == CLEANUP_HOST)
return getVMByHost(typeID);
- else if(type == SHUTDOWN_ID)
+ else if(type == SHUTDOWN_ID || type == CLEANUP_ID)
return getVMById(typeID);
- else if(type == SHUTDOWN_UNAME) {
+ else if(type == SHUTDOWN_UNAME || type == CLEANUP_UNAME) {
authz = new AuthzDBAdapter(authzDataSource);
String userId = authz.getCanonicalUserIdFromFriendlyName(typeID);
VM[] vms = getVMsByUserId(userId);
if(vms == null)
errorMsg = "No VMs with user name " + typeID + " found";
return vms;
}
- else if(type == SHUTDOWN_DN) {
+ else if(type == SHUTDOWN_DN || type == CLEANUP_DN) {
final _Caller caller = this.reprFactory._newCaller();
caller.setIdentity(typeID);
VM[] vms = manager.getAllByCaller(caller);
if(vms.length == 0)
errorMsg = "No VMs with DN " + typeID + " found";
return vms;
}
- else if(type == SHUTDOWN_GID) {
+ else if(type == SHUTDOWN_GID || type == CLEANUP_GID) {
Group group = getGroupByGroupId(typeID);
if(group == null)
return null;
@@ -312,7 +344,7 @@ else if(type == SHUTDOWN_GID) {
errorMsg = "No VMs with group id " + typeID + " found";
return vms;
}
- else if(type == SHUTDOWN_GNAME) {
+ else if(type == SHUTDOWN_GNAME || type == CLEANUP_GNAME) {
Group group = getGroupByGroupName(typeID);
if(group == null)
return null;
@@ -114,6 +114,15 @@ public void slotReserved(int vmid,
/**
+ * Used when something went wrong with the VMM and the node reservation
+ * needs to be removed manually by the administrator.
+ * @param vmid id
+ */
+ public void cleanup(int vmid)
+
+ throws ManageException;
+
+ /**
* Used just in backout situations, when request did not reach STATE_FIRST_LEGAL
* NOTE: This is to be used instead of scheduler.stateNotification(id, WorkspaceConstants.STATE_DESTROYING),
* when the request did not reach the first legal state
@@ -873,6 +873,15 @@ public void recover(int recovered) {
}
}
+ public void cleanup(int vmid) throws ManageException {
+ if (lager.traceLog) {
+ logger.trace("cleanup(): reservation " + Lager.id(vmid));
+ }
+
+ this.slotManager.releaseSpace(vmid); // *** SYNCHRONIZED ISSUE ***
+ this.db.deleteNodeRequest(vmid);
+ }
+
private void remove(int vmid) throws ManageException {
if (lager.traceLog) {
@@ -179,6 +179,8 @@ public boolean remove()
throws ManageException, DoesNotExistException;
+ public void cleanup();
+
/**
* Called when a new state is actually achieved. Depending on the
* target state, this can cause task requests to be issued from
@@ -67,6 +67,13 @@ public boolean isActiveWorkspaceID(String id)
public Sweepable[] currentSweeps();
+ // -------------------------------------------------------------------------
+ // CLEANUP
+ // -------------------------------------------------------------------------
+
+ public void cleanup(String id)
+
+ throws ManageException, DoesNotExistException;
// -------------------------------------------------------------------------
// DESTRUCTION
@@ -789,6 +789,10 @@ public synchronized boolean remove() throws ManageException {
return true;
}
+ public synchronized void cleanup() {
+ do_remove();
+ }
+
protected void do_remove() {
// scheduler already notified by doStateChange()
Oops, something went wrong.

0 comments on commit b317d4c

Please sign in to comment.