Skip to content
This repository has been archived by the owner on Sep 23, 2020. It is now read-only.

Commit

Permalink
Send core request as ppn to qsub for workspace pilot
Browse files Browse the repository at this point in the history
  • Loading branch information
oldpatricka committed Feb 28, 2011
1 parent 1339cf6 commit b450678
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 27 deletions.
14 changes: 8 additions & 6 deletions service/service/java/source/etc/workspace-service/pilot.conf
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -79,12 +79,14 @@ pbs.submit.path=qsub
pbs.delete.path=qdel pbs.delete.path=qdel




# Processors per node, right now this should be set to be the maximum processors # Processors per node. If this is set to 0, your pilot job will request
# on each cluster node. If it is set too high, pilot job submissions will fail. # as many processors as are requested for a VM. For example, if a user requests
# If it is set too low, the pilot may end up not being the only LRM job on the # a 2 core VM, ppn will be set to 2.
# node at a time and that is unpredictable/unsupported right now. #

# On some installations, you may wish to hardcode this to a specific value
pbs.ppn=2 # to ensure that each pilot job reserves a whole node for a VM. In this case,
# choose a non-zero value.
pbs.ppn=0




# If the pilot job should be submitted to a special queue/server, configure # If the pilot job should be submitted to a special queue/server, configure
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public ArrayList constructQsub(String destination,
throw new WorkspaceException(err); throw new WorkspaceException(err);
} }


if (ppn < 1) { if (ppn < 0) {
final String err = "invalid processors per node " + final String err = "invalid processors per node " +
"request: " + Integer.toString(ppn); "request: " + Integer.toString(ppn);
throw new WorkspaceException(err); throw new WorkspaceException(err);
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -850,6 +850,7 @@ protected Reservation scheduleImpl(VirtualMachine vm,
} }


final int memory = dep.getIndividualPhysicalMemory(); final int memory = dep.getIndividualPhysicalMemory();
final int cores = dep.getIndividualCPUCount();
final int duration = dep.getMinDuration(); final int duration = dep.getMinDuration();


// list of associations should be in the DB, perpetuation of // list of associations should be in the DB, perpetuation of
Expand All @@ -860,7 +861,7 @@ protected Reservation scheduleImpl(VirtualMachine vm,
assocs = assocStr.split(","); assocs = assocStr.split(",");
} }


return this.scheduler.schedule(memory, duration, assocs, numNodes, return this.scheduler.schedule(memory, cores, duration, assocs, numNodes,
groupid, coschedid, vm.isPreemptable(), callerID); groupid, coschedid, vm.isPreemptable(), callerID);
} }


Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public interface Scheduler extends StateChangeInterested{
* @see #proceedCoschedule for handling separate requests together * @see #proceedCoschedule for handling separate requests together
* *
* @param memory MB needed * @param memory MB needed
* @param cores CPU cores needed
* @param duration seconds needed * @param duration seconds needed
* @param neededAssociations networks needed * @param neededAssociations networks needed
* @param numNodes number needed * @param numNodes number needed
Expand All @@ -49,6 +50,7 @@ public interface Scheduler extends StateChangeInterested{
* @throws SchedulingException internal problem * @throws SchedulingException internal problem
*/ */
public Reservation schedule(int memory, public Reservation schedule(int memory,
int cores,
int duration, int duration,
String[] neededAssociations, String[] neededAssociations,
int numNodes, int numNodes,
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ public long getSweeperDelay() {
} }


public Reservation schedule(int memory, public Reservation schedule(int memory,
int cores,
int duration, int duration,
String[] neededAssociations, String[] neededAssociations,
int numNodes, int numNodes,
Expand Down Expand Up @@ -263,7 +264,7 @@ public Reservation schedule(int memory,
this.creationPending.pending(ids); this.creationPending.pending(ids);


final NodeRequest req = final NodeRequest req =
new NodeRequest(ids, memory, duration, assocs, groupid, creatorDN); new NodeRequest(ids, memory, cores, duration, assocs, groupid, creatorDN);


try { try {


Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
public class NodeRequest { public class NodeRequest {


private int memory; // MBs private int memory; // MBs
private int cores;
private int duration; // seconds private int duration; // seconds


private int[] ids = null; private int[] ids = null;
Expand All @@ -41,12 +42,14 @@ public NodeRequest(int memory,


public NodeRequest(int[] ids, public NodeRequest(int[] ids,
int memory, int memory,
int cores,
int duration, int duration,
String[] neededAssociations, String[] neededAssociations,
String groupid, String groupid,
String creatorDN) { String creatorDN) {
this(memory, duration); this(memory, duration);


this.cores = cores;
this.ids = ids; this.ids = ids;
this.neededAssociations = neededAssociations; this.neededAssociations = neededAssociations;
this.groupid = groupid; this.groupid = groupid;
Expand Down Expand Up @@ -80,6 +83,18 @@ public int getNumNodes() {
return this.ids.length; return this.ids.length;
} }


/**
 * Returns the number of CPU cores for this request.
 *
 * @return the stored core count, or 1 when the stored count is 0
 */
public int getCores() {
    // an int field that was never assigned reads as 0 in Java; report one core then
    return (this.cores == 0) ? 1 : this.cores;
}

/**
 * Stores the number of CPU cores for this request.
 *
 * @param cores core count to store
 */
public void setCores(final int cores) {
    this.cores = cores;
}

public int getMemory() { public int getMemory() {
return this.memory; return this.memory;
} }
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -369,8 +369,8 @@ public synchronized void validate() throws Exception {
"Is the configuration present?"); "Is the configuration present?");
} }


if (this.ppn < 1) { if (this.ppn < 0) {
throw new Exception("processors per node (ppn) is less than one, " + throw new Exception("processors per node (ppn) is less than zero, " +
"invalid. Is the configuration present?"); "invalid. Is the configuration present?");
} }


Expand Down Expand Up @@ -492,6 +492,7 @@ public Reservation reserveSpace(NodeRequest request, boolean preemptable)


this.reserveSpace(request.getIds(), this.reserveSpace(request.getIds(),
request.getMemory(), request.getMemory(),
request.getCores(),
request.getDuration(), request.getDuration(),
request.getGroupid(), request.getGroupid(),
request.getCreatorDN()); request.getCreatorDN());
Expand Down Expand Up @@ -520,6 +521,7 @@ public Reservation reserveCoscheduledSpace(NodeRequest[] requests,
// capacity vs. mapping and we will get more sophisticated here later) // capacity vs. mapping and we will get more sophisticated here later)


int highestMemory = 0; int highestMemory = 0;
int highestCores = 0;
int highestDuration = 0; int highestDuration = 0;


final ArrayList idInts = new ArrayList(64); final ArrayList idInts = new ArrayList(64);
Expand All @@ -533,6 +535,12 @@ public Reservation reserveCoscheduledSpace(NodeRequest[] requests,
highestMemory = thisMemory; highestMemory = thisMemory;
} }


final int thisCores = requests[i].getCores();

if (highestCores < thisCores) {
highestCores = thisCores;
}

final int thisDuration = requests[i].getDuration(); final int thisDuration = requests[i].getDuration();


if (highestDuration < thisDuration) { if (highestDuration < thisDuration) {
Expand Down Expand Up @@ -563,7 +571,7 @@ public Reservation reserveCoscheduledSpace(NodeRequest[] requests,
// Assume that the creator's DN is the same for each node // Assume that the creator's DN is the same for each node
final String creatorDN = requests[0].getCreatorDN(); final String creatorDN = requests[0].getCreatorDN();


this.reserveSpace(all_ids, highestMemory, highestDuration, coschedid, creatorDN); this.reserveSpace(all_ids, highestMemory, highestCores, highestDuration, coschedid, creatorDN);
return new Reservation(all_ids, null, all_durations); return new Reservation(all_ids, null, all_durations);
} }


Expand All @@ -579,6 +587,7 @@ public Reservation reserveCoscheduledSpace(NodeRequest[] requests,
* than one VM is mapped to the same node, the returned node * than one VM is mapped to the same node, the returned node
* assignment array will include duplicates. * assignment array will include duplicates.
* @param memory megabytes needed * @param memory megabytes needed
* @param cores CPU cores needed
* @param duration seconds needed * @param duration seconds needed
* @param uuid group ID, can not be null if vmids is length > 1 * @param uuid group ID, can not be null if vmids is length > 1
* @param creatorDN the DN of the user who requested creation of the VM * @param creatorDN the DN of the user who requested creation of the VM
Expand All @@ -587,6 +596,7 @@ public Reservation reserveCoscheduledSpace(NodeRequest[] requests,
*/ */
private void reserveSpace(final int[] vmids, private void reserveSpace(final int[] vmids,
final int memory, final int memory,
final int cores,
final int duration, final int duration,
final String uuid, final String uuid,
final String creatorDN) final String creatorDN)
Expand Down Expand Up @@ -628,13 +638,14 @@ private void reserveSpace(final int[] vmids,
} }
} }


this.reserveSpaceImpl(memory, duration, slotid, vmids, creatorDN); this.reserveSpaceImpl(memory, cores, duration, slotid, vmids, creatorDN);


// pilot reports hostname when it starts running, not returning an // pilot reports hostname when it starts running, not returning an
// exception to signal successful best effort pending slot // exception to signal successful best effort pending slot
} }


private void reserveSpaceImpl(final int memory, private void reserveSpaceImpl(final int memory,
final int cores,
final int duration, final int duration,
final String uuid, final String uuid,
final int[] vmids, final int[] vmids,
Expand All @@ -646,20 +657,32 @@ private void reserveSpaceImpl(final int memory,
final int dur = duration + this.padding; final int dur = duration + this.padding;
final long wallTime = duration + this.padding; final long wallTime = duration + this.padding;



// If the pbs.ppn option in pilot.conf is 0, we should send
// the number of CPU cores used by the VM as the ppn string,
// otherwise, use the defined ppn value
int ppnRequested;
if (this.ppn == 0) {
ppnRequested = cores;
}
else {
ppnRequested = this.ppn;
}

// we know it's torque for now, no casing // we know it's torque for now, no casing
final ArrayList torquecmd; final ArrayList torquecmd;
try { try {
torquecmd = this.torque.constructQsub(this.destination, torquecmd = this.torque.constructQsub(this.destination,
memory, memory,
vmids.length, vmids.length,
this.ppn, ppnRequested,
wallTime, wallTime,
this.extraProperties, this.extraProperties,
outputFile, outputFile,
false, false,
false, false,
creatorDN); creatorDN);

} catch (WorkspaceException e) { } catch (WorkspaceException e) {
final String msg = "Problem with Torque argument construction"; final String msg = "Problem with Torque argument construction";
if (logger.isDebugEnabled()) { if (logger.isDebugEnabled()) {
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -79,12 +79,14 @@ pbs.submit.path=qsub
pbs.delete.path=qdel pbs.delete.path=qdel




# Processors per node, right now this should be set to be the maximum processors # Processors per node. If this is set to 0, your pilot job will request
# on each cluster node. If it is set too high, pilot job submissions will fail. # as many processors as are requested for a VM. For example, if a user requests
# If it is set too low, the pilot may end up not being the only LRM job on the # a 2 core VM, ppn will be set to 2.
# node at a time and that is unpredictable/unsupported right now. #

# On some installations, you may wish to hardcode this to a specific value
pbs.ppn=2 # to ensure that each pilot job reserves a whole node for a VM. In this case,
# choose a non-zero value.
pbs.ppn=0




# If the pilot job should be submitted to a special queue/server, configure # If the pilot job should be submitted to a special queue/server, configure
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -79,12 +79,14 @@ pbs.submit.path=qsub
pbs.delete.path=qdel pbs.delete.path=qdel




# Processors per node, right now this should be set to be the maximum processors # Processors per node. If this is set to 0, your pilot job will request
# on each cluster node. If it is set too high, pilot job submissions will fail. # as many processors as are requested for a VM. For example, if a user requests
# If it is set too low, the pilot may end up not being the only LRM job on the # a 2 core VM, ppn will be set to 2.
# node at a time and that is unpredictable/unsupported right now. #

# On some installations, you may wish to hardcode this to a specific value
pbs.ppn=2 # to ensure that each pilot job reserves a whole node for a VM. In this case,
# choose a non-zero value.
pbs.ppn=0




# If the pilot job should be submitted to a special queue/server, configure # If the pilot job should be submitted to a special queue/server, configure
Expand Down

0 comments on commit b450678

Please sign in to comment.