Skip to content

Commit

Permalink
[cgroup_manager] adapt process stealing for nixos
Browse files Browse the repository at this point in the history
  • Loading branch information
adfaure committed Oct 26, 2023
1 parent 37f7d21 commit 0d3167b
Showing 1 changed file with 44 additions and 1 deletion.
45 changes: 44 additions & 1 deletion oar/tools/job_resource_manager_cgroups_nixos.pl
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,12 @@
exit_myself(13,"Directory $Cpuset->{oar_tmp_directory} does not exist and cannot be created");
}

my @cgroup_list = ();
if (defined($Cpuset_path_job)){
if (open(LOCKFILE,"> $Cpuset->{oar_tmp_directory}/job_manager_lock_file")){
flock(LOCKFILE,LOCK_EX) or exit_myself(17,"flock failed: $!");
if (!(-r $Cgroup_directory_collection_links.'/cpuset/tasks')){
my @cgroup_list = ("cpuset","cpu","cpuacct","devices","freezer");
@cgroup_list = ("cpuset","cpu","cpuacct","devices","freezer");
push(@cgroup_list, "memory") if ($Enable_mem_cg eq "YES");
push(@cgroup_list, "blkio") if ($Enable_blkio_cg eq "YES");
push(@cgroup_list, "net_cls") if ($Enable_net_cls_cg eq "YES");
Expand Down Expand Up @@ -483,6 +484,48 @@
exit_myself(10,"Error opening $Cpuset->{ssh_keys}->{public}->{file_name}");
}
}

# Finally steal processes if needed.
if(!($Cpuset->{evolving_migrate_processes_from_cpusetpath} eq "undef") and (-d $Cgroup_directory_collection_links.'/cpuset/'.$Cpuset->{cpuset_path}.'/'.$Cpuset->{evolving_migrate_processes_from_cpusetpath} )) {
my $Cpuset_migrate_from;
my $Previous_oarexec_pid_file;
my $Previous_oarexec_pid;

$Previous_oarexec_pid_file = $Cpuset->{evolving_migrate_processes_from_oarexec_pid_file};
$Cpuset_migrate_from = $Cpuset->{cpuset_path}.'/'.$Cpuset->{evolving_migrate_processes_from_cpusetpath};

# If this file exists, we are on the head node of the previous job
if(-e $Previous_oarexec_pid_file) {
$Previous_oarexec_pid_file = $Cpuset->{evolving_migrate_processes_from_oarexec_pid_file};
# Thus, we get the pid of the oarexec because we do not want to migrate it
$Previous_oarexec_pid=`cat $Previous_oarexec_pid_file`;
} else {
# If we are not on a head node, we set pid to -1 which should be impossible
$Previous_oarexec_pid = "-1";
}

print_log(3, "Migrate processes from " . $Cpuset_migrate_from . " to ours. Old oarexec pid was: $Previous_oarexec_pid.");

system('set -ux
echo "all cgroups: '.join(' ',@cgroup_list).'"
for cg_path in '.$Cgroup_directory_collection_links.'/* ; do
cg=$(basename $cg_path)
echo "Treat cgroup: $cg"
for d in '.$Cgroup_directory_collection_links.'/$cg/'.$Cpuset_migrate_from.'; do
if [ -f "$d/tasks" ]; then
echo "migration ! yaouh: $d"
PROCESSES=$(/run/current-system/sw/bin/cat $d/tasks | grep -v -E "\b+'.$Previous_oarexec_pid.'\b+" | head -n 1)
while [ "$PROCESSES" != "" ]; do
echo "write into: '.$Cgroup_directory_collection_links.'/$cg/'.$Cpuset_path_job.'/tasks"
OARDO_BECOME_USER=root oardodo /run/current-system/sw/bin/echo "$PROCESSES" >> '.$Cgroup_directory_collection_links.'/$cg/'.$Cpuset_path_job.'/tasks
PROCESSES=$(/run/current-system/sw/bin/cat $d/tasks | grep -v '.$Previous_oarexec_pid.' | head -n 1)
done
fi
done
done');
} else {
print_log(3, "No processes to steal, maybe " . $Cgroup_directory_collection_links.'/cpuset/'.$Cpuset->{cpuset_path}.'/'.$Cpuset->{evolving_migrate_processes_from_cpusetpath} . " is empty ?");
}
}elsif ($ARGV[0] eq "clean"){
# delete ssh key files
if ($Cpuset->{ssh_keys}->{private}->{key} ne ""){
Expand Down

0 comments on commit 0d3167b

Please sign in to comment.