@@ -57,7 +57,8 @@ static xcgroup_ns_t cpuacct_ns;
57
57
static xcgroup_t user_cpuacct_cg ;
58
58
static xcgroup_t job_cpuacct_cg ;
59
59
static xcgroup_t step_cpuacct_cg ;
60
- xcgroup_t task_cpuacct_cg ;
60
+
61
+ List task_cpuacct_cg_list = NULL ;
61
62
62
63
static uint32_t max_task_id ;
63
64
@@ -68,6 +69,7 @@ jobacct_gather_cgroup_cpuacct_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
68
69
user_cgroup_path [0 ]= '\0' ;
69
70
job_cgroup_path [0 ]= '\0' ;
70
71
jobstep_cgroup_path [0 ]= '\0' ;
72
+ task_cgroup_path [0 ]= '\0' ;
71
73
72
74
/* initialize cpuacct cgroup namespace */
73
75
if (xcgroup_ns_create (slurm_cgroup_conf , & cpuacct_ns , "" , "cpuacct" )
@@ -77,6 +79,9 @@ jobacct_gather_cgroup_cpuacct_init(slurm_cgroup_conf_t *slurm_cgroup_conf)
77
79
return SLURM_ERROR ;
78
80
}
79
81
82
+ FREE_NULL_LIST (task_cpuacct_cg_list );
83
+ task_cpuacct_cg_list = list_create (free_task_cg_info );
84
+
80
85
return SLURM_SUCCESS ;
81
86
}
82
87
@@ -90,7 +95,7 @@ jobacct_gather_cgroup_cpuacct_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
90
95
if (user_cgroup_path [0 ] == '\0'
91
96
|| job_cgroup_path [0 ] == '\0'
92
97
|| jobstep_cgroup_path [0 ] == '\0'
93
- || task_cgroup_path [0 ] == 0 )
98
+ || task_cgroup_path [0 ] == '\0' )
94
99
return SLURM_SUCCESS ;
95
100
96
101
/*
@@ -151,16 +156,16 @@ jobacct_gather_cgroup_cpuacct_fini(slurm_cgroup_conf_t *slurm_cgroup_conf)
151
156
if (lock_ok == true)
152
157
xcgroup_unlock (& cpuacct_cg );
153
158
154
- xcgroup_destroy (& task_cpuacct_cg );
155
159
xcgroup_destroy (& user_cpuacct_cg );
156
160
xcgroup_destroy (& job_cpuacct_cg );
157
161
xcgroup_destroy (& step_cpuacct_cg );
158
162
xcgroup_destroy (& cpuacct_cg );
163
+ FREE_NULL_LIST (task_cpuacct_cg_list );
159
164
160
165
user_cgroup_path [0 ]= '\0' ;
161
166
job_cgroup_path [0 ]= '\0' ;
162
167
jobstep_cgroup_path [0 ]= '\0' ;
163
- task_cgroup_path [0 ] = 0 ;
168
+ task_cgroup_path [0 ]= '\0' ;
164
169
165
170
xcgroup_ns_destroy (& cpuacct_ns );
166
171
@@ -180,6 +185,8 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
180
185
int fstatus = SLURM_SUCCESS ;
181
186
int rc ;
182
187
char * slurm_cgpath ;
188
+ task_cg_info_t * task_cg_info ;
189
+ bool need_to_add = false;
183
190
184
191
job = jobacct_id -> job ;
185
192
uid = job -> uid ;
@@ -191,6 +198,8 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
191
198
if (taskid >= max_task_id )
192
199
max_task_id = taskid ;
193
200
201
+ xassert (task_cpuacct_cg_list );
202
+
194
203
debug ("%s: jobid %u stepid %u taskid %u max_task_id %u" ,
195
204
__func__ , jobid , stepid , taskid , max_task_id );
196
205
@@ -210,6 +219,7 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
210
219
return SLURM_ERROR ;
211
220
}
212
221
}
222
+ xfree (slurm_cgpath );
213
223
214
224
/* build job cgroup relative path if not set (may not be) */
215
225
if (* job_cgroup_path == '\0' ) {
@@ -344,26 +354,40 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
344
354
goto error ;
345
355
}
346
356
357
+ if (!(task_cg_info = list_find_first (task_cpuacct_cg_list ,
358
+ find_task_cg_info ,
359
+ & taskid ))) {
360
+ task_cg_info = xmalloc (sizeof (* task_cg_info ));
361
+ task_cg_info -> taskid = taskid ;
362
+ need_to_add = true;
363
+ }
364
+
347
365
/*
348
366
* Create task cgroup in the cpuacct ns
349
367
*/
350
- if (xcgroup_create (& cpuacct_ns , & task_cpuacct_cg ,
368
+ if (xcgroup_create (& cpuacct_ns , & task_cg_info -> task_cg ,
351
369
task_cgroup_path ,
352
370
uid , gid ) != XCGROUP_SUCCESS ) {
353
371
/* do not delete user/job cgroup as they can exist for other
354
372
* steps, but release cgroup structures */
355
373
xcgroup_destroy (& user_cpuacct_cg );
356
374
xcgroup_destroy (& job_cpuacct_cg );
375
+
376
+ /* Don't use free_task_cg_info as the task_cg isn't there */
377
+ xfree (task_cg_info );
378
+
357
379
error ("jobacct_gather/cgroup: unable to create jobstep %u.%u "
358
380
"task %u cpuacct cgroup" , jobid , stepid , taskid );
359
381
fstatus = SLURM_ERROR ;
360
382
goto error ;
361
383
}
362
384
363
- if (xcgroup_instantiate (& task_cpuacct_cg ) != XCGROUP_SUCCESS ) {
385
+ if (xcgroup_instantiate (& task_cg_info -> task_cg )
386
+ != XCGROUP_SUCCESS ) {
364
387
xcgroup_destroy (& user_cpuacct_cg );
365
388
xcgroup_destroy (& job_cpuacct_cg );
366
389
xcgroup_destroy (& step_cpuacct_cg );
390
+ free_task_cg_info (task_cg_info );
367
391
error ("jobacct_gather/cgroup: unable to instantiate jobstep "
368
392
"%u.%u task %u cpuacct cgroup" , jobid , stepid , taskid );
369
393
fstatus = SLURM_ERROR ;
@@ -373,14 +397,18 @@ jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id)
373
397
/*
374
398
* Attach the slurmstepd to the task cpuacct cgroup
375
399
*/
376
- rc = xcgroup_add_pids (& task_cpuacct_cg , & pid , 1 );
400
+ rc = xcgroup_add_pids (& task_cg_info -> task_cg , & pid , 1 );
377
401
if (rc != XCGROUP_SUCCESS ) {
378
- error ("jobacct_gather/cgroup: unable to add slurmstepd to "
379
- "cpuacct cg '%s'" , task_cpuacct_cg .path );
402
+ error ("jobacct_gather/cgroup: unable to add slurmstepd to cpuacct cg '%s'" ,
403
+ task_cg_info -> task_cg .path );
380
404
fstatus = SLURM_ERROR ;
381
405
} else
382
406
fstatus = SLURM_SUCCESS ;
383
407
408
+ /* Add the task cgroup to the list now that it is initialized. */
409
+ if (need_to_add )
410
+ list_append (task_cpuacct_cg_list , task_cg_info );
411
+
384
412
error :
385
413
xcgroup_unlock (& cpuacct_cg );
386
414
xcgroup_destroy (& cpuacct_cg );
0 commit comments