Skip to content

Commit

Permalink
fast-reboot: parallel memory clearing
Browse files Browse the repository at this point in the history
Arbitrarily pick 16GB as the unit of parallelism, split the
memory clearing up into jobs, and schedule each job node-local
to the memory it clears (or on node 0 if we can't work the node
out, as is the case for the memory below SKIBOOT_BASE).

This seems to cut at least ~40% of the time spent zeroing memory
on fast-reboot on a 256GB Boston system.

Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
  • Loading branch information
stewartsmith committed Jul 17, 2018
1 parent 5bf0375 commit 06808a0
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 4 deletions.
2 changes: 1 addition & 1 deletion core/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,7 @@ u64 dt_get_address(const struct dt_node *node, unsigned int index,
return dt_get_number(p->prop + pos, na);
}

static u32 __dt_get_chip_id(const struct dt_node *node)
u32 __dt_get_chip_id(const struct dt_node *node)
{
const struct dt_property *prop;

Expand Down
100 changes: 98 additions & 2 deletions core/mem_region.c
Original file line number Diff line number Diff line change
Expand Up @@ -1206,29 +1206,125 @@ static void mem_clear_range(uint64_t s, uint64_t e)
return;
}

prlog(PR_NOTICE, "Clearing region %llx-%llx\n",
prlog(PR_DEBUG, "Clearing region %llx-%llx\n",
(long long)s, (long long)e);
memset((void *)s, 0, e - s);
}

/* Arguments handed to a single memory-clearing job. */
struct mem_region_clear_job_args {
	char *job_name;		/* heap-allocated label describing the job */
	uint64_t s;		/* start address of the range to clear */
	uint64_t e;		/* end address of the range (exclusive) */
};

/*
 * Job entry point: 'data' points at a struct mem_region_clear_job_args
 * describing the range that mem_clear_range() should process.
 */
static void mem_region_clear_job(void *data)
{
	struct mem_region_clear_job_args *range = data;

	mem_clear_range(range->s, range->e);
}

/* Amount of memory handled by one clearing job: 16GB. */
#define MEM_REGION_CLEAR_JOB_SIZE (16ULL << 30)

void mem_region_clear_unused(void)
{
int njobs = 0;
struct cpu_job **jobs;
struct mem_region *r;
struct mem_region_clear_job_args *job_args;
uint64_t s,l;
uint64_t total = 0;
uint32_t chip_id;
char *path;
int i;

lock(&mem_region_lock);
assert(mem_regions_finalised);

list_for_each(&regions, r, list) {
if (!(r->type == REGION_OS))
continue;
njobs++;
/* One job per 16GB */
njobs += r->len / MEM_REGION_CLEAR_JOB_SIZE;
}

jobs = malloc(njobs * sizeof(struct cpu_job*));
job_args = malloc(njobs * sizeof(struct mem_region_clear_job_args));

prlog(PR_NOTICE, "Clearing unused memory:\n");
i = 0;
list_for_each(&regions, r, list) {
/* If it's not unused, ignore it. */
if (!(r->type == REGION_OS))
continue;

assert(r != &skiboot_heap);

mem_clear_range(r->start, r->start + r->len);
s = r->start;
l = r->len;
while(l > MEM_REGION_CLEAR_JOB_SIZE) {
job_args[i].s = s+l - MEM_REGION_CLEAR_JOB_SIZE;
job_args[i].e = s+l;
l-=MEM_REGION_CLEAR_JOB_SIZE;
job_args[i].job_name = malloc(sizeof(char)*100);
total+=MEM_REGION_CLEAR_JOB_SIZE;
chip_id = __dt_get_chip_id(r->node);
if (chip_id == -1)
chip_id = 0;
path = dt_get_path(r->node);
snprintf(job_args[i].job_name, 100,
"clear %s, %s 0x%"PRIx64" len: %"PRIx64" on %d",
r->name, path,
job_args[i].s,
(job_args[i].e - job_args[i].s),
chip_id);
free(path);
printf("job: %s\n", job_args[i].job_name);
jobs[i] = cpu_queue_job_on_node(chip_id,
job_args[i].job_name,
mem_region_clear_job,
&job_args[i]);
if (!jobs[i])
jobs[i] = cpu_queue_job(NULL,
job_args[i].job_name,
mem_region_clear_job,
&job_args[i]);
assert(jobs[i]);
i++;
}
job_args[i].s = s;
job_args[i].e = s+l;
job_args[i].job_name = malloc(sizeof(char)*100);
total+=l;
chip_id = __dt_get_chip_id(r->node);
if (chip_id == -1)
chip_id = 0;
path = dt_get_path(r->node);
snprintf(job_args[i].job_name,100,
"clear %s, %s 0x%"PRIx64" len: 0x%"PRIx64" on %d",
r->name, path,
job_args[i].s,
(job_args[i].e - job_args[i].s),
chip_id);
free(path);
printf("job: %s\n", job_args[i].job_name);
jobs[i] = cpu_queue_job_on_node(chip_id,
job_args[i].job_name,
mem_region_clear_job,
&job_args[i]);
i++;
}
cpu_process_local_jobs();
l = 0;
for(i=0; i < njobs; i++) {
cpu_wait_job(jobs[i], true);
l += (job_args[i].e - job_args[i].s);
printf("Clearing memory... %"PRIu64"/%"PRIu64"GB done\n",
l>>30, total>>30);
free(job_args[i].job_name);
}
unlock(&mem_region_lock);
free(jobs);
free(job_args);
}

static void mem_region_add_dt_reserved_node(struct dt_node *parent,
Expand Down
5 changes: 4 additions & 1 deletion include/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,12 @@ u32 dt_n_size_cells(const struct dt_node *node);
u64 dt_get_number(const void *pdata, unsigned int cells);

/* Find an ibm,chip-id property in this node; if not found, walk up the parent
* nodes. Returns -1 if no chip-id property exists. */
* nodes. */
u32 dt_get_chip_id(const struct dt_node *node);

/* Same as dt_get_chip_id, except that it returns -1 if no chip-id property exists. */
u32 __dt_get_chip_id(const struct dt_node *node);

/* Address accessors ("reg" properties parsing). No translation,
* only support "simple" address forms (1 or 2 cells). Asserts
* if address doesn't exist
Expand Down

0 comments on commit 06808a0

Please sign in to comment.