Skip to content

Commit

Permalink
greatly improve status display
Browse files Browse the repository at this point in the history
- ETA always visible
- based on processed bytes instead of stupid assumptions
- compatible with multithreading
  • Loading branch information
nil0x42 committed Sep 4, 2020
1 parent 0efd5a9 commit 33a4f98
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 51 deletions.
7 changes: 4 additions & 3 deletions include/status.h
Expand Up @@ -7,16 +7,17 @@ enum e_status_update
TAGDUP_START, /* start processing duplicate removal [2/3] */
CHUNK_DONE, /* a chunk is terminated */
CTASK_DONE, /* a ctask is terminated */
FCLEAN_START, /* start processing fclean (outfile cleanout) [3/3] */
FCLEAN_START, /* start processing fclean (outfile cleanup) [3/3] */
};

/* update status variables */
enum e_status_set
{
FILE_SIZE, /* total file size (g_infile) */
FCOPY_BYTES, /* currently copied bytes */
FCOPY_BYTES, /* bytes currently processed by FCOPY */
CHUNK_SIZE, /* chunk size */
CLEANOUT_BYTES, /* bytes processed by cleanout_chunk() */
TAGDUP_BYTES, /* bytes currently processed by TAGDUP */
FCLEAN_BYTES, /* bytes currently processed by FCLEAN */
};

/* source file: status.c */
Expand Down
16 changes: 14 additions & 2 deletions src/chunk.c
Expand Up @@ -9,6 +9,7 @@
#include "const.h"
#include "config.h"
#include "status.h"
#include "debug.h"


int count_chunks(void)
Expand Down Expand Up @@ -70,9 +71,13 @@ bool get_next_chunk(t_chunk *chunk, struct file *file)
*/
void cleanout_chunk(t_chunk *chunk)
{
t_line line;
long slot;
t_line line;
long slot;
char *base_ptr;
int i;

i = 0;
base_ptr = chunk->ptr;
while (get_next_line(&line, chunk))
{
slot = hash(&line) % g_hmap.size;
Expand All @@ -86,7 +91,14 @@ void cleanout_chunk(t_chunk *chunk)
/* archaic open addressing collision resolver */
slot = (slot + 1) % g_hmap.size;
}
i++;
if (i == 500000) {
set_status(TAGDUP_BYTES, (size_t)(chunk->ptr - base_ptr));
base_ptr = chunk->ptr;
i = 0;
}
}
set_status(TAGDUP_BYTES, (size_t)(chunk->ptr - base_ptr));
free(chunk);
update_status(CTASK_DONE);
}
19 changes: 16 additions & 3 deletions src/hmap.c
Expand Up @@ -2,6 +2,7 @@
#include "hmap.h"
#include "hash.h"
#include "const.h"
#include "status.h"
#include "error.h"
#include "debug.h"

Expand Down Expand Up @@ -43,9 +44,14 @@ void destroy_hmap(void)
void populate_hmap(t_chunk *chunk)
{
DLOG("populate_hmap()");
t_line line;
long slot;
size_t has_slots;
t_line line;
long slot;
size_t has_slots;
char *base_ptr;
int i;

i = 0;
base_ptr = chunk->ptr;

#ifdef DEBUG
int last_percent_filled = 0;
Expand Down Expand Up @@ -86,7 +92,14 @@ void populate_hmap(t_chunk *chunk)
}
if (!has_slots)
error("populate_hmap(): no space left on hashmap.");
i++;
if (i == 500000) {
set_status(TAGDUP_BYTES, (size_t)(chunk->ptr - base_ptr));
base_ptr = chunk->ptr;
i = 0;
}
}
set_status(TAGDUP_BYTES, (size_t)(chunk->ptr - base_ptr));
#ifdef DEBUG
DLOG("populate_hmap(): used %ld/%ld slots (%.2f%%)",
filled, g_hmap.size, (double)filled / (double)g_hmap.size * 100.0);
Expand Down
13 changes: 13 additions & 0 deletions src/main.c
Expand Up @@ -36,10 +36,15 @@ static void remove_duplicates(void)
t_line line;
size_t line_size;
char *dst;
char *base_ptr;
int i;

file_chunk.ptr = g_file->addr;
file_chunk.endptr = g_file->addr + g_file->info.st_size;

i = 0;
base_ptr = file_chunk.ptr;

dst = file_chunk.ptr;
while (get_next_line(&line, &file_chunk))
{
Expand All @@ -48,10 +53,17 @@ static void remove_duplicates(void)
dst += line_size;
if (dst != file_chunk.endptr)
*dst++ = '\n';
i++;
if (i == 500000) {
set_status(FCLEAN_BYTES, (size_t)(file_chunk.ptr - base_ptr));
base_ptr = file_chunk.ptr;
i = 0;
}
}

/* update file size */
g_file->info.st_size = dst - g_file->addr;
set_status(FCLEAN_BYTES, (size_t)(file_chunk.ptr - base_ptr));
}


Expand All @@ -65,6 +77,7 @@ int main(int argc, char **argv)
update_status(FCOPY_START);
init_file(g_conf.infile_name, g_conf.outfile_name);
config(); /* configure g_conf options */
set_status(CHUNK_SIZE, g_conf.chunk_size);

init_hmap(g_conf.hmap_size);
update_status(TAGDUP_START);
Expand Down
114 changes: 71 additions & 43 deletions src/status.c
Expand Up @@ -37,7 +37,8 @@ struct status
size_t file_size;
size_t fcopy_bytes;
size_t chunk_size;
size_t cleanout_bytes;
size_t tagdup_bytes;
size_t fclean_bytes;
};

static struct status g_status = {
Expand All @@ -52,7 +53,8 @@ static struct status g_status = {
.file_size = 0,
.fcopy_bytes = 0,
.chunk_size = 0,
.cleanout_bytes = 0,
.tagdup_bytes = 0,
.fclean_bytes = 0,
};

pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
Expand Down Expand Up @@ -98,13 +100,27 @@ void set_status(enum e_status_set var, size_t val)

switch (var) {
case FILE_SIZE:
DLOG("set_status(FILE_SIZE) called");
DLOG("set_status(FILE_SIZE, %lu) called", val);
g_status.file_size = val;
break ;
case FCOPY_BYTES:
DLOG("set_status(FCOPY_BYTES) called");
DLOG("set_status(FCOPY_BYTES, %lu) called", val);
g_status.fcopy_bytes += val;
break ;
case CHUNK_SIZE:
DLOG("set_status(CHUNK_SIZE, %lu) called", val);
g_status.chunk_size = val;
break ;
case TAGDUP_BYTES:
DLOG("set_status(TAGDUP_BYTES, %lu) called", val);
pthread_mutex_lock(&g_mutex);
g_status.tagdup_bytes += val;
pthread_mutex_unlock(&g_mutex);
break ;
case FCLEAN_BYTES:
DLOG("set_status(FCLEAN_BYTES, %lu) called", val);
g_status.fclean_bytes += val;
break ;
}
}

Expand Down Expand Up @@ -159,65 +175,77 @@ void display_status(void)
char elapsed_time_str[BUF_SIZE] = {0};
char arrival_time_str[BUF_SIZE] = {0};
char current_task_str[BUF_SIZE] = {0};
double percent_progression = 0.0;

time_t current_time = 0;
time_t elapsed_time = 0;
time_t arrival_time = 0;

if (!FCOPY_STARTED())
return ;
double progress = 0.0; /* 1.0 == 100% */
double remain_time = 0.0;


current_time = time(NULL);
elapsed_time = current_time - START_TIME();

percent_progression = 0.0;
if (g_status.fcopy_bytes > 0)
{
double fcopy_part;
fcopy_part = (double)g_status.fcopy_bytes / (double)g_status.file_size;
percent_progression = fcopy_part * 5.0;
if (elapsed_time > 0) {
arrival_time = elapsed_time * (time_t)(100.0 / percent_progression);
/* we need at least 1 sec execution to show status */
if (elapsed_time == 0)
return ;

/* FCLEAN [3/3] --> 94% to 100% */
if (g_status.fclean_bytes) {
double fclean_part =
(double)g_status.fclean_bytes / (double)g_status.file_size;
progress = 0.94 + (fclean_part * 0.06);
if (progress > 0.9999)
progress = 0.9999;

double fclean_elapsed_time = elapsed_time;
fclean_elapsed_time -= FCOPY_DURATION() + TAGDUP_DURATION();
if (fclean_elapsed_time >= 1) {
remain_time = fclean_elapsed_time / fclean_part;
remain_time -= fclean_elapsed_time;
arrival_time = current_time + remain_time;
}
}
else if (!TAGDUP_TERMINATED())
{
percent_progression = 5.0;
double tagdup_elapsed_time = elapsed_time - FCOPY_DURATION();
if (g_status.done_ctasks > 0 && tagdup_elapsed_time > 0.9)
{
double time_per_ctask = tagdup_elapsed_time / g_status.done_ctasks;
time_t remaining_time = time_per_ctask * MISSING_CTASKS();
/* adding FCOPY_DURATION because it's ~= FCLEAN_DURATION */
arrival_time = current_time + remaining_time + FCOPY_DURATION();

double percent_per_ctask = 90.0 / g_status.total_ctasks;
percent_progression += percent_per_ctask * g_status.done_ctasks;
double cur_ctasks_seconds = current_time - g_status.last_ctask_date;
double ctask_progression = cur_ctasks_seconds / time_per_ctask;
if (ctask_progression > 1.0)
ctask_progression = 1.0;
percent_progression += percent_per_ctask * ctask_progression;
/* TAGDUP [2/3] --> 4% to 94% */
else if (g_status.tagdup_bytes) {
double total_bytes = g_status.total_ctasks * g_status.chunk_size;
double tagdup_part = (double)g_status.tagdup_bytes / total_bytes;
progress = 0.04 + (tagdup_part * 0.90);

double tagdup_elapsed_time = elapsed_time;
tagdup_elapsed_time -= FCOPY_DURATION();
if (tagdup_elapsed_time >= 1) {
remain_time = tagdup_elapsed_time / tagdup_part;
remain_time -= tagdup_elapsed_time;
arrival_time = current_time + remain_time;
/* add estimation of FCLEAN duration: */
arrival_time += (FCOPY_DURATION() * 6) / 4;
}
}
else
{
percent_progression = 95.0;
/* FCOPY [1/3] --> 0% to 4% */
else if (g_status.fcopy_bytes) {
progress = (double)g_status.fcopy_bytes / (double)g_status.file_size;
progress *= 0.04;
}
else {
return;
}

double percent_per_second = 5.0 / (double) FCOPY_DURATION();
time_t elapsed_fclean = current_time - g_status.fclean_date;
percent_progression += percent_per_second * (double)elapsed_fclean;
if (percent_progression > 99.99)
percent_progression = 99.99;
/* fallback method to display ETA */
if (progress > 0 && arrival_time == 0) {
remain_time = (double)elapsed_time / progress;
remain_time -= elapsed_time;
arrival_time = current_time + remain_time;
}


repr_elapsed_time(elapsed_time_str, elapsed_time);
repr_arrival_time(arrival_time_str, arrival_time);
repr_current_task(current_task_str);
fprintf(stderr, "time: %s %.2f%% (ETA: %s) %s ...\n",
fprintf(stderr, "time: %s %5.2f%% (ETA: %s) %s ...\n",
elapsed_time_str,
percent_progression,
progress * 100.0,
arrival_time_str,
current_task_str);
}

0 comments on commit 33a4f98

Please sign in to comment.