Skip to content

Commit

Permalink
Use file locking to better detect if the server really is running
Browse files Browse the repository at this point in the history
  • Loading branch information
lakeman committed Apr 6, 2016
1 parent 1cf46dc commit 2241a15
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 76 deletions.
115 changes: 62 additions & 53 deletions server.c
Expand Up @@ -49,11 +49,11 @@ __thread keyring_file *keyring=NULL;

static char pidfile_path[256];
static int server_getpid = 0;
static int server_pidfd = -1;
static int server_bind();
static void server_loop();
static int server();
static int server_write_pid();
static int server_unlink_pid();
static void signal_handler(int signal);
static void serverCleanUp();
static const char *_server_pidfile_path(struct __sourceloc __whence);
Expand Down Expand Up @@ -83,15 +83,32 @@ int server_pid()
return -1;
const char *p = strrchr(ppath, '/');
assert(p != NULL);
FILE *f = fopen(ppath, "r");
if (f == NULL) {
int pid = -1;
int fd = open(ppath, O_RDONLY);
if (fd == -1) {
if (errno != ENOENT)
return WHYF_perror("fopen(%s,\"r\")", alloca_str_toprint(ppath));
return WHYF_perror("open(%s, O_RDONLY)", alloca_str_toprint(ppath));
} else {
char buf[20];
int pid = (fgets(buf, sizeof buf, f) != NULL) ? atoi(buf) : -1;
fclose(f);
if (pid > 0 && kill(pid, 0) != -1)
ssize_t len = read(fd, buf, sizeof buf);
if (len>0){
buf[len]=0;
pid = atoi(buf);
}
if (pid > 0){
// check that the server process still holds a lock
struct flock lock;
bzero(&lock, sizeof lock);
lock.l_type = F_RDLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = len;
fcntl(fd, F_GETLK, &lock);
if (lock.l_type == F_UNLCK || lock.l_pid != pid)
pid = -1;
}
close(fd);
if (pid > 0)
return pid;
INFOF("Unlinking stale pidfile %s", ppath);
unlink(ppath);
Expand Down Expand Up @@ -246,25 +263,6 @@ static void wokeup()

#endif

DEFINE_ALARM(server_shutdown_check);
void server_shutdown_check(struct sched_ent *alarm)
{
// TODO we should watch a descriptor and quit when it closes
/* If this server has been supplanted with another or Serval has been uninstalled, then its PID
file will change or be unaccessible. In this case, shut down without all the cleanup.
Perform this check at most once per second. */
static time_ms_t server_pid_time_ms = 0;
time_ms_t now = gettime_ms();
if (server_pid_time_ms == 0 || now - server_pid_time_ms > 1000) {
server_pid_time_ms = now;
if (server_pid() != server_getpid) {
WARNF("Server pid file no longer contains pid=%d -- shutting down without cleanup", server_getpid);
exit(1);
}
}
RESCHEDULE(alarm, now+1000, TIME_MS_NEVER_WILL, now+1100);
}

static int server_bind()
{
serverMode = SERVER_RUNNING;
Expand Down Expand Up @@ -335,9 +333,6 @@ static int server_bind()

time_ms_t now = gettime_ms();

// Periodically check for server shut down
RESCHEDULE(&ALARM_STRUCT(server_shutdown_check), now, TIME_MS_NEVER_WILL, now);

olsr_init_socket();

/* Calculate (and possibly show) CPU usage stats periodically */
Expand All @@ -357,12 +352,13 @@ static void server_loop()
while((serverMode==SERVER_RUNNING) && fd_poll2(waiting, wokeup))
;
serverCleanUp();

/* It is safe to unlink the pidfile here without checking whether it actually contains our own
* PID, because server_shutdown_check() will have been executed very recently (in fd_poll()), so
* if the code reaches here, the check has been done recently.
*/
server_unlink_pid();

if (server_pidfd!=-1){
close(server_pidfd);
server_pidfd = -1;
const char *ppath = server_pidfile_path();
unlink(ppath);
}
serverMode = 0;
}

Expand All @@ -389,25 +385,38 @@ static int server_write_pid()
const char *ppath = server_pidfile_path();
if (ppath == NULL)
return -1;

FILE *f = fopen(ppath, "w");
if (!f)
return WHYF_perror("fopen(%s,\"w\")", alloca_str_toprint(ppath));
server_getpid = getpid();
fprintf(f,"%d\n", server_getpid);
fclose(f);

return 0;
}

static int server_unlink_pid()
{
/* Remove PID file to indicate that the server is no longer running */
const char *ppath = server_pidfile_path();
if (ppath == NULL)
return -1;
if (unlink(ppath) == -1)
WHYF_perror("unlink(%s)", alloca_str_toprint(ppath));
int fd = open(ppath, O_RDWR | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd==-1)
return WHYF_perror("open(%s, O_RDWR | O_CREAT)", alloca_str_toprint(ppath));

int pid = server_getpid = getpid();
char buf[20];
int len = snprintf(buf, sizeof buf, "%d", pid);

struct flock lock;
bzero(&lock, sizeof lock);
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = len;
if (fcntl(fd, F_SETLK, &lock)==-1){
close(fd);
return WHYF_perror("fcntl(%d, F_SETLK, &lock)", fd);
}

if (ftruncate(fd, 0)==-1){
close(fd);
return WHYF_perror("ftruncate(%d, 0)", fd);
}

if (write(fd, buf, len)!=len){
close(fd);
return WHYF_perror("write(%d, %p, %d)", fd, buf, len);
}

// leave the pid file open!
server_pidfd = fd;
return 0;
}

Expand Down
23 changes: 0 additions & 23 deletions tests/server
Expand Up @@ -141,29 +141,6 @@ test_StartTwice() {
assert_no_servald_processes
}

doc_RemovePid="Server stops when pid file removed"
setup_RemovePid() {
setup
# Set a watchdog alarm to ensure that the daemon wakes regularly, so the
# pidfile is checked quickly, otherwise the daemon will wait 30 seconds
# before waking itself to check.
>watchdog
chmod 0550 watchdog
executeOk_servald config \
set log.console.level debug \
set log.console.show_time true \
set log.console.show_pid true \
set debug.io true \
set debug.watchdog on \
set server.watchdog.executable "$PWD/watchdog" \
set server.watchdog.interval_ms 100
start_servald_server
}
test_RemovePid() {
rm $instance_servald_pidfile
wait_until ! kill -0 $servald_pid 2>/dev/null
}

doc_MonitorQuit="Server stops due to monitor client disconnection"
setup_MonitorQuit() {
setup
Expand Down

0 comments on commit 2241a15

Please sign in to comment.