/*
 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17
#include <config.h>
#include "daemon.h"
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include "command-line.h"
#include "fatal-signal.h"
#include "dirs.h"
#include "lockfile.h"
#include "socket-util.h"
#include "util.h"
#include "vlog.h"
37
38
VLOG_DEFINE_THIS_MODULE(daemon);

/* --detach: Should we run in the background? */
static bool detach;

/* --pidfile: Name of pidfile (null if none).  Owned by this module: it is
 * replaced by set_pidfile() and released by make_pidfile(). */
static char *pidfile;

/* Device and inode of pidfile, so we can avoid reopening it. */
static dev_t pidfile_dev;
static ino_t pidfile_ino;

/* --overwrite-pidfile: Create pidfile even if one already exists and is
 * locked? */
static bool overwrite_pidfile;

/* --no-chdir: Should we chdir to "/"? */
static bool chdir_ = true;

/* File descriptor used by daemonize_start() and daemonize_complete(), or -1
 * when there is no parent process waiting to be notified. */
static int daemonize_fd = -1;

/* --monitor: Should a supervisory process monitor the daemon and restart it if
 * it dies due to an error signal? */
static bool monitor;

static void check_already_running(void);
static int lock_pidfile(FILE *, int command);
66
67
/* Returns the file name that would be used for a pidfile if 'name' were
68
* provided to set_pidfile(). The caller must free the returned string. */
69
char *
70
make_pidfile_name(const char *name)
73
? xasprintf("%s/%s.pid", ovs_rundir(), program_name)
74
: abs_file_name(ovs_rundir(), name));
75
}
76
77
/* Sets up a following call to daemonize() to create a pidfile named 'name'.
78
* If 'name' begins with '/', then it is treated as an absolute path.
79
* Otherwise, it is taken relative to RUNDIR, which is $(prefix)/var/run by
80
* default.
81
*
82
* If 'name' is null, then program_name followed by ".pid" is used. */
83
void
84
set_pidfile(const char *name)
85
{
86
free(pidfile);
87
pidfile = make_pidfile_name(name);
88
}
89
90
/* Returns an absolute path to the configured pidfile, or a null pointer if no
91
* pidfile is configured. The caller must not modify or free the returned
92
* string. */
93
const char *
94
get_pidfile(void)
95
{
96
return pidfile;
97
}
98
99
/* Sets that we do not chdir to "/". */
100
void
101
set_no_chdir(void)
102
{
103
chdir_ = false;
104
}
105
106
/* Will we chdir to "/" as part of daemonizing? */
107
bool
108
is_chdir_enabled(void)
109
{
110
return chdir_;
111
}
112
113
/* Normally, daemonize() or damonize_start() will terminate the program with a
114
* message if a locked pidfile already exists. If this function is called, an
115
* existing pidfile will be replaced, with a warning. */
116
void
117
ignore_existing_pidfile(void)
118
{
119
overwrite_pidfile = true;
120
}
121
122
/* Sets up a following call to daemonize() to detach from the foreground
123
* session, running this process in the background. */
124
void
125
set_detach(void)
126
{
127
detach = true;
128
}
129
130
/* Will daemonize() really detach? */
131
bool
132
get_detach(void)
133
{
134
return detach;
135
}
136
137
/* Sets up a following call to daemonize() to fork a supervisory process to
138
* monitor the daemon and restart it if it dies due to an error signal. */
139
void
140
daemon_set_monitor(void)
141
{
142
monitor = true;
143
}
144
145
/* If a pidfile has been configured, creates it and stores the running
146
* process's pid in it. Ensures that the pidfile will be deleted when the
147
* process exits. */
148
static void
149
make_pidfile(void)
150
{
151
long int pid = getpid();
152
struct stat s;
153
char *tmpfile;
154
FILE *file;
155
int error;
156
157
/* Create a temporary pidfile. */
158
tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
159
fatal_signal_add_file_to_unlink(tmpfile);
160
file = fopen(tmpfile, "w+");
161
if (!file) {
162
VLOG_FATAL("%s: create failed (%s)", tmpfile, strerror(errno));
163
}
164
165
if (fstat(fileno(file), &s) == -1) {
166
VLOG_FATAL("%s: fstat failed (%s)", tmpfile, strerror(errno));
167
}
168
169
fprintf(file, "%ld\n", pid);
170
if (fflush(file) == EOF) {
171
VLOG_FATAL("%s: write failed (%s)", tmpfile, strerror(errno));
172
}
173
174
error = lock_pidfile(file, F_SETLK);
175
if (error) {
176
VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile, strerror(error));
177
}
178
179
/* Rename or link it to the correct name. */
180
if (overwrite_pidfile) {
181
if (rename(tmpfile, pidfile) < 0) {
182
VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)",
183
tmpfile, pidfile, strerror(errno));
184
}
185
} else {
186
do {
187
error = link(tmpfile, pidfile) == -1 ? errno : 0;
188
if (error == EEXIST) {
189
check_already_running();
191
} while (error == EINTR || error == EEXIST);
192
if (error) {
193
VLOG_FATAL("failed to link \"%s\" as \"%s\" (%s)",
194
tmpfile, pidfile, strerror(error));
195
}
196
}
197
198
/* Ensure that the pidfile will get deleted on exit. */
199
fatal_signal_add_file_to_unlink(pidfile);
200
201
/* Delete the temporary pidfile if it still exists. */
202
if (!overwrite_pidfile) {
203
error = fatal_signal_unlink_file_now(tmpfile);
204
if (error) {
205
VLOG_FATAL("%s: unlink failed (%s)", tmpfile, strerror(error));
208
209
/* Clean up.
210
*
211
* We don't close 'file' because its file descriptor must remain open to
212
* hold the lock. */
213
pidfile_dev = s.st_dev;
214
pidfile_ino = s.st_ino;
215
free(tmpfile);
216
free(pidfile);
217
pidfile = NULL;
218
}
219
220
/* One-shot daemonization: when set_pidfile() or set_detach() has been used,
 * creates the pid file and detaches from the foreground session.  This is
 * simply daemonize_start() immediately followed by daemonize_complete(). */
void
daemonize(void)
{
    daemonize_start();
    daemonize_complete();
}
228
229
/* Forks, with a pipe connecting child to parent so that the child can report
 * when it has finished starting up.
 *
 * In the parent, blocks until the child writes a byte to the pipe (see
 * fork_notify_startup()), then stores -1 in '*fdp' and returns the child's
 * pid.  If the child goes away without writing, the parent exits with the
 * child's nonzero exit status when there is one, and otherwise terminates
 * through VLOG_FATAL().
 *
 * In the child, stores the write end of the pipe in '*fdp' and returns 0.
 *
 * Never returns a negative value: failure to fork is fatal. */
static pid_t
fork_and_wait_for_startup(int *fdp)
{
    int fds[2];
    pid_t pid;

    if (pipe(fds) < 0) {
        VLOG_FATAL("failed to create pipe (%s)", strerror(errno));
    }

    pid = fork();
    if (pid > 0) {
        /* Running in parent process. */
        size_t bytes_read;
        char c;

        close(fds[1]);
        fatal_signal_fork();
        if (read_fully(fds[0], &c, 1, &bytes_read) != 0) {
            int retval;
            int status;

            do {
                retval = waitpid(pid, &status, 0);
            } while (retval == -1 && errno == EINTR);

            if (retval == pid
                && WIFEXITED(status)
                && WEXITSTATUS(status)) {
                /* Child exited with an error.  Convey the same error to
                 * our parent process as a courtesy. */
                exit(WEXITSTATUS(status));
            }

            /* NOTE(review): errno here is whatever the calls above left
             * behind, so it may not describe why the read failed. */
            VLOG_FATAL("fork child failed to signal startup (%s)",
                       strerror(errno));
        }
        close(fds[0]);
        *fdp = -1;
    } else if (!pid) {
        /* Running in child process. */
        close(fds[0]);
        time_postfork();
        lockfile_postfork();
        *fdp = fds[1];
    } else {
        VLOG_FATAL("fork failed (%s)", strerror(errno));
    }

    return pid;
}
278
279
/* Signals successful startup over 'fd' by writing a single byte to it and then
 * closing it.  Does nothing when 'fd' is -1.  A failed write is reported
 * through VLOG_FATAL(). */
static void
fork_notify_startup(int fd)
{
    size_t n_written;
    int err;

    if (fd == -1) {
        return;
    }

    /* The one byte sent is the string terminator of "". */
    err = write_fully(fd, "", 1, &n_written);
    if (err) {
        VLOG_FATAL("pipe write failed (%s)", strerror(err));
    }

    close(fd);
}
294
295
/* Decides from a wait status whether the monitored daemon should be started
 * again.  Returns true only if 'status' says the process was terminated by one
 * of the signals treated here as an error: SIGABRT, SIGALRM, SIGBUS, SIGFPE,
 * SIGILL, SIGPIPE, SIGSEGV, SIGXCPU or SIGXFSZ.  Any other termination yields
 * false. */
static bool
should_restart(int status)
{
    if (!WIFSIGNALED(status)) {
        return false;
    }

    switch (WTERMSIG(status)) {
    case SIGABRT:
    case SIGALRM:
    case SIGBUS:
    case SIGFPE:
    case SIGILL:
    case SIGPIPE:
    case SIGSEGV:
    case SIGXCPU:
    case SIGXFSZ:
        return true;

    default:
        return false;
    }
}
314
315
static void
316
monitor_daemon(pid_t daemon_pid)
317
{
318
/* XXX Should log daemon's stderr output at startup time. */
319
const char *saved_program_name;
320
time_t last_restart;
323
324
saved_program_name = program_name;
325
program_name = xasprintf("monitor(%s)", program_name);
326
status_msg = xstrdup("healthy");
327
last_restart = TIME_MIN;
329
for (;;) {
330
int retval;
331
int status;
332
333
proctitle_set("%s: monitoring pid %lu (%s)",
334
saved_program_name, (unsigned long int) daemon_pid,
335
status_msg);
336
337
do {
338
retval = waitpid(daemon_pid, &status, 0);
339
} while (retval == -1 && errno == EINTR);
340
341
if (retval == -1) {
342
VLOG_FATAL("waitpid failed (%s)", strerror(errno));
343
} else if (retval == daemon_pid) {
344
char *s = process_status_msg(status);
345
if (should_restart(status)) {
346
free(status_msg);
347
status_msg = xasprintf("%d crashes: pid %lu died, %s",
348
++crashes,
349
(unsigned long int) daemon_pid, s);
350
free(s);
351
352
if (WCOREDUMP(status)) {
353
/* Disable further core dumps to save disk space. */
354
struct rlimit r;
355
356
r.rlim_cur = 0;
357
r.rlim_max = 0;
358
if (setrlimit(RLIMIT_CORE, &r) == -1) {
359
VLOG_WARN("failed to disable core dumps: %s",
360
strerror(errno));
361
}
362
}
363
364
/* Throttle restarts to no more than once every 10 seconds. */
365
if (time(NULL) < last_restart + 10) {
366
VLOG_WARN("%s, waiting until 10 seconds since last "
367
"restart", status_msg);
368
for (;;) {
369
time_t now = time(NULL);
370
time_t wakeup = last_restart + 10;
371
if (now >= wakeup) {
372
break;
373
}
374
sleep(wakeup - now);
375
}
376
}
377
last_restart = time(NULL);
378
379
VLOG_ERR("%s, restarting", status_msg);
380
daemon_pid = fork_and_wait_for_startup(&daemonize_fd);
381
if (!daemon_pid) {
382
break;
383
}
384
} else {
385
VLOG_INFO("pid %lu died, %s, exiting",
386
(unsigned long int) daemon_pid, s);
387
free(s);
388
exit(0);
389
}
390
}
391
}
393
394
/* Running in new daemon process. */
396
free((char *) program_name);
397
program_name = saved_program_name;
398
}
399
400
/* Close stdin, stdout, stderr. If we're started from e.g. an SSH session,
401
* then this keeps us from holding that session open artificially. */
402
static void
403
close_standard_fds(void)
404
{
405
int null_fd = get_null_fd();
406
if (null_fd >= 0) {
407
dup2(null_fd, STDIN_FILENO);
408
dup2(null_fd, STDOUT_FILENO);
409
dup2(null_fd, STDERR_FILENO);
410
}
411
412
/* Disable logging to stderr to avoid wasting CPU time. */
413
vlog_set_levels(NULL, VLF_CONSOLE, VLL_EMER);
414
}
415
416
/* If daemonization is configured, then starts daemonization, by forking and
417
* returning in the child process. The parent process hangs around until the
418
* child lets it know either that it completed startup successfully (by calling
419
* daemon_complete()) or that it failed to start up (by exiting with a nonzero
420
* exit code). */
421
void
422
daemonize_start(void)
424
daemonize_fd = -1;
426
if (detach) {
427
if (fork_and_wait_for_startup(&daemonize_fd) > 0) {
428
/* Running in parent process. */
431
/* Running in daemon or monitor process. */
432
}
433
434
if (monitor) {
435
int saved_daemonize_fd = daemonize_fd;
436
pid_t daemon_pid;
437
438
daemon_pid = fork_and_wait_for_startup(&daemonize_fd);
439
if (daemon_pid > 0) {
440
/* Running in monitor process. */
441
fork_notify_startup(saved_daemonize_fd);
442
close_standard_fds();
443
monitor_daemon(daemon_pid);
444
}
445
/* Running in daemon process. */
447
448
if (pidfile) {
449
make_pidfile();
450
}
451
452
/* Make sure that the unixctl commands for vlog get registered in a
453
* daemon, even before the first log message. */
454
vlog_init();
457
/* If daemonization is configured, then this function notifies the parent
458
* process that the child process has completed startup successfully.
459
*
460
* Calling this function more than once has no additional effect. */
461
void
462
daemonize_complete(void)
463
{
464
fork_notify_startup(daemonize_fd);
467
if (detach) {
468
setsid();
469
if (chdir_) {
470
ignore(chdir("/"));
471
}
472
close_standard_fds();
477
void
478
daemon_usage(void)
479
{
480
printf(
481
"\nDaemon options:\n"
482
" --detach run in background as daemon\n"
483
" --no-chdir do not chdir to '/'\n"
484
" --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n"
485
" --overwrite-pidfile with --pidfile, start even if already "
486
"running\n",
487
ovs_rundir(), program_name);
490
/* Issues fcntl() 'command' (the callers in this file pass F_SETLK or F_GETLK)
 * on 'file', using '*lck' set up as a write lock covering the whole file.
 * Calls interrupted by a signal (EINTR) are retried.
 *
 * Returns 0 on success, otherwise the errno value from fcntl().  '*lck' is
 * left as fcntl() updated it, which is how F_GETLK results reach the
 * caller. */
static int
lock_pidfile__(FILE *file, int command, struct flock *lck)
{
    int fd = fileno(file);

    /* l_start == 0 together with l_len == 0 means "from the beginning to the
     * end of the file, however large it grows". */
    lck->l_type = F_WRLCK;
    lck->l_whence = SEEK_SET;
    lck->l_start = 0;
    lck->l_len = 0;
    lck->l_pid = 0;

    for (;;) {
        if (fcntl(fd, command, lck) != -1) {
            return 0;
        }
        if (errno != EINTR) {
            return errno;
        }
    }
}
506
507
/* Convenience form of lock_pidfile__() for callers that have no use for the
 * resulting 'struct flock'.  Returns 0 on success, otherwise a positive errno
 * value. */
static int
lock_pidfile(FILE *file, int command)
{
    struct flock scratch;
    int result = lock_pidfile__(file, command, &scratch);

    return result;
}
514
516
read_pidfile__(const char *pidfile, bool delete_if_stale)
518
struct stat s, s2;
520
char line[128];
524
if ((pidfile_ino || pidfile_dev)
525
&& !stat(pidfile, &s)
526
&& s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) {
527
/* It's our own pidfile. We can't afford to open it, because closing
528
* *any* fd for a file that a process has locked also releases all the
529
* locks on that file.
530
*
531
* Fortunately, we know the associated pid anyhow: */
532
return getpid();
533
}
534
535
file = fopen(pidfile, "r+");
537
if (errno == ENOENT && delete_if_stale) {
540
error = errno;
541
VLOG_WARN("%s: open: %s", pidfile, strerror(error));
542
goto error;
543
}
544
545
error = lock_pidfile__(file, F_GETLK, &lck);
546
if (error) {
547
VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error));
548
goto error;
549
}
550
if (lck.l_type == F_UNLCK) {
551
/* pidfile exists but it isn't locked by anyone. We need to delete it
552
* so that a new pidfile can go in its place. But just calling
553
* unlink(pidfile) makes a nasty race: what if someone else unlinks it
554
* before we do and then replaces it by a valid pidfile? We'd unlink
555
* their valid pidfile. We do a little dance to avoid the race, by
556
* locking the invalid pidfile. Only one process can have the invalid
557
* pidfile locked, and only that process has the right to unlink it. */
558
if (!delete_if_stale) {
559
error = ESRCH;
560
VLOG_DBG("%s: pid file is stale", pidfile);
561
goto error;
562
}
563
564
/* Get the lock. */
565
error = lock_pidfile(file, F_SETLK);
566
if (error) {
567
/* We lost a race with someone else doing the same thing. */
568
VLOG_WARN("%s: lost race to lock pidfile", pidfile);
569
goto error;
570
}
571
572
/* Is the file we have locked still named 'pidfile'? */
573
if (stat(pidfile, &s) || fstat(fileno(file), &s2)
574
|| s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) {
575
/* No. We lost a race with someone else who got the lock before
576
* us, deleted the pidfile, and closed it (releasing the lock). */
577
error = EALREADY;
578
VLOG_WARN("%s: lost race to delete pidfile", pidfile);
579
goto error;
580
}
581
582
/* We won the right to delete the stale pidfile. */
583
if (unlink(pidfile)) {
584
error = errno;
585
VLOG_WARN("%s: failed to delete stale pidfile (%s)",
586
pidfile, strerror(error));
587
goto error;
588
}
589
VLOG_DBG("%s: deleted stale pidfile", pidfile);
590
fclose(file);
591
return 0;
592
}
593
594
if (!fgets(line, sizeof line, file)) {
595
if (ferror(file)) {
596
error = errno;
597
VLOG_WARN("%s: read: %s", pidfile, strerror(error));
598
} else {
599
error = ESRCH;
600
VLOG_WARN("%s: read: unexpected end of file", pidfile);
601
}
602
goto error;
603
}
604
605
if (lck.l_pid != strtoul(line, NULL, 10)) {
606
/* The process that has the pidfile locked is not the process that
607
* created it. It must be stale, with the process that has it locked
608
* preparing to delete it. */
610
VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld",
611
pidfile, line, (long int) lck.l_pid);
612
goto error;
613
}
614
615
fclose(file);
616
return lck.l_pid;
617
618
error:
619
if (file) {
620
fclose(file);
621
}
622
return -error;
623
}
624
625
/* Opens and reads a PID from 'pidfile'.  Returns the positive PID if
 * successful, otherwise a negative errno value.
 *
 * This is the read-only form of read_pidfile__(): a stale pidfile is reported
 * as an error and never deleted. */
pid_t
read_pidfile(const char *pidfile)
{
    return read_pidfile__(pidfile, false);
}
633
/* Checks whether a process with the given 'pidfile' is already running and,
634
* if so, aborts. If 'pidfile' is stale, deletes it. */
635
static void
636
check_already_running(void)
638
long int pid = read_pidfile__(pidfile, true);
639
if (pid > 0) {
640
VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid);
641
} else if (pid < 0) {
642
VLOG_FATAL("%s: pidfile check failed (%s), aborting",
643
pidfile, strerror(-pid));
644
}