Skip to content

Commit

Permalink
Add linux events
Browse files Browse the repository at this point in the history
This topic branch leverages the Solaris style FMA call points
in ZFS to create a user space visible event notification system
under Linux.  This new system is called zevent and it unifies
all previous Solaris style ereports and sysevent notifications.

Under this Linux specific scheme when a sysevent or ereport event
occurs an nvlist describing the event is created which looks almost
exactly like a Solaris ereport.  These events are queued up in the
kernel when they occur and conditionally logged to the console.
It is then up to a user space application to consume the events
and do whatever it likes with them.

To make this possible the existing /dev/zfs ABI has been extended
with two new ioctls which behave as follows.

* ZFS_IOC_EVENTS_NEXT
Get the next pending event.  The kernel will keep track of the last
event consumed by the file descriptor and provide the next one if
available.  If no new events are available the ioctl() will block
waiting for the next event.  This ioctl may also be called in a
non-blocking mode by setting zc.zc_guid = ZEVENT_NONBLOCK.  In the
non-blocking case if no events are available ENOENT will be returned.
It is possible that ESHUTDOWN will be returned if the ioctl() is
called while module unloading is in progress.  And finally ENOMEM
may occur if the provided nvlist buffer is not large enough to
contain the entire event.

* ZFS_IOC_EVENTS_CLEAR
Clear all events queued by the kernel.  The kernel will keep a fairly
large number of recent events queued; use this ioctl to clear the
in-kernel list.  This will affect all user space processes consuming
events.

The zpool command has been extended to use this events ABI with the
'events' subcommand.  You may run 'zpool events -v' to output a
verbose log of all recent events.  This is very similar to the
Solaris 'fmdump -ev' command with the key difference being it also
includes what would be considered sysevents under Solaris.  You
may also run in follow mode with the '-f' option.  To clear the
in kernel event queue use the '-c' option.

$ sudo cmd/zpool/zpool events -fv
TIME                        CLASS
May 13 2010 16:31:15.777711000 ereport.fs.zfs.config.sync
        class = "ereport.fs.zfs.config.sync"
        ena = 0x40982b7897700001
        detector = (embedded nvlist)
                version = 0x0
                scheme = "zfs"
                pool = 0xed976600de75dfa6
        (end detector)

        time = 0x4bec8bc3 0x2e5aed98
        pool = "zpios"
        pool_guid = 0xed976600de75dfa6
        pool_context = 0x0

While the 'zpool events' command is handy for interactive debugging,
it is not expected to be the primary consumer of zevents.  This ABI
was primarily added to facilitate the addition of a user space
monitoring daemon.  This daemon would consume all events posted by
the kernel and based on the type of event perform an action.  For
most events simply forwarding them on to syslog is likely enough.
But this interface also cleanly allows for more sophisticated
actions to be taken such as generating an email for a failed drive.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
  • Loading branch information
behlendorf committed Aug 31, 2010
1 parent c9c0d07 commit 2668527
Show file tree
Hide file tree
Showing 18 changed files with 1,105 additions and 492 deletions.
333 changes: 333 additions & 0 deletions cmd/zpool/zpool_main.c
Expand Up @@ -42,6 +42,8 @@
#include <zone.h>
#include <sys/fs/zfs.h>
#include <sys/stat.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>

#include <libzfs.h>

Expand Down Expand Up @@ -77,6 +79,7 @@ static int zpool_do_export(int, char **);
static int zpool_do_upgrade(int, char **);

static int zpool_do_history(int, char **);
static int zpool_do_events(int, char **);

static int zpool_do_get(int, char **);
static int zpool_do_set(int, char **);
Expand Down Expand Up @@ -119,6 +122,7 @@ typedef enum {
HELP_SCRUB,
HELP_STATUS,
HELP_UPGRADE,
HELP_EVENTS,
HELP_GET,
HELP_SET,
HELP_SPLIT
Expand Down Expand Up @@ -167,6 +171,8 @@ static zpool_command_t command_table[] = {
{ "upgrade", zpool_do_upgrade, HELP_UPGRADE },
{ NULL },
{ "history", zpool_do_history, HELP_HISTORY },
{ "events", zpool_do_events, HELP_EVENTS },
{ NULL },
{ "get", zpool_do_get, HELP_GET },
{ "set", zpool_do_set, HELP_SET },
};
Expand Down Expand Up @@ -234,6 +240,8 @@ get_usage(zpool_help_t idx) {
return (gettext("\tupgrade\n"
"\tupgrade -v\n"
"\tupgrade [-V version] <-a | pool ...>\n"));
case HELP_EVENTS:
return (gettext("\tevents [-vfc]\n"));
case HELP_GET:
return (gettext("\tget <\"all\" | property[,...]> "
"<pool> ...\n"));
Expand Down Expand Up @@ -4210,6 +4218,331 @@ zpool_do_history(int argc, char **argv)
return (ret);
}

/*
 * Command line options accepted by 'zpool events'.  Each field is a
 * zero/non-zero flag set while parsing the option string "vfc".
 */
typedef struct ev_opts {
	int verbose;	/* -v: also dump the full nvlist of each event */
	int follow;	/* -f: forwarded to zpool_events_next() as its mode flag */
	int clear;	/* -c: clear the queued events instead of listing them */
} ev_opts_t;

/*
 * Print the one-line summary of an event: a fixed-width timestamp followed
 * by the event class, for example
 *
 *	May 13 2010 16:31:15.777711000 ereport.fs.zfs.config.sync
 *
 * nvl must carry an int64 array named FM_EREPORT_TIME and a string named
 * FM_CLASS; both lookups are verify()'d.  The first array element is
 * formatted as a calendar time and the second as a nine digit fraction.
 */
static void
zpool_do_events_short(nvlist_t *nvl)
{
	char ctime_str[26], str[32], *ptr;
	int64_t *tv;
	uint_t n;
	time_t sec;
	longlong_t frac;

	verify(nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0);

	/*
	 * Copy the values out of the array rather than handing ctime_r() a
	 * cast pointer: time_t is not guaranteed to be 64 bits wide, so
	 * reinterpreting an int64_t in place may read the wrong bytes.
	 * Missing elements are treated as zero instead of being read past
	 * the end of the array.
	 */
	sec = (n > 0) ? (time_t)tv[0] : 0;
	frac = (n > 1) ? (longlong_t)tv[1] : 0;

	/*
	 * ctime_r() may fail (e.g. the year does not fit); substitute a
	 * placeholder with the same layout so the slicing below stays valid.
	 */
	if (ctime_r(&sec, ctime_str) == NULL) {
		(void) snprintf(ctime_str, sizeof (ctime_str), "%s",
		    "--- --- -- --:--:-- ----\n");
	}

	/*
	 * Rearrange "Www Mmm dd hh:mm:ss yyyy\n" into "Mmm dd yyyy hh:mm:ss".
	 * The buffer is pre-filled with blanks which act as separators.
	 */
	(void) memset(str, ' ', sizeof (str));
	(void) memcpy(str, ctime_str + 4, 6);		/* 'Jun 30' */
	(void) memcpy(str + 7, ctime_str + 20, 4);	/* '1993' */
	(void) memcpy(str + 12, ctime_str + 11, 8);	/* '21:49:08' */

	/* Bounded so an out-of-range fraction is truncated, not overflowed. */
	(void) snprintf(str + 20, sizeof (str) - 20, ".%09lld",
	    frac);					/* '.123456789' */
	(void) printf(gettext("%s "), str);

	verify(nvlist_lookup_string(nvl, FM_CLASS, &ptr) == 0);
	(void) printf(gettext("%s\n"), ptr);
}

/*
 * Dump every pair of an nvlist as "name = value", one pair per line, with
 * each line indented by 'depth' columns.
 *
 * Scalar integers are printed in hex, strings are quoted, and embedded
 * nvlists (single or arrays) are printed recursively with 8 more columns of
 * indentation, bracketed by "(embedded nvlist)" / "(end <name>)" markers.
 * String, boolean and byte arrays, doubles and unknown types are printed as
 * "<unknown>".  Return values of the nvpair_value_*() getters are ignored.
 */
static void
zpool_do_events_nvprint(nvlist_t *nvl, int depth)
{
	nvpair_t *pair = nvlist_next_nvpair(nvl, NULL);

	while (pair != NULL) {
		data_type_t kind = nvpair_type(pair);
		const char *label = nvpair_name(pair);

		/* Scratch storage for the scalar getters below. */
		boolean_t flag;
		uint8_t v8;
		uint16_t v16;
		uint32_t v32;
		uint64_t v64;
		char *text;
		nvlist_t *child;

		/* Every pair starts with "<indent><name> = ". */
		printf(gettext("%*s%s = "), depth, "", label);

		switch (kind) {
		case DATA_TYPE_BOOLEAN:
			/* A valueless boolean is true merely by existing. */
			printf(gettext("%s"), "1");
			break;

		case DATA_TYPE_BOOLEAN_VALUE:
			(void) nvpair_value_boolean_value(pair, &flag);
			if (flag)
				printf(gettext("%s"), "1");
			else
				printf(gettext("%s"), "0");
			break;

		case DATA_TYPE_BYTE:
			(void) nvpair_value_byte(pair, &v8);
			printf(gettext("0x%x"), v8);
			break;

		/*
		 * Signed scalars are fetched into the unsigned scratch
		 * variable of the same width, so they print as the raw
		 * bit pattern without sign extension.
		 */
		case DATA_TYPE_INT8:
			(void) nvpair_value_int8(pair, (void *)&v8);
			printf(gettext("0x%x"), v8);
			break;

		case DATA_TYPE_UINT8:
			(void) nvpair_value_uint8(pair, &v8);
			printf(gettext("0x%x"), v8);
			break;

		case DATA_TYPE_INT16:
			(void) nvpair_value_int16(pair, (void *)&v16);
			printf(gettext("0x%x"), v16);
			break;

		case DATA_TYPE_UINT16:
			(void) nvpair_value_uint16(pair, &v16);
			printf(gettext("0x%x"), v16);
			break;

		case DATA_TYPE_INT32:
			(void) nvpair_value_int32(pair, (void *)&v32);
			printf(gettext("0x%x"), v32);
			break;

		case DATA_TYPE_UINT32:
			(void) nvpair_value_uint32(pair, &v32);
			printf(gettext("0x%x"), v32);
			break;

		case DATA_TYPE_INT64:
			(void) nvpair_value_int64(pair, (void *)&v64);
			printf(gettext("0x%llx"), (u_longlong_t)v64);
			break;

		case DATA_TYPE_UINT64:
			(void) nvpair_value_uint64(pair, &v64);
			printf(gettext("0x%llx"), (u_longlong_t)v64);
			break;

		case DATA_TYPE_HRTIME:
			(void) nvpair_value_hrtime(pair, (void *)&v64);
			printf(gettext("0x%llx"), (u_longlong_t)v64);
			break;

		case DATA_TYPE_STRING:
			(void) nvpair_value_string(pair, &text);
			if (text)
				printf(gettext("\"%s\""), text);
			else
				printf(gettext("\"%s\""), "<NULL>");
			break;

		case DATA_TYPE_NVLIST:
			/* Recurse one level deeper, then close the group. */
			printf(gettext("(embedded nvlist)\n"));
			(void) nvpair_value_nvlist(pair, &child);
			zpool_do_events_nvprint(child, depth + 8);
			printf(gettext("%*s(end %s)\n"), depth, "", label);
			break;

		case DATA_TYPE_NVLIST_ARRAY: {
			nvlist_t **lists;
			uint_t idx, count;

			(void) nvpair_value_nvlist_array(pair, &lists, &count);
			printf(gettext("(%d embedded nvlists)\n"), count);
			idx = 0;
			while (idx < count) {
				printf(gettext("%*s%s[%d] = %s\n"),
				    depth, "", label, idx, "(embedded nvlist)");
				zpool_do_events_nvprint(lists[idx], depth + 8);
				printf(gettext("%*s(end %s[%i])\n"),
				    depth, "", label, idx);
				idx++;
			}
			printf(gettext("%*s(end %s)\n"), depth, "", label);
			break;
		}

		/* Integer arrays: space separated hex values on one line. */
		case DATA_TYPE_INT8_ARRAY: {
			int8_t *elems;
			uint_t idx, count;

			(void) nvpair_value_int8_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%x "), elems[idx]);
			}
			break;
		}

		case DATA_TYPE_UINT8_ARRAY: {
			uint8_t *elems;
			uint_t idx, count;

			(void) nvpair_value_uint8_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%x "), elems[idx]);
			}
			break;
		}

		case DATA_TYPE_INT16_ARRAY: {
			int16_t *elems;
			uint_t idx, count;

			(void) nvpair_value_int16_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%x "), elems[idx]);
			}
			break;
		}

		case DATA_TYPE_UINT16_ARRAY: {
			uint16_t *elems;
			uint_t idx, count;

			(void) nvpair_value_uint16_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%x "), elems[idx]);
			}
			break;
		}

		case DATA_TYPE_INT32_ARRAY: {
			int32_t *elems;
			uint_t idx, count;

			(void) nvpair_value_int32_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%x "), elems[idx]);
			}
			break;
		}

		case DATA_TYPE_UINT32_ARRAY: {
			uint32_t *elems;
			uint_t idx, count;

			(void) nvpair_value_uint32_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%x "), elems[idx]);
			}
			break;
		}

		case DATA_TYPE_INT64_ARRAY: {
			int64_t *elems;
			uint_t idx, count;

			(void) nvpair_value_int64_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%llx "),
				    (u_longlong_t)elems[idx]);
			}
			break;
		}

		case DATA_TYPE_UINT64_ARRAY: {
			uint64_t *elems;
			uint_t idx, count;

			(void) nvpair_value_uint64_array(pair, &elems, &count);
			for (idx = 0; idx < count; idx++) {
				printf(gettext("0x%llx "),
				    (u_longlong_t)elems[idx]);
			}
			break;
		}

		/* Types this printer does not decode. */
		case DATA_TYPE_STRING_ARRAY:
		case DATA_TYPE_BOOLEAN_ARRAY:
		case DATA_TYPE_BYTE_ARRAY:
		case DATA_TYPE_DOUBLE:
		case DATA_TYPE_UNKNOWN:
			printf(gettext("<unknown>"));
			break;
		}

		/*
		 * Terminate the line.  The nvlist cases already ended theirs,
		 * so for them this produces a blank separator line.
		 */
		printf(gettext("\n"));

		pair = nvlist_next_nvpair(nvl, pair);
	}
}

/*
 * List events: print a "TIME CLASS" header, then repeatedly fetch events
 * with zpool_events_next() and print a one-line summary for each, followed
 * by a full nvlist dump when opts->verbose is set.
 *
 * A descriptor on ZFS_DEV is opened for the duration of the listing and
 * handed to every zpool_events_next() call; failure to open or close it
 * trips a VERIFY.  The loop ends when zpool_events_next() returns non-zero
 * or yields a NULL nvlist.
 *
 * Returns the last value returned by zpool_events_next().
 */
static int
zpool_do_events_next(ev_opts_t *opts)
{
	nvlist_t *event;
	int dropped;
	int err;
	int fd = open(ZFS_DEV, O_RDWR);

	VERIFY(fd >= 0);

	(void) printf(gettext("%-30s %s\n"), "TIME", "CLASS");

	for (;;) {
		/* The follow option is normalised to exactly 0 or 1. */
		err = zpool_events_next(g_zfs, &event, &dropped,
		    opts->follow != 0, fd);
		if (err != 0 || event == NULL)
			break;

		if (dropped > 0) {
			(void) printf(gettext("dropped %d events\n"),
			    dropped);
		}

		zpool_do_events_short(event);

		if (opts->verbose) {
			zpool_do_events_nvprint(event, 8);
			printf(gettext("\n"));
		}

		/* Each fetched event is released once it has been printed. */
		nvlist_free(event);
	}

	VERIFY(close(fd) == 0);

	return (err);
}

/*
 * Clear the queued events via zpool_events_clear() and, on success, report
 * the count it handed back.  'opts' is accepted for symmetry with
 * zpool_do_events_next() but is not consulted.
 *
 * Returns the value returned by zpool_events_clear().
 */
static int
zpool_do_events_clear(ev_opts_t *opts)
{
	int cleared;
	int err = zpool_events_clear(g_zfs, &cleared);

	if (err == 0)
		(void) printf(gettext("cleared %d events\n"), cleared);

	return (err);
}

/*
 * zpool events [-vfc]
 *
 * Display the events logged by ZFS, or clear them.
 *
 *	-v	Also print the full contents of each event.
 *	-f	Set the follow flag passed on to the event listing.
 *	-c	Clear the queued events instead of listing them.
 *
 * Returns the result of the clear or list helper that was run.
 */
int
zpool_do_events(int argc, char **argv)
{
	ev_opts_t opts = { 0 };
	int opt;

	/* check options */
	while ((opt = getopt(argc, argv, "vfc")) != -1) {
		if (opt == 'v') {
			opts.verbose = 1;
		} else if (opt == 'f') {
			opts.follow = 1;
		} else if (opt == 'c') {
			opts.clear = 1;
		} else if (opt == '?') {
			(void) fprintf(stderr,
			    gettext("invalid option '%c'\n"), optopt);
			usage(B_FALSE);
		}
	}
	argc -= optind;
	argv += optind;

	/* -c takes precedence: when set, nothing is listed. */
	if (opts.clear)
		return (zpool_do_events_clear(&opts));

	return (zpool_do_events_next(&opts));
}

static int
get_callback(zpool_handle_t *zhp, void *data)
{
Expand Down
2 changes: 2 additions & 0 deletions lib/libzfs/include/libzfs.h
Expand Up @@ -368,6 +368,8 @@ extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
extern void zpool_set_history_str(const char *subcommand, int argc,
char **argv, char *history_str);
extern int zpool_stage_history(libzfs_handle_t *, const char *);
/*
 * Event interfaces used by 'zpool events'.
 *
 * NOTE(review): the parameter roles below are inferred from the call sites
 * in cmd/zpool/zpool_main.c -- confirm against the libzfs implementation.
 *
 * zpool_events_next(hdl, &nvl, &dropped, flag, fd): the caller passes an
 * nvlist out-pointer (freed by the caller with nvlist_free() when non-NULL),
 * an int out-pointer it reports as a count of dropped events, a 0/1 flag
 * derived from the -f option, and an open descriptor on ZFS_DEV.  A non-zero
 * return or a NULL nvlist ends the caller's loop.
 *
 * zpool_events_clear(hdl, &count): on a zero return the caller reports
 * 'count' as the number of events cleared.
 */
extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int);
extern int zpool_events_clear(libzfs_handle_t *, int *);
extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
size_t len);
extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
Expand Down

0 comments on commit 2668527

Please sign in to comment.