Skip to content

Commit

Permalink
server: Minimal implementation of NBD Structured Replies.
Browse files Browse the repository at this point in the history
This is about the simplest implementation of NBD Structured Replies
(SR) that we can do right now.

It accepts NBD_OPT_STRUCTURED_REPLIES when negotiated by the client,
but only sends back the simplest possible SR when required to by
NBD_CMD_READ.  The rest of the time it will send back simple replies
as before.  We do not modify the plugin API so plugins are unable to
send complex SRs.

Also we do not handle human-readable error messages yet because that
would require changes in how we handle nbdkit_error().

Also we do not understand NBD_CMD_FLAG_DF, but that seems to be OK
because (a) we don't advertize the feature and (b) we only send back a
single chunk anyway.
  • Loading branch information
rwmjones committed Mar 8, 2019
1 parent b165ff3 commit eaa4c6e
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 48 deletions.
6 changes: 5 additions & 1 deletion docs/nbdkit-protocol.pod
Expand Up @@ -122,7 +122,11 @@ Supported in nbdkit E<ge> 1.9.9.

=item Structured Replies

I<Not supported>.
Supported in nbdkit E<ge> 1.11.8.

However we don’t expose the capability to send structured replies to
plugins yet, nor do we send human-readable error messages using this
facility.

=item Block Status

Expand Down
4 changes: 2 additions & 2 deletions plugins/nbd/nbd.c
Expand Up @@ -345,15 +345,15 @@ nbd_request (struct handle *h, uint16_t flags, uint16_t type, uint64_t offset,
static int
nbd_reply_raw (struct handle *h, int *fd)
{
struct reply rep;
struct simple_reply rep;
struct transaction *trans;
void *buf;
uint32_t count;

*fd = -1;
if (read_full (h->fd, &rep, sizeof rep) < 0)
return nbd_mark_dead (h);
if (be32toh (rep.magic) != NBD_REPLY_MAGIC)
if (be32toh (rep.magic) != NBD_SIMPLE_REPLY_MAGIC)
return nbd_mark_dead (h);
nbdkit_debug ("received reply for cookie %#" PRIx64 ", status %s",
rep.handle, name_of_nbd_error(be32toh (rep.error)));
Expand Down
189 changes: 157 additions & 32 deletions server/connections.c
Expand Up @@ -86,6 +86,7 @@ struct connection {
bool can_fua;
bool can_multi_conn;
bool using_tls;
bool structured_replies;

int sockin, sockout;
connection_recv_function recv;
Expand Down Expand Up @@ -905,6 +906,26 @@ _negotiate_handshake_newstyle_options (struct connection *conn)

break;

case NBD_OPT_STRUCTURED_REPLY:
if (optlen != 0) {
if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
== -1)
return -1;
if (conn_recv_full (conn, data, optlen,
"read: %s: %m", name_of_nbd_opt (option)) == -1)
return -1;
continue;
}

debug ("newstyle negotiation: %s: client requested structured replies",
name_of_nbd_opt (option));

if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
return -1;

conn->structured_replies = true;
break;

default:
/* Unknown option. */
if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_UNSUP) == -1)
Expand Down Expand Up @@ -1224,12 +1245,123 @@ nbd_errno (int error)
}
}

static int
send_simple_reply (struct connection *conn,
uint64_t handle, uint16_t cmd,
const char *buf, uint32_t count,
uint32_t error)
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
struct simple_reply reply;
int r;

reply.magic = htobe32 (NBD_SIMPLE_REPLY_MAGIC);
reply.handle = handle;
reply.error = htobe32 (nbd_errno (error));

r = conn->send (conn, &reply, sizeof reply);
if (r == -1) {
nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}

/* Send the read data buffer. */
if (cmd == NBD_CMD_READ && !error) {
r = conn->send (conn, buf, count);
if (r == -1) {
nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}
}

return 1; /* command processed ok */
}

static int
send_structured_reply_read (struct connection *conn,
uint64_t handle, uint16_t cmd,
const char *buf, uint32_t count, uint64_t offset)
{
/* Once we are really using structured replies and sending data back
* in chunks, we'll be able to grab the write lock for each chunk,
* allowing other threads to interleave replies. As we're not doing
* that yet we acquire the lock for the whole function.
*/
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
struct structured_reply reply;
struct structured_reply_offset_data offset_data;
int r;

assert (cmd == NBD_CMD_READ);

reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
reply.handle = handle;
reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
reply.type = htobe16 (NBD_REPLY_TYPE_OFFSET_DATA);
reply.length = htobe32 (count + sizeof offset_data);

r = conn->send (conn, &reply, sizeof reply);
if (r == -1) {
nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}

/* Send the offset + read data buffer. */
offset_data.offset = htobe64 (offset);
r = conn->send (conn, &offset_data, sizeof offset_data);
if (r == -1) {
nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}

r = conn->send (conn, buf, count);
if (r == -1) {
nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}

return 1; /* command processed ok */
}

static int
send_structured_reply_error (struct connection *conn,
uint64_t handle, uint16_t cmd, uint32_t error)
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
struct structured_reply reply;
struct structured_reply_error error_data;
int r;

reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
reply.handle = handle;
reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
reply.type = htobe16 (NBD_REPLY_TYPE_ERROR);
reply.length = htobe32 (0 /* no human readable error */ + sizeof error_data);

r = conn->send (conn, &reply, sizeof reply);
if (r == -1) {
nbdkit_error ("write error reply: %m");
return set_status (conn, -1);
}

/* Send the error. */
error_data.error = htobe32 (error);
error_data.len = htobe16 (0);
r = conn->send (conn, &error_data, sizeof error_data);
if (r == -1) {
nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}
/* No human readable error message at the moment. */

return 1; /* command processed ok */
}

static int
recv_request_send_reply (struct connection *conn)
{
int r;
struct request request;
struct reply reply;
uint16_t cmd, flags;
uint32_t magic, count, error = 0;
uint64_t offset;
Expand Down Expand Up @@ -1317,40 +1449,33 @@ recv_request_send_reply (struct connection *conn)

/* Send the reply packet. */
send_reply:
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
if (get_status (conn) < 0)
return -1;
reply.magic = htobe32 (NBD_REPLY_MAGIC);
reply.handle = request.handle;
reply.error = htobe32 (nbd_errno (error));

if (error != 0) {
/* Since we're about to send only the limited NBD_E* errno to the
* client, don't lose the information about what really happened
* on the server side. Make sure there is a way for the operator
* to retrieve the real error.
*/
debug ("sending error reply: %s", strerror (error));
}

r = conn->send (conn, &reply, sizeof reply);
if (r == -1) {
nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}
if (get_status (conn) < 0)
return -1;

/* Send the read data buffer. */
if (cmd == NBD_CMD_READ && !error) {
r = conn->send (conn, buf, count);
if (r == -1) {
nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
return set_status (conn, -1);
}
}
if (error != 0) {
/* Since we're about to send only the limited NBD_E* errno to the
* client, don't lose the information about what really happened
* on the server side. Make sure there is a way for the operator
* to retrieve the real error.
*/
debug ("sending error reply: %s", strerror (error));
}

return 1; /* command processed ok */
/* Currently we prefer to send simple replies for everything except
* where we have to (ie. NBD_CMD_READ when structured_replies have
* been negotiated). However this prevents us from sending
* human-readable error messages to the client, so we should
* reconsider this in future.
*/
if (conn->structured_replies && cmd == NBD_CMD_READ) {
if (!error)
return send_structured_reply_read (conn, request.handle, cmd,
buf, count, offset);
else
return send_structured_reply_error (conn, request.handle, cmd, error);
}
else
return send_simple_reply (conn, request.handle, cmd, buf, count, error);
}

/* Write buffer to conn->sockout and either succeed completely
Expand Down
59 changes: 47 additions & 12 deletions server/protocol.h
@@ -1,5 +1,5 @@
/* nbdkit
* Copyright (C) 2013-2018 Red Hat Inc.
* Copyright (C) 2013-2019 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -98,12 +98,13 @@ extern const char *name_of_nbd_flag (int);

/* NBD options (new style handshake only). */
extern const char *name_of_nbd_opt (int);
#define NBD_OPT_EXPORT_NAME 1
#define NBD_OPT_ABORT 2
#define NBD_OPT_LIST 3
#define NBD_OPT_STARTTLS 5
#define NBD_OPT_INFO 6
#define NBD_OPT_GO 7
#define NBD_OPT_EXPORT_NAME 1
#define NBD_OPT_ABORT 2
#define NBD_OPT_LIST 3
#define NBD_OPT_STARTTLS 5
#define NBD_OPT_INFO 6
#define NBD_OPT_GO 7
#define NBD_OPT_STRUCTURED_REPLY 8

extern const char *name_of_nbd_rep (int);
#define NBD_REP_ACK 1
Expand Down Expand Up @@ -144,15 +145,49 @@ struct request {
uint32_t count; /* Request length. */
} __attribute__((packed));

/* Reply (server -> client). */
struct reply {
uint32_t magic; /* NBD_REPLY_MAGIC. */
/* Simple reply (server -> client). */
struct simple_reply {
uint32_t magic; /* NBD_SIMPLE_REPLY_MAGIC. */
uint32_t error; /* NBD_SUCCESS or one of NBD_E*. */
uint64_t handle; /* Opaque handle. */
} __attribute__((packed));

#define NBD_REQUEST_MAGIC 0x25609513
#define NBD_REPLY_MAGIC 0x67446698
/* Structured reply (server -> client). */
struct structured_reply {
uint32_t magic; /* NBD_STRUCTURED_REPLY_MAGIC. */
uint16_t flags; /* NBD_REPLY_FLAG_* */
uint16_t type; /* NBD_REPLY_TYPE_* */
uint64_t handle; /* Opaque handle. */
uint32_t length; /* Length of payload which follows. */
} __attribute__((packed));

struct structured_reply_offset_data {
uint64_t offset; /* offset */
/* Followed by data. */
} __attribute__((packed));

struct structured_reply_error {
uint32_t error; /* NBD_E* error number */
uint16_t len; /* Length of human readable error. */
/* Followed by human readable error string. */
} __attribute__((packed));

#define NBD_REQUEST_MAGIC 0x25609513
#define NBD_SIMPLE_REPLY_MAGIC 0x67446698
#define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef

/* Structured reply flags. */
extern const char *name_of_nbd_reply_flag (int);
#define NBD_REPLY_FLAG_DONE (1<<0)

/* Structured reply types. */
extern const char *name_of_nbd_reply_type (int);
#define NBD_REPLY_TYPE_NONE 0
#define NBD_REPLY_TYPE_OFFSET_DATA 1
#define NBD_REPLY_TYPE_OFFSET_HOLE 2
#define NBD_REPLY_TYPE_BLOCK_STATUS 3
#define NBD_REPLY_TYPE_ERROR ((1<<15) + 1)
#define NBD_REPLY_TYPE_ERROR_OFFSET ((1<<15) + 2)

/* NBD commands. */
extern const char *name_of_nbd_cmd (int);
Expand Down
2 changes: 1 addition & 1 deletion tests/test-layers.c
Expand Up @@ -92,7 +92,7 @@ main (int argc, char *argv[])
struct new_handshake_finish handshake_finish;
uint16_t eflags;
struct request request;
struct reply reply;
struct simple_reply reply;
char data[512];

#ifndef HAVE_EXIT_WITH_PARENT
Expand Down

0 comments on commit eaa4c6e

Please sign in to comment.