Skip to content

Commit

Permalink
Merge pull request #154 from virtualopensystems/vhost-dev
Browse files Browse the repository at this point in the history
vhost-user with zero copy
  • Loading branch information
lukego committed Apr 30, 2014
2 parents 8d4b6bd + 8694d63 commit c244ac4
Show file tree
Hide file tree
Showing 10 changed files with 721 additions and 469 deletions.
24 changes: 21 additions & 3 deletions src/apps/basic/basic_apps.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module(...,package.seeall)

local app = require("core.app")
local buffer = require("core.buffer")
local freelist = require("core.freelist")
local packet = require("core.packet")
local link = require("core.link")

Expand All @@ -24,15 +25,32 @@ end

Source = setmetatable({zone = "Source"}, {__index = Basic})

function Source:new()
return setmetatable({}, {__index=Source})
-- Create a new Source instance.
-- 'size' is stored as the instance's 'size' field after conversion with
-- tonumber(); when the conversion yields nil the field defaults to 60.
function Source:new(size)
   local instance = { size = tonumber(size) or 60 }
   return setmetatable(instance, { __index = Source })
end

-- Take buffers for generated packets from the freelist 'fl'.
-- 'fl' is mandatory: a nil or false value raises an assertion error and
-- leaves the previously configured freelist untouched.
function Source:set_rx_buffer_freelist (fl)
   -- assert() returns its argument on success, so check and store at once.
   self.rx_buffer_freelist = assert(fl)
end

function Source:pull ()
local fl = self.rx_buffer_freelist
for _, o in ipairs(self.outputi) do
for i = 1, link.nwritable(o) do
local b = nil
if fl then
if freelist.nfree(fl) > 0 then
b = freelist.remove(fl)
else
return
end
else
b = buffer.allocate()
end
local p = packet.allocate()
packet.add_iovec(p, buffer.allocate(), 60)
packet.add_iovec(p, b, self.size)
link.transmit(o, p)
end
end
Expand Down
217 changes: 115 additions & 102 deletions src/apps/vhost/vhost_user.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,129 +12,142 @@
#include "vhost.h"
#include "vhost_user.h"

#define VHOST_USER_MSG_SIZE 12

int vhost_user_connect(const char *path) {
int sock;
struct sockaddr_un un;

if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
perror("socket");
return -1;
}

un.sun_family = AF_UNIX;
strncpy(un.sun_path, path, sizeof(un.sun_path));

if (connect(sock, (struct sockaddr *) &un, sizeof(un)) == -1) {
close(sock);
return -1;
}

return sock;
// Size in bytes of member m of struct type t. The null-pointer member
// expression is only an operand of sizeof and is never evaluated.
#define MEMB_SIZE(t,m) (sizeof(((t*)0)->m))
// Number of bytes of a struct vhost_user_msg that precede its payload:
// the combined sizes of the request, flags and size members.
#define VHOST_USER_HDR_SIZE (MEMB_SIZE(struct vhost_user_msg,request) \
+ MEMB_SIZE(struct vhost_user_msg,flags) \
+ MEMB_SIZE(struct vhost_user_msg,size))

// Connect to the UNIX stream socket located at 'path'.
//
// Returns the connected socket descriptor, or -1 on failure with errno
// describing the error. A path that does not fit in sun_path together
// with its terminating NUL is rejected with ENAMETOOLONG instead of being
// silently truncated into a different, unterminated name.
int vhost_user_connect(const char *path)
{
    int sock;
    int saved_errno;
    struct sockaddr_un un;

    if (strlen(path) >= sizeof(un.sun_path)) {
        errno = ENAMETOOLONG;
        return -1;
    }

    if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
        perror("socket");
        return -1;
    }

    // Zero the whole address first: this guarantees sun_path is
    // NUL-terminated and that no uninitialized stack bytes reach connect().
    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    strncpy(un.sun_path, path, sizeof(un.sun_path) - 1);

    if (connect(sock, (struct sockaddr *) &un, sizeof(un)) == -1) {
        // close() may overwrite errno; keep the connect() error for callers.
        saved_errno = errno;
        close(sock);
        errno = saved_errno;
        return -1;
    }

    return sock;
}

int vhost_user_accept(int sock) {
int newsock;
if ((newsock = accept(sock, NULL, NULL)) == -1) {
assert(errno == EAGAIN);
} else {
assert(fcntl(newsock, F_SETFL, O_NONBLOCK) == 0);
}
return newsock;
// Accept a pending connection on the listening socket 'sock' and switch
// the new socket to non-blocking mode.
//
// Returns the new descriptor, or -1 when accept() fails. The only accept()
// failure the code anticipates (and asserts on) is EAGAIN, i.e. no
// connection is pending on a non-blocking listener.
int vhost_user_accept(int sock)
{
    int rc;
    int newsock = accept(sock, NULL, NULL);

    if (newsock == -1) {
        assert(errno == EAGAIN);
        return -1;
    }

    // The fcntl() call must live outside assert(): with NDEBUG defined the
    // asserted expression is not evaluated at all, which would leave the
    // socket in blocking mode.
    rc = fcntl(newsock, F_SETFL, O_NONBLOCK);
    assert(rc == 0);
    (void) rc;  // silence the unused-variable warning in NDEBUG builds

    return newsock;
}

int vhost_user_send(int sock, struct vhost_user_msg *msg) {
int ret;
int vhost_user_send(int sock, struct vhost_user_msg *msg)
{
int ret;

struct msghdr msgh;
struct iovec iov[1];
struct msghdr msgh;
struct iovec iov[1];

memset(&msgh, 0, sizeof(msgh));
memset(&msgh, 0, sizeof(msgh));

iov[0].iov_base = (void *)msg;
iov[0].iov_len = VHOST_USER_MSG_SIZE + msg->size;
iov[0].iov_base = (void *) msg;
iov[0].iov_len = VHOST_USER_HDR_SIZE + msg->size;

msgh.msg_iov = iov;
msgh.msg_iovlen = 1;
msgh.msg_iov = iov;
msgh.msg_iovlen = 1;

msgh.msg_control = 0;
msgh.msg_controllen = 0;
msgh.msg_control = 0;
msgh.msg_controllen = 0;

printf("vhost_user_send %d %d %d %d\n", msg->request, msg->flags, msg->size, (int)iov[0].iov_len);
printf("vhost_user_send %d %d %d %d\n", msg->request, msg->flags, msg->size,
(int) iov[0].iov_len);

do {
ret = sendmsg(sock, &msgh, 0);
} while (ret < 0 && errno == EINTR);
do {
ret = sendmsg(sock, &msgh, 0);
} while (ret < 0 && errno == EINTR);

if (ret < 0) {
perror("sendmsg");
}
if (ret < 0) {
perror("sendmsg");
}

return ret;
return ret;
}

int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds, int *nfds) {
struct msghdr msgh;
struct iovec iov[1];
int ret;

int fd_size = sizeof(int) * VHOST_USER_MEMORY_MAX_NREGIONS;
char control[CMSG_SPACE(fd_size)];
struct cmsghdr *cmsg;

memset(&msgh, 0, sizeof(msgh));
memset(control, 0, sizeof(control));
*nfds = 0;

iov[0].iov_base = (void *) msg;
iov[0].iov_len = VHOST_USER_MSG_SIZE;

msgh.msg_iov = iov;
msgh.msg_iovlen = 1;
msgh.msg_control = control;
msgh.msg_controllen = sizeof(control);

do {
ret = recvmsg(sock, &msgh, MSG_DONTWAIT | MSG_WAITALL);
} while (ret < 0 && errno == EINTR);
if (ret == VHOST_USER_MSG_SIZE) {
if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
ret = -1;
} else {
// Copy file descriptors
cmsg = CMSG_FIRSTHDR(&msgh);
if (cmsg && cmsg->cmsg_len > 0&&
cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_RIGHTS) {
if (fd_size >= cmsg->cmsg_len - CMSG_LEN(0)) {
fd_size = cmsg->cmsg_len - CMSG_LEN(0);
memcpy(fds, CMSG_DATA(cmsg), fd_size);
*nfds = fd_size / sizeof(int);
int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds,
int *nfds)
{
struct msghdr msgh;
struct iovec iov[1];
int ret;

int fd_size = sizeof(int) * VHOST_USER_MEMORY_MAX_NREGIONS;
char control[CMSG_SPACE(fd_size)];
struct cmsghdr *cmsg;

memset(&msgh, 0, sizeof(msgh));
memset(control, 0, sizeof(control));
*nfds = 0;

iov[0].iov_base = (void *) msg;
iov[0].iov_len = VHOST_USER_HDR_SIZE;

msgh.msg_iov = iov;
msgh.msg_iovlen = 1;
msgh.msg_control = control;
msgh.msg_controllen = sizeof(control);

do {
ret = recvmsg(sock, &msgh, MSG_DONTWAIT | MSG_WAITALL);
} while (ret < 0 && errno == EINTR);
if (ret == VHOST_USER_HDR_SIZE) {
if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
ret = -1;
} else {
// Copy file descriptors
cmsg = CMSG_FIRSTHDR(&msgh);
if (cmsg && cmsg->cmsg_len > 0&&
cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_RIGHTS) {
if (fd_size >= cmsg->cmsg_len - CMSG_LEN(0)) {
fd_size = cmsg->cmsg_len - CMSG_LEN(0);
memcpy(fds, CMSG_DATA(cmsg), fd_size);
*nfds = fd_size / sizeof(int);
}
}
if (msg->size > 0) {
do {
ret = read(sock, ((char*) msg) + VHOST_USER_HDR_SIZE,
msg->size);
} while (ret < 0 && errno == EINTR);
}
}
}
if (msg->size > 0) {
do {
ret = read(sock, ((char*)msg)+VHOST_USER_MSG_SIZE, msg->size);
} while (ret < 0 && errno == EINTR);
}
}
}
if (ret < 0 && errno != EAGAIN) {
perror("recvmsg");
}
return ret;
if (ret < 0 && errno != EAGAIN) {
perror("recvmsg");
}
return ret;
}

void* vhost_user_map_guest_memory(int fd, int size) {
void *ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
return ptr == MAP_FAILED ? 0 : ptr;
// Map 'size' bytes of the file behind 'fd', starting at offset 0, as a
// shared mapping that is both readable and writable.
// Returns the mapped address, or a null pointer when mmap() fails.
void* vhost_user_map_guest_memory(int fd, int size)
{
    void *mapping = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd, 0);

    // Translate mmap()'s MAP_FAILED sentinel into a plain null pointer.
    if (mapping == MAP_FAILED) {
        return NULL;
    }
    return mapping;
}

int vhost_user_unmap_guest_memory(void *ptr, int size) {
return munmap(ptr, size);
int vhost_user_unmap_guest_memory(void *ptr, int size)
{
return munmap(ptr, size);
}

int vhost_user_sync_shm(void *ptr, size_t size) {
return msync(ptr, size, MS_SYNC | MS_INVALIDATE);
// Call msync() on the 'size' bytes starting at 'ptr' with the flags
// MS_SYNC | MS_INVALIDATE.
// Returns the result of msync() unchanged.
int vhost_user_sync_shm(void *ptr, size_t size)
{
    const int sync_flags = MS_SYNC | MS_INVALIDATE;

    return msync(ptr, size, sync_flags);
}
51 changes: 29 additions & 22 deletions src/apps/vhost/vhost_user.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
enum { VHOST_USER_MEMORY_MAX_NREGIONS = 8 };
enum {
VHOST_USER_MEMORY_MAX_NREGIONS = 8
};

// vhost_user request types
enum {
Expand All @@ -17,40 +19,45 @@ enum {
VHOST_USER_SET_VRING_KICK = 12,
VHOST_USER_SET_VRING_CALL = 13,
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_NET_SET_BACKEND = 15,
VHOST_USER_ECHO = 16,
VHOST_USER_MAX
};

struct vhost_user_memory_region {
uint64_t guest_phys_addr;
uint64_t memory_size;
uint64_t userspace_addr;
uint64_t guest_phys_addr;
uint64_t memory_size;
uint64_t userspace_addr;
};

struct vhost_user_memory {
uint32_t nregions;
uint32_t padding;
struct vhost_user_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS];
uint32_t nregions;
uint32_t padding;
struct vhost_user_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS];
};

struct vhost_user_msg {
int request;
uint32_t flags;
uint32_t size;
union {
uint64_t u64;
// defined in vhost.h
struct vhost_vring_state state;
struct vhost_vring_addr addr;
struct vhost_user_memory memory;
};
} __attribute__((packed));
// Bit masks used with vhost-user messages.
// NOTE(review): judging by the names, VERSION_MASK (bits 0-1) and
// REPLY_MASK (bit 2) presumably apply to vhost_user_msg.flags, while
// VRING_IDX_MASK (bits 0-7) and VRING_NOFD_MASK (bit 8) presumably apply
// to a vring-related payload value. The code that uses these masks is not
// visible here; confirm against the callers.
enum {
VHOST_USER_VERSION_MASK = (0x3),
VHOST_USER_REPLY_MASK = (0x1 << 2),
VHOST_USER_VRING_IDX_MASK = (0xff),
VHOST_USER_VRING_NOFD_MASK = (0x1 << 8)
};

// A vhost-user message: the request, flags and size members followed by
// a payload stored in the anonymous union. The struct is declared packed,
// so the compiler inserts no padding between members.
struct vhost_user_msg {
// Presumably one of the VHOST_USER_* request types; confirm with callers.
int request;
uint32_t flags;
// Payload length in bytes. vhost_user_send() transmits this many bytes
// after the three leading members, and vhost_user_receive() reads this
// many bytes into the union.
uint32_t size;
union {
uint64_t u64;
// defined in vhost.h
struct vhost_vring_state state;
struct vhost_vring_addr addr;
struct vhost_user_memory memory;
};
}__attribute__((packed));

int vhost_user_connect(const char *path);
int vhost_user_send(int sock, struct vhost_user_msg *msg);
int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds, int *nfds);
int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds,
int *nfds);
void* vhost_user_map_guest_memory(int fd, int size);
int vhost_user_unmap_guest_memory(void *ptr, int size);
int vhost_user_sync_shm(void *ptr, size_t size);

0 comments on commit c244ac4

Please sign in to comment.