Skip to content

Commit

Permalink
virtio.lua: Oops forgotten file :)
Browse files Browse the repository at this point in the history
  • Loading branch information
lukego committed Feb 11, 2013
1 parent df7c5f4 commit 18aebc3
Showing 1 changed file with 201 additions and 0 deletions.
201 changes: 201 additions & 0 deletions src/virtio.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
-- virtio.lua -- Linux 'vhost' interface for ethernet I/O towards the kernel.
-- Copyright 2013 Luke Gorrie
-- Apache License 2.0: http://www.apache.org/licenses/LICENSE-2.0

module(...,package.seeall)

local ffi = require("ffi")
local C = ffi.C

require("virtio_h")

function new (tapinterface)

local M = {}

local vio = ffi.new("struct vio")
local rx_freelist, tx_freelist
local rxring, txring = vio.vring[0], vio.vring[1]

local txpackets, rxpackets = 0, 0

-- Bring up the vhost device: load the kernel modules, open the tap
-- interface named by `tapinterface`, hand it to vhost together with the
-- current memory map, and mark every descriptor of both rings as free.
-- Raises (via assert) if C.vhost_open does not return 0.
function init ()
   -- XXX do this better!
   os.execute("modprobe tun")
   os.execute("modprobe vhost_net")
   local tapfd = C.open_tap(tapinterface)
   local status = C.vhost_open(vio, tapfd, memory_regions())
   assert(status == 0)
   -- Initialize freelists: Lua slot n holds descriptor index n-1, so the
   -- lists contain 0 .. VIO_VRING_SIZE-1.
   rx_freelist, tx_freelist = {}, {}
   for slot = 1, C.VIO_VRING_SIZE do
      rx_freelist[slot] = slot - 1
      tx_freelist[slot] = slot - 1
   end
end
M.init = init

-- Debugging aid: print the avail/used indexes of both vrings of `vio`,
-- plus the length recorded in the first used entry of vring 0.
function print_vio (vio)
   local ring0 = vio.vring[0]
   print("avail[0].idx:" .. tostring(ring0.avail.idx))
   print(" used[0].idx:" .. tostring(ring0.used.idx))
   print(" used[0].pktlen: " .. tostring(ring0.used.ring[0].len))
   local ring1 = vio.vring[1]
   print("avail[1].idx:" .. tostring(ring1.avail.idx))
   print(" used[1].idx:" .. tostring(ring1.used.idx))
end

local next_tx_avail = 0 -- Next available position in the tx avail ring

-- Queue the buffer at `address` (`size` bytes) for transmission.
-- The descriptor is placed in the tx avail ring but avail.idx is not
-- updated here; flush_transmit() publishes the queued entries.
-- Precondition: transmit_ready() tested to return true.
function transmit (address, size)
   local desc_id = init_transmit_descriptor(address, size)
   assert(desc_id < C.VIO_VRING_SIZE)
   local slot = next_tx_avail % C.VIO_VRING_SIZE
   txring.avail.ring[slot] = desc_id
   -- The position counter is free-running modulo 2^16.
   next_tx_avail = (next_tx_avail + 1) % 65536
end
M.transmit = transmit

-- Take a free transmit descriptor and fill it in for a single,
-- unchained buffer at `address` of `size` bytes (flags and next are 0).
-- Returns the descriptor index.
-- Precondition: transmit_ready() tested to return true.
function init_transmit_descriptor (address, size)
   local index = get_transmit_buffer()
   -- Valid descriptor indexes are 0 .. VIO_VRING_SIZE-1, so the bound is
   -- strict: an index equal to VIO_VRING_SIZE would be past the table.
   assert(index < C.VIO_VRING_SIZE)
   local d = txring.desc[index]
   d.addr, d.len, d.flags, d.next = address, size, 0, 0
   return index
end

-- Pop a free transmit descriptor index off the end of the tx freelist
-- and return it.
-- Precondition: transmit_ready() tested to return true (asserted here).
function get_transmit_buffer ()
   assert(transmit_ready())
   local index = table.remove(tx_freelist)
   return index
end

local txused = 0 -- Next position to consume in the tx used ring

-- Walk the tx used ring from `txused` up to the current used.idx,
-- returning each descriptor index found there to the tx freelist and
-- counting it as a transmitted packet.
function reclaim_transmit_buffers ()
   while txring.used.idx ~= txused do
      -- Barrier between observing used.idx and reading the ring entry.
      C.full_memory_barrier()
      local entry = txring.used.ring[txused % C.VIO_VRING_SIZE]
      tx_freelist[#tx_freelist + 1] = entry.id
      assert(#tx_freelist <= C.VIO_VRING_SIZE)
      txused = (txused + 1) % 65536
      txpackets = txpackets + 1
   end
end

-- Publish every descriptor queued by transmit(): issue a memory
-- barrier, store the new avail.idx, then kick the tx ring.
function flush_transmit ()
   C.full_memory_barrier()
   txring.avail.idx = next_tx_avail
   kick(txring)
end
M.flush_transmit = flush_transmit

-- Is there a free transmit descriptor?
-- Returns true if so, otherwise nil and a reason string.
function transmit_ready ()
   if tx_freelist[1] ~= nil then
      return true
   end
   return nil, 'no free descriptors'
end
M.transmit_ready = transmit_ready

local next_rx_avail = 0 -- Next available position in the rx avail ring

-- Offer the buffer at `address` (`size` bytes) for packet reception.
-- A free rx descriptor is filled in as a single writable buffer and
-- placed in the rx avail ring; avail.idx itself is only updated by
-- flush_rx().
-- Precondition: receive_buffer_ready() tested to return true.
function add_rxbuf (address, size)
   local bufferindex = get_rx_buffer()
   assert(bufferindex < C.VIO_VRING_SIZE)
   local desc = rxring.desc[bufferindex]
   desc.addr, desc.len = ffi.cast("uint64_t", address), size
   desc.flags, desc.next = C.VIO_DESC_F_WRITE, 0
   -- Record the descriptor in the avail ring slot that flush_rx() is
   -- about to publish. Without this store the slot keeps whatever value
   -- it had before, and the consumer of the ring is pointed at the wrong
   -- descriptor (mirrors what transmit() does for the tx ring).
   rxring.avail.ring[next_rx_avail % C.VIO_VRING_SIZE] = bufferindex
   next_rx_avail = (next_rx_avail + 1) % 65536
end
M.add_rxbuf = add_rxbuf

-- Pop a free receive descriptor index off the end of the rx freelist
-- and return it.
-- Precondition: receive_buffer_ready() tested to return true (asserted here).
function get_rx_buffer ()
   assert(receive_buffer_ready())
   local index = table.remove(rx_freelist)
   return index
end

-- Is there a receive descriptor available to store a new buffer in? [XXX name]
-- Returns a boolean: true while the rx freelist is non-empty.
function receive_buffer_ready ()
   local freelist_empty = (rx_freelist[1] == nil)
   return not freelist_empty
end
M.receive_buffer_ready = receive_buffer_ready

local rxused = 0 -- Next position to consume in the rx used ring

-- Take the next received packet off the rx used ring.
-- Returns the buffer address stored in the packet's descriptor and the
-- length recorded in the used ring entry. The descriptor index is put
-- back on the rx freelist so add_rxbuf() can reuse it.
-- Precondition: receive_packet_ready() tested to return true.
function receive ()
   assert(receive_packet_ready())
   -- Order the used.idx read (done by receive_packet_ready) before the
   -- reads of the used ring entry, as reclaim_transmit_buffers() does
   -- for the tx ring.
   C.full_memory_barrier()
   local used = rxring.used.ring[rxused % C.VIO_VRING_SIZE]
   local index, length = used.id, used.len
   local address = rxring.desc[index].addr
   rxused = (rxused + 1) % 65536
   table.insert(rx_freelist, index)
   assert(#rx_freelist <= C.VIO_VRING_SIZE)
   rxpackets = rxpackets + 1
   return address, length
end
M.receive = receive

-- Is there a received packet waiting in the rx used ring?
-- True when used.idx has moved past our consume position `rxused`.
function receive_packet_ready ()
   local caught_up = (rxused == rxring.used.idx)
   return not caught_up
end
M.receive_packet_ready = receive_packet_ready

-- Publish every buffer queued by add_rxbuf(): issue a memory barrier,
-- store the new avail.idx, then kick the rx ring.
function flush_rx()
   C.full_memory_barrier()
   rxring.avail.idx = next_rx_avail
   kick(rxring)
end
M.flush_rx = flush_rx

-- Make all of our DMA memory usable as vhost packet buffers by sending
-- a freshly built memory map to vhost.
function update_vhost_memory_map ()
   local map = memory_regions()
   C.vhost_set_memory(vio, map)
end

-- Construct a vhost memory map for the kernel. The memory map
-- includes all of our currently allocated DMA buffers and reuses
-- the address space of this process. This means that we can use
-- ordinary pointer addresses to DMA buffers in our vring
-- descriptors.
--
-- Returns a new "struct vio_memory" with one entry per element of
-- memory.dma_regions; guest_phys_addr and userspace_addr are both set
-- to the region's address.
function memory_regions ()
   local map = ffi.new("struct vio_memory")
   local regions = memory.dma_regions
   map.nregions = #regions
   map.padding = 0
   for position, region in ipairs(regions) do
      -- ipairs counts from 1; the C array is indexed from 0.
      local entry = map.regions + (position - 1)
      entry.guest_phys_addr = region.address
      entry.userspace_addr = region.address
      entry.memory_size = region.size
      entry.flags_padding = 0
   end
   return map
end

-- Print the running totals of transmitted and received packets.
function print_stats ()
   local sent = lib.comma_value(txpackets)
   print("packets transmitted: " .. sent)
   local received = lib.comma_value(rxpackets)
   print("packets received: " .. received)
end

-- Selftest procedure to read packets from a tap device and write them back.
-- Runs for 10 seconds, then prints the packet counters. Each pass:
-- refill the rx ring with fresh 2048-byte DMA buffers, echo every
-- received packet for which a tx descriptor is free, flush both rings,
-- and reclaim completed tx descriptors.
function M.selftest ()
   local duration = 10
   local deadline = C.get_time_ns() + duration * 1e9
   print("Echoing packets for "..duration.." second(s).")
   repeat
      while receive_buffer_ready() do
         local buffer = memory.dma_alloc(2048)
         add_rxbuf(buffer, 2048)
         -- XXX memory.lua should call this automatically when needed
         update_vhost_memory_map()
      end
      while transmit_ready() and receive_packet_ready() do
         -- receive() yields (address, length), exactly transmit()'s arguments.
         transmit(receive())
      end
      flush_rx()
      flush_transmit()
      reclaim_transmit_buffers()
   until C.get_time_ns() > deadline
   print_stats()
end

-- Signal the kernel via the 'kick' eventfd that there is new data.
-- Writes the 8-byte value 1 to `ring.kickfd`; the result of the write
-- is not checked.
function kick (ring)
   local counter = ffi.new("uint64_t[1]", 1)
   C.write(ring.kickfd, counter, 8)
end

return M
end

-- Module-level selftest: create a virtio instance on a tap interface
-- named from the "vio%d" template, initialize it and run its echo test.
function selftest ()
   print("Testing vhost (virtio) support.")
   local device = virtio.new("vio%d")
   device.init()
   device.selftest()
end

7 comments on commit 18aebc3

@pkazmier
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Luke, when I run on my machine or bern, I see no packets being echoed. Is there something else I need to do to my machine to make this work?

pkazmier@bern:~/snabbswitch/src$ sudo flock -x /tmp/snabb.lock ./snabbswitch
[sudo] password for pkazmier: 
Testing vhost (virtio) support.
Echoing packets for 10 second(s).
packets transmitted: 0
packets received:    0

@lukego
Copy link
Member Author

@lukego lukego commented on 18aebc3 Feb 14, 2013 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pkazmier
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I haven't even had a chance to look at the code. Work has been a killer!

It looks like I just missed you in IRC (you popped in and then out a few minutes ago). I've been meaning to ask you guys, but don't know of the best place to do so.

     <petekaz> Good morning / evening.                          [07:56]               
     <petekaz> Doesn't STT seems like a giant hack? I'm still reading                 
               the spec, but reusing TCP like that is like a giant                    
               code smell.                                      [08:28]               
     <petekaz> Not to mention that losing STT segments over a physical                
               network would be as bad as IP fragmentation today.                     
                                                                [08:30]               
     <petekaz> I.e., something to be avoided.                                         
     <petekaz> I suppose you'd just want to configure a host with an                  
               artificially smaller MTU to ensure that your entire                    
               frame fits into one STT segment.                 [08:32]               
     <petekaz> I end up doing the same for traffic that's ultimately                  
               going through a GRE tunnel.                                            
     <petekaz> today, at work, we have built an overlay IP network                    
               ontop of another iP network using a combination of VLAN                
               segmentation for L2 and Virtual Routing Forwarding                     
               (Cisco's version of router virtualization - think of it                
               as a guest router on a host router) and GRE                            
               tunnels. Over those tunnels we make sure the routers                   
               mangle the MSS to account for the GRE overhead (ip tcp                 
               adjust-mss in IOS parlance).                     [08:36]               
     <petekaz> BTW, tried the new vhost code real fast:         [08:38]               
     <petekaz> esting vhost (virtio) support.                                         
     <petekaz> Echoing packets for 10 second(s).                                      
     <petekaz> packets transmitted: 0                                                 
     <petekaz> packets received:    0                                                 
     <petekaz> I assume I was supposed to see something other than 0s                 
               here?                                                                  
     <petekaz> :-)                                              [08:39]               
     <petekaz> I'll take a look at why later ... time to go to the day                
               job (technology management)                      [08:40]         

@lukego
Copy link
Member Author

@lukego lukego commented on 18aebc3 Feb 15, 2013 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@etnt
Copy link

@etnt etnt commented on 18aebc3 Feb 15, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been wondering whether the possibility of creating your own MAC addressing scheme has ever been used
(i.e. setting the second bit of the MSB in the MAC address to 1)?
It would then be easy to create a segmentation schema with a large amount of 'vlans'/'unit' addresses.
Especially in a virtualized environment where you easily can setup the MACs of the VMs this could
potentially be useful (or?). In such an environment, tenants tend to live behind FWs so the local
MAC scheme is hidden. Furthermore, if running OpenFlow-capable switches, the controller would
be able to build up a complete link-state DB (like OSPF I guess?) which would make it possible to
enforce the L2 separation and also to setup complete flow-paths through the network. Also, mobility
would be easy to handle I think.

But I guess I'm over-simplifying things due to my lack of experience with these kinds of environments.

Cheers, Tobbe

@lukego
Copy link
Member Author

@lukego lukego commented on 18aebc3 Feb 15, 2013 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@etnt
Copy link

@etnt etnt commented on 18aebc3 Feb 15, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice dump-of-brain :-)
Hopefully we can meet for more in-depth discussions not too far off in the future....
Cheers, Tobbe

Please sign in to comment.