Skip to content

Commit

Permalink
Continued adventures in broken mesh networking. (#251)
Browse files Browse the repository at this point in the history
* Tweak config drive to remove Ubuntu 18.04 parse errors.

We needed to change the interface type. Fixes #250.

* Increase gunicorn timeout to fix Ubuntu downloads.

* Keep track of networks a bit better.

* Handle the network being entirely missing.

* Remove stray networks.

Fixes #161.

* Remove network interfaces which are not associated with a running instance.

Fixes #252.

* Handle missing fields in sf-client pretty output.

Fixes #253.
  • Loading branch information
mikalstill committed Aug 2, 2020
1 parent d96f683 commit b4d99a6
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 29 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Expand Up @@ -23,6 +23,7 @@ packages =
console_scripts =
sf-daemon = shakenfist.daemons.main:main
sf-client = shakenfist.client.main:cli
sf-networkexplainer = shakenfist.client.networkexplainer:main

This comment has been minimized.

Copy link
@mandoonandy

mandoonandy Aug 2, 2020

Contributor

Hmmm. Should I have approved this line?

sf-passwd = shakenfist.client.passwd:main
sf-upgrade = shakenfist.client.upgrade:main

Expand Down
2 changes: 1 addition & 1 deletion shakenfist/client/main.py
Expand Up @@ -597,7 +597,7 @@ def instance_list(ctx, all=False):
def _pretty_data(row, space_rules):
ret = ''
for key in space_rules:
ret += key + '=' + str(row[key]).ljust(space_rules[key]) + ' '
ret += key + '=' + str(row.get(key, '')).ljust(space_rules[key]) + ' '
return ret


Expand Down
85 changes: 64 additions & 21 deletions shakenfist/daemons/net.py
@@ -1,32 +1,19 @@
import logging
from logging import handlers as logging_handlers
import re
import setproctitle
import time

from oslo_concurrency import processutils

from shakenfist import config
from shakenfist import db
from shakenfist import net
from shakenfist import util


# Module-level logger: records at INFO and above are handed to a syslog
# handler bound to the local /dev/log socket.
LOG = logging.getLogger(__file__)
LOG.setLevel(logging.INFO)
LOG.addHandler(logging_handlers.SysLogHandler(address='/dev/log'))


# Matches a numbered link line for an interface named vxlan-<digits>;
# group 1 captures the digits.
VXLAN_RE = re.compile(r'[0-9]+: vxlan-([0-9]+).*')


def _get_deployed_vxlans():
    """Yield the integer id of each vxlan-<id> link listed by ``ip link``."""
    output, _ = processutils.execute('ip link', shell=True)
    for entry in output.split('\n'):
        found = VXLAN_RE.match(entry)
        if found is None:
            continue
        yield int(found.group(1))


class monitor(object):
def __init__(self):
setproctitle.setproctitle('sf net')
Expand All @@ -35,17 +22,73 @@ def run(self):
while True:
time.sleep(30)

# We do not reap unused networks from the network node, as they might be
# in use for instances on other hypervisor nodes.
# Discover what networks are present
_, _, vxid_to_mac = util.discover_interfaces()

# Determine what networks we should be on
host_networks = []
seen_vxids = []

if config.parsed.get('NODE_IP') != config.parsed.get('NETWORK_NODE_IP'):
host_networks = []
# For normal nodes, just the ones we have instances for
for inst in list(db.get_instances(only_node=config.parsed.get('NODE_NAME'))):
for iface in db.get_instance_interfaces(inst['uuid']):
if not iface['network_uuid'] in host_networks:
host_networks.append(iface['network_uuid'])
else:
# For network nodes, it's all networks
for n in db.get_networks():
host_networks.append(n['uuid'])

# Network nodes also look for interfaces for absent instances
# and delete them
for ni in db.get_network_interfaces(n['uuid']):
inst = db.get_instance(ni['instance_uuid'])
if not inst:
db.hard_delete_network_interface(ni['uuid'])
LOG.info('Hard deleted stray network interface %s '
'associated with absent instance %s'
% (ni['uuid'], ni['instance_uuid']))
elif inst.get('state', 'unknown') in ['deleted', 'error', 'unknown']:
db.hard_delete_network_interface(ni['uuid'])
LOG.info('Hard deleted stray network interface %s '
'associated with %s instance %s'
% (ni['uuid'], inst.get('state', 'unknown'),
ni['instance_uuid']))

# Ensure we are on every network we have a host for
for network in host_networks:
n = net.from_db(network)
n.create()
n.ensure_mesh()
seen_vxids.append(n.vxlan_id)

# Determine if there are any extra vxids
extra_vxids = list(vxid_to_mac.keys())
for seen in seen_vxids:
if seen in extra_vxids:
extra_vxids.remove(seen)

# For now, just log extra vxids
if extra_vxids:
LOG.warn('Extra vxlans present! IDs are: %s'
% extra_vxids)

# Determine the network uuids for those vxids
vxid_to_uuid = {}
for n in db.get_networks():
vxid_to_uuid[n['vxid']] = n['uuid']

for network in host_networks:
n = net.from_db(network)
n.ensure_mesh()
for extra in extra_vxids:
if extra in vxid_to_uuid:
n = net.from_db(vxid_to_uuid[extra])
n.delete()
LOG.info('Extra vxlan %s (network %s) removed.'
% (extra, vxid_to_uuid[extra]))
else:
LOG.error('Extra vxlan %s does not map to any network.'
% extra)

# TODO(mikal): remove stray networks
# And record vxids in the database
db.persist_node_vxid_mapping(
config.parsed.get('NODE_NAME'), vxid_to_mac)
8 changes: 8 additions & 0 deletions shakenfist/db.py
Expand Up @@ -520,3 +520,11 @@ def persist_metadata(object_type, name, metadata):

def delete_metadata(object_type, name):
    """Delete the 'metadata' etcd entry for the given object type and name."""
    etcd.delete('metadata', object_type, name)


def persist_node_vxid_mapping(node, vxid_to_mac):
    """Store ``vxid_to_mac`` for ``node`` under the 'vxid_mapping' etcd type."""
    etcd.put('vxid_mapping', None, node, vxid_to_mac)


def get_node_vxid_mapping(node):
    """Fetch the entry stored for ``node`` under the 'vxid_mapping' etcd type.

    Args:
        node: Name of the node whose mapping was stored with
            persist_node_vxid_mapping().

    Returns:
        Whatever ``etcd.get`` returns for that entry.
    """
    # The lookup result must be handed back; without the return this
    # getter would always yield None regardless of what is stored.
    return etcd.get('vxid_mapping', None, node)
2 changes: 1 addition & 1 deletion shakenfist/external_api/app.py
Expand Up @@ -1519,7 +1519,7 @@ def run(self):
processutils.execute(
('gunicorn3 --workers 10 --bind 0.0.0.0:%d '
'--log-syslog --log-syslog-prefix sf '
'--timeout 300 --name "sf api" '
'--timeout 600 --name "sf api" '
'shakenfist.external_api.app:app'
% config.parsed.get('API_PORT')),
shell=True, env_variables=os.environ)
12 changes: 8 additions & 4 deletions shakenfist/tests/test_virt.py
Expand Up @@ -165,14 +165,16 @@ def test_str(self):
@mock.patch('shakenfist.db.get_instance_interfaces',
return_value=[
{
'instance_uuid': 'fakeuuid',
'uuid': 'ifaceuuid',
'instance_uuid': 'instuuid',
'network_uuid': 'netuuid',
'macaddr': '1a:91:64:d2:15:39',
'ipv4': '127.0.0.5',
'order': 0
},
{
'instance_uuid': 'fakeuuid',
'uuid': 'ifaceuuid2',
'instance_uuid': 'instuuid',
'network_uuid': 'netuuid',
'macaddr': '1a:91:64:d2:15:40',
'ipv4': '127.0.0.6',
Expand Down Expand Up @@ -226,14 +228,16 @@ def test_make_config_drive(self, mock_net_from_db, mock_interfaces,
"id": "eth0",
"mtu": 1450,
"name": "eth0",
"type": "physical"
"type": "vif",
"vif_id": "ifaceuuid"
},
{
"ethernet_mac_address": "1a:91:64:d2:15:40",
"id": "eth1",
"mtu": 1450,
"name": "eth1",
"type": "physical"
"type": "vif",
"vif_id": "ifaceuuid2"
}
],
"networks": [
Expand Down
37 changes: 36 additions & 1 deletion shakenfist/util.py
Expand Up @@ -14,7 +14,6 @@
from oslo_concurrency import processutils

from shakenfist import db
from shakenfist import db


LOG = logging.getLogger(__file__)
Expand Down Expand Up @@ -172,3 +171,39 @@ def get_version():

def get_user_agent():
    """Build the User-Agent string, embedding the result of get_version()."""
    template = 'Mozilla/5.0 (Ubuntu; Linux x86_64) Shaken Fist/%s'
    return template % get_version()


def discover_interfaces():
    """Map the local network interfaces reported by ``ip addr list``.

    Returns:
        A tuple ``(mac_to_iface, iface_to_mac, vxid_to_mac)`` of dicts:
        MAC address to interface name (pre-seeded with the all-zero MAC
        mapped to 'broadcast'), interface name to MAC address, and, for
        interfaces whose name starts with ``vxlan-``, the integer parsed
        from the name to that interface's MAC address.
    """
    mac_to_iface = {
        '00:00:00:00:00:00': 'broadcast'
    }
    iface_to_mac = {}
    vxid_to_mac = {}

    # Header line of an interface stanza, e.g. "3: vxlan-5: <...".
    iface_name = None
    iface_name_re = re.compile(r'^[0-9]+: ([^:]+): <')

    # Detail lines are indented beneath the header. Accept any amount of
    # leading whitespace so the match does not hinge on an exact indent
    # width.
    link_ether_re = re.compile(r'^\s+link/ether (.*) brd .*')

    stdout, _ = processutils.execute('ip addr list', shell=True)
    for line in stdout.split('\n'):
        line = line.rstrip()

        m = iface_name_re.match(line)
        if m:
            # Remember which interface the following detail lines
            # belong to.
            iface_name = m.group(1)
            continue

        m = link_ether_re.match(line)
        if m:
            link_ether = m.group(1)
            mac_to_iface[link_ether] = iface_name
            iface_to_mac[iface_name] = link_ether

            if iface_name.startswith('vxlan-'):
                vxid = int(iface_name.split('-')[1])
                vxid_to_mac[vxid] = link_ether

    return mac_to_iface, iface_to_mac, vxid_to_mac
3 changes: 2 additions & 1 deletion shakenfist/virt.py
Expand Up @@ -346,7 +346,8 @@ def _make_config_drive(self, disk_path):
'id': devname,
'name': devname,
'mtu': 1450,
'type': 'physical',
'type': 'vif',
'vif_id': iface['uuid']
}
)

Expand Down

0 comments on commit b4d99a6

Please sign in to comment.