Skip to content

Commit

Permalink
iSCSI: Fix flushing after multipath cfg change
Browse files Browse the repository at this point in the history
OS-Brick disconnect_volume code assumes that the use_multipath parameter
that is used to instantiate the connector has the same value than the
connector that was used on the original connect_volume call.

Unfortunately this is not necessarily true, because Nova can attach a
volume, then its multipath configuration can be enabled or disabled, and
then a detach can be issued.

This leads to a series of serious issues such as:

- Not flushing the single path on disconnect_volume (possible data loss)
  and leaving it as a leftover device on the host when Nova calls
  terminate-connection on Cinder.

- Not flushing the multipath device (possible data loss) and leaving it
  as a leftover device similarly to the other case.

This patch changes how we do disconnects, now we assume we are always
disconnecting multipaths, and fallback to doing the single path
disconnect we used to do if we can't go that route.

The case when we cannot do a multipathed detach is mostly when we did
the connect as a single path and the Cinder driver doesn't provide
portal_targets and portal_iqns in the connection info for non
multipathed initialize-connection calls.

This changes introduces an additional call when working with single
paths (checking the discoverydb), but it should be an acceptable
trade-off to not lose data.

Closes-Bug: #1921381
Change-Id: I066d456fb1fe9159d4be50ffd8abf9a6d8d07901
  • Loading branch information
Akrog committed Mar 26, 2021
1 parent 0e63fe8 commit d4205bd
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 84 deletions.
2 changes: 1 addition & 1 deletion os_brick/exception.py
Expand Up @@ -117,7 +117,7 @@ class TargetPortalNotFound(BrickException):
message = _("Unable to find target portal %(target_portal)s.")


class TargetPortalsNotFound(BrickException):
class TargetPortalsNotFound(TargetPortalNotFound):
message = _("Unable to find target portal in %(target_portals)s.")


Expand Down
17 changes: 11 additions & 6 deletions os_brick/initiator/connectors/base_iscsi.py
Expand Up @@ -29,13 +29,18 @@ def _iterate_all_targets(self, connection_properties):
props.pop(key, None)
yield props

@staticmethod
def _get_luns(con_props, iqns=None):
luns = con_props.get('target_luns')
num_luns = len(con_props['target_iqns']) if iqns is None else len(iqns)
return luns or [con_props['target_lun']] * num_luns

def _get_all_targets(self, connection_properties):
if all([key in connection_properties for key in ('target_portals',
'target_iqns',
'target_luns')]):
return zip(connection_properties['target_portals'],
connection_properties['target_iqns'],
connection_properties['target_luns'])
if all(key in connection_properties for key in ('target_portals',
'target_iqns')):
return list(zip(connection_properties['target_portals'],
connection_properties['target_iqns'],
self._get_luns(connection_properties)))

return [(connection_properties['target_portal'],
connection_properties['target_iqn'],
Expand Down
62 changes: 36 additions & 26 deletions os_brick/initiator/connectors/iscsi.py
Expand Up @@ -167,35 +167,45 @@ def _get_iscsi_sessions(self):
# entry: [tcp, [1], 192.168.121.250:3260,1 ...]
return [entry[2] for entry in self._get_iscsi_sessions_full()]

def _get_ips_iqns_luns(self, connection_properties, discover=True):
def _get_ips_iqns_luns(self, connection_properties, discover=True,
is_disconnect_call=False):
"""Build a list of ips, iqns, and luns.
Used only when doing multipath, we have 3 cases:
Used when doing singlepath and multipath, and we have 4 cases:
- All information is in the connection properties
- We have to do an iSCSI discovery to get the information
- We don't want to do another discovery and we query the discoverydb
- Discovery failed because it was actually a single pathed attachment
:param connection_properties: The dictionary that describes all
of the target volume attributes.
:type connection_properties: dict
:param discover: Whether doing an iSCSI discovery is acceptable.
:type discover: bool
:param is_disconnect_call: Whether this is a call coming from a user
disconnect_volume call or a call from some
other operation's cleanup.
:type is_disconnect_call: bool
:returns: list of tuples of (ip, iqn, lun)
"""
# There are cases where we don't know if the local attach was done
# using multipathing or single pathing, so assume multipathing.
try:
if ('target_portals' in connection_properties and
'target_iqns' in connection_properties):
# Use targets specified by connection_properties
ips_iqns_luns = list(
zip(connection_properties['target_portals'],
connection_properties['target_iqns'],
self._get_luns(connection_properties)))
ips_iqns_luns = self._get_all_targets(connection_properties)
else:
method = (self._discover_iscsi_portals if discover
else self._get_discoverydb_portals)
ips_iqns_luns = method(connection_properties)
except exception.TargetPortalsNotFound:
except exception.TargetPortalNotFound:
# Discovery failed, on disconnect this will happen if we
# are detaching a single pathed connection, so we use the
# connection properties to return the tuple.
if is_disconnect_call:
return self._get_all_targets(connection_properties)
raise
except Exception:
LOG.exception('Exception encountered during portal discovery')
Expand Down Expand Up @@ -311,12 +321,6 @@ def _validate_iface_transport(self, transport_iface):
def _get_transport(self):
return self.transport

@staticmethod
def _get_luns(con_props, iqns=None):
luns = con_props.get('target_luns')
num_luns = len(con_props['target_iqns']) if iqns is None else len(iqns)
return luns or [con_props.get('target_lun')] * num_luns

def _get_discoverydb_portals(self, connection_properties):
"""Retrieve iscsi portals information from the discoverydb.
Expand Down Expand Up @@ -792,7 +796,7 @@ def _connect_multipath_volume(self, connection_properties):
mpath)

def _get_connection_devices(self, connection_properties,
ips_iqns_luns=None):
ips_iqns_luns=None, is_disconnect_call=False):
"""Get map of devices by sessions from our connection.
For each of the TCP sessions that correspond to our connection
Expand All @@ -812,14 +816,15 @@ def _get_connection_devices(self, connection_properties,
connection properties (sendtargets was used on connect) so we may have
to retrieve the info from the discoverydb. Call _get_ips_iqns_luns to
do the right things.
This method currently assumes that it's only called by the
_cleanup_conection method.
"""
if not ips_iqns_luns:
if self.use_multipath:
# We are only called from disconnect, so don't discover
ips_iqns_luns = self._get_ips_iqns_luns(connection_properties,
discover=False)
else:
ips_iqns_luns = self._get_all_targets(connection_properties)
# This is a cleanup, don't do discovery
ips_iqns_luns = self._get_ips_iqns_luns(
connection_properties, discover=False,
is_disconnect_call=is_disconnect_call)
LOG.debug('Getting connected devices for (ips,iqns,luns)=%s',
ips_iqns_luns)
nodes = self._get_iscsi_nodes()
Expand Down Expand Up @@ -879,11 +884,12 @@ def disconnect_volume(self, connection_properties, device_info,
"""
return self._cleanup_connection(connection_properties, force=force,
ignore_errors=ignore_errors,
device_info=device_info)
device_info=device_info,
is_disconnect_call=True)

def _cleanup_connection(self, connection_properties, ips_iqns_luns=None,
force=False, ignore_errors=False,
device_info=None):
device_info=None, is_disconnect_call=False):
"""Cleans up connection flushing and removing devices and multipath.
:param connection_properties: The dictionary that describes all
Expand All @@ -900,13 +906,18 @@ def _cleanup_connection(self, connection_properties, ips_iqns_luns=None,
ignore errors or raise an exception once finished
the operation. Default is False.
:param device_info: Attached device information.
:param is_disconnect_call: Whether this is a call coming from a user
disconnect_volume call or a call from some
other operation's cleanup.
:type is_disconnect_call: bool
:type ignore_errors: bool
"""
exc = exception.ExceptionChainer()
try:
devices_map = self._get_connection_devices(connection_properties,
ips_iqns_luns)
except exception.TargetPortalsNotFound as exc:
ips_iqns_luns,
is_disconnect_call)
except exception.TargetPortalNotFound as exc:
# When discovery sendtargets failed on connect there is no
# information in the discoverydb, so there's nothing to clean.
LOG.debug('Skipping cleanup %s', exc)
Expand All @@ -922,8 +933,7 @@ def _cleanup_connection(self, connection_properties, ips_iqns_luns=None,
was_multipath = (path_used.startswith('/dev/dm-') or
'mpath' in path_used)
multipath_name = self._linuxscsi.remove_connection(
remove_devices, self.use_multipath, force, exc,
path_used, was_multipath)
remove_devices, force, exc, path_used, was_multipath)

# Disconnect sessions and remove nodes that are left without devices
disconnect = [conn for conn, (__, keep) in devices_map.items()
Expand Down
23 changes: 10 additions & 13 deletions os_brick/initiator/linuxscsi.py
Expand Up @@ -287,12 +287,11 @@ def requires_flush(path, path_used, was_multipath):
# instead it maps to /dev/mapped/crypt-XYZ
return not was_multipath and '/dev' != os.path.split(path_used)[0]

def remove_connection(self, devices_names, is_multipath, force=False,
exc=None, path_used=None, was_multipath=False):
def remove_connection(self, devices_names, force=False, exc=None,
path_used=None, was_multipath=False):
"""Remove LUNs and multipath associated with devices names.
:param devices_names: Iterable with real device names ('sda', 'sdb')
:param is_multipath: Whether this is a multipath connection or not
:param force: Whether to forcefully disconnect even if flush fails.
:param exc: ExceptionChainer where to add exceptions if forcing
:param path_used: What path was used by Nova/Cinder for I/O
Expand All @@ -301,19 +300,17 @@ def remove_connection(self, devices_names, is_multipath, force=False,
"""
if not devices_names:
return
multipath_name = None
exc = exception.ExceptionChainer() if exc is None else exc

multipath_dm = self.find_sysfs_multipath_dm(devices_names)
LOG.debug('Removing %(type)s devices %(devices)s',
{'type': 'multipathed' if is_multipath else 'single pathed',
{'type': 'multipathed' if multipath_dm else 'single pathed',
'devices': ', '.join(devices_names)})

if is_multipath:
multipath_dm = self.find_sysfs_multipath_dm(devices_names)
multipath_name = multipath_dm and self.get_dm_name(multipath_dm)
if multipath_name:
with exc.context(force, 'Flushing %s failed', multipath_name):
self.flush_multipath_device(multipath_name)
multipath_name = None
multipath_name = multipath_dm and self.get_dm_name(multipath_dm)
if multipath_name:
with exc.context(force, 'Flushing %s failed', multipath_name):
self.flush_multipath_device(multipath_name)
multipath_name = None

for device_name in devices_names:
dev_path = '/dev/' + device_name
Expand Down
27 changes: 20 additions & 7 deletions os_brick/tests/initiator/connectors/test_base_iscsi.py
Expand Up @@ -51,16 +51,29 @@ def test_iterate_all_targets(self, mock_get_all_targets):
self.assertEqual(expected_props, list_props[0])

def test_get_all_targets(self):
connection_properties = {
'target_portals': [mock.sentinel.target_portals],
'target_iqns': [mock.sentinel.target_iqns],
'target_luns': [mock.sentinel.target_luns]}
portals = [mock.sentinel.portals1, mock.sentinel.portals2]
iqns = [mock.sentinel.iqns1, mock.sentinel.iqns2]
luns = [mock.sentinel.luns1, mock.sentinel.luns2]
connection_properties = {'target_portals': portals,
'target_iqns': iqns,
'target_luns': luns}

all_targets = self.connector._get_all_targets(connection_properties)

expected_targets = zip(portals, iqns, luns)
self.assertEqual(list(expected_targets), list(all_targets))

def test_get_all_targets_no_target_luns(self):
portals = [mock.sentinel.portals1, mock.sentinel.portals2]
iqns = [mock.sentinel.iqns1, mock.sentinel.iqns2]
lun = mock.sentinel.luns
connection_properties = {'target_portals': portals,
'target_iqns': iqns,
'target_lun': lun}

all_targets = self.connector._get_all_targets(connection_properties)

expected_targets = zip([mock.sentinel.target_portals],
[mock.sentinel.target_iqns],
[mock.sentinel.target_luns])
expected_targets = zip(portals, iqns, [lun, lun])
self.assertEqual(list(expected_targets), list(all_targets))

def test_get_all_targets_single_target(self):
Expand Down
59 changes: 37 additions & 22 deletions os_brick/tests/initiator/connectors/test_iscsi.py
Expand Up @@ -140,7 +140,6 @@ def test_get_iscsi_nodes_corrupt(self, exec_mock):
@mock.patch.object(iscsi.ISCSIConnector, '_get_iscsi_nodes')
def test_get_connection_devices(self, nodes_mock, sessions_mock,
glob_mock, iql_mock):
self.connector.use_multipath = True
iql_mock.return_value = self.connector._get_all_targets(self.CON_PROPS)

# List sessions from other targets and non tcp sessions
Expand All @@ -166,7 +165,8 @@ def test_get_connection_devices(self, nodes_mock, sessions_mock,
('ip2:port2', 'tgt2'): ({'sdb'}, {'sdc'}),
('ip3:port3', 'tgt3'): (set(), set())}
self.assertDictEqual(expected, res)
iql_mock.assert_called_once_with(self.CON_PROPS, discover=False)
iql_mock.assert_called_once_with(self.CON_PROPS, discover=False,
is_disconnect_call=False)

@mock.patch('glob.glob')
@mock.patch.object(iscsi.ISCSIConnector, '_get_iscsi_sessions_full')
Expand Down Expand Up @@ -560,7 +560,8 @@ def test_disconnect_volume(self, cleanup_mock):
mock.sentinel.con_props,
force=mock.sentinel.Force,
ignore_errors=mock.sentinel.ignore_errors,
device_info=mock.sentinel.dev_info)
device_info=mock.sentinel.dev_info,
is_disconnect_call=True)

@ddt.data(True, False)
@mock.patch.object(iscsi.ISCSIConnector, '_get_transport')
Expand Down Expand Up @@ -692,19 +693,17 @@ def test_cleanup_connection(self, path_used, was_multipath, remove_mock,
(('ip2:port2', 'tgt2'), ({'sdb'}, {'sdc'})),
(('ip3:port3', 'tgt3'), (set(), set()))))

with mock.patch.object(self.connector,
'use_multipath') as use_mp_mock:
self.connector._cleanup_connection(
self.CON_PROPS, ips_iqns_luns=mock.sentinel.ips_iqns_luns,
force=False, ignore_errors=False,
device_info=mock.sentinel.device_info)
self.connector._cleanup_connection(
self.CON_PROPS, ips_iqns_luns=mock.sentinel.ips_iqns_luns,
force=False, ignore_errors=False,
device_info=mock.sentinel.device_info)

get_dev_path_mock.called_once_with(self.CON_PROPS,
mock.sentinel.device_info)
con_devs_mock.assert_called_once_with(self.CON_PROPS,
mock.sentinel.ips_iqns_luns)
remove_mock.assert_called_once_with({'sda', 'sdb'}, use_mp_mock,
False, mock.ANY,
mock.sentinel.ips_iqns_luns,
False)
remove_mock.assert_called_once_with({'sda', 'sdb'}, False, mock.ANY,
path_used, was_multipath)
discon_mock.assert_called_once_with(
self.CON_PROPS,
Expand All @@ -730,17 +729,16 @@ def test_cleanup_connection_force_failure(self, remove_mock, flush_mock,
(('ip2:port2', 'tgt2'), ({'sdb'}, {'sdc'})),
(('ip3:port3', 'tgt3'), (set(), set()))))

with mock.patch.object(self.connector, 'use_multipath',
wraps=True) as use_mp_mock:
self.assertRaises(exception.ExceptionChainer,
self.connector._cleanup_connection,
self.CON_PROPS,
ips_iqns_luns=mock.sentinel.ips_iqns_luns,
force=mock.sentinel.force, ignore_errors=False)
self.assertRaises(exception.ExceptionChainer,
self.connector._cleanup_connection,
self.CON_PROPS,
ips_iqns_luns=mock.sentinel.ips_iqns_luns,
force=mock.sentinel.force, ignore_errors=False)

con_devs_mock.assert_called_once_with(self.CON_PROPS,
mock.sentinel.ips_iqns_luns)
remove_mock.assert_called_once_with({'sda', 'sdb'}, use_mp_mock,
mock.sentinel.ips_iqns_luns,
False)
remove_mock.assert_called_once_with({'sda', 'sdb'},
mock.sentinel.force, mock.ANY,
'', False)
discon_mock.assert_called_once_with(
Expand Down Expand Up @@ -976,6 +974,21 @@ def test_get_ips_iqns_luns_discoverydb(self, discover_mock,
db_portals_mock.assert_called_once_with(self.SINGLE_CON_PROPS)
discover_mock.assert_not_called()

@mock.patch.object(iscsi.ISCSIConnector, '_get_all_targets')
@mock.patch.object(iscsi.ISCSIConnector, '_get_discoverydb_portals')
@mock.patch.object(iscsi.ISCSIConnector, '_discover_iscsi_portals')
def test_get_ips_iqns_luns_disconnect_single_path(self, discover_mock,
db_portals_mock,
get_targets_mock):
db_portals_mock.side_effect = exception.TargetPortalsNotFound
res = self.connector._get_ips_iqns_luns(self.SINGLE_CON_PROPS,
discover=False,
is_disconnect_call=True)
db_portals_mock.assert_called_once_with(self.SINGLE_CON_PROPS)
discover_mock.assert_not_called()
get_targets_mock.assert_called_once_with(self.SINGLE_CON_PROPS)
self.assertEqual(get_targets_mock.return_value, res)

@mock.patch.object(iscsi.ISCSIConnector, '_discover_iscsi_portals')
def test_get_ips_iqns_luns_no_target_iqns_share_iqn(self, discover_mock):
discover_mock.return_value = [('ip1:port1', 'tgt1', '1'),
Expand Down Expand Up @@ -1249,7 +1262,9 @@ def my_connect(rescans, props, data):

connect_mock.side_effect = my_connect

res = self.connector._connect_multipath_volume(self.CON_PROPS)
with mock.patch.object(self.connector,
'use_multipath'):
res = self.connector._connect_multipath_volume(self.CON_PROPS)

expected = {'type': 'block', 'scsi_wwn': '', 'multipath_id': '',
'path': '/dev/dm-0'}
Expand Down
3 changes: 2 additions & 1 deletion os_brick/tests/initiator/connectors/test_iser.py
Expand Up @@ -56,7 +56,8 @@ def test_get_connection_devices(
res = self.connector._get_connection_devices(self.connection_data)
expected = {('ip:port', 'target_1'): ({'sda'}, set())}
self.assertDictEqual(expected, res)
iql_mock.assert_called_once_with(self.connection_data, discover=False)
iql_mock.assert_called_once_with(self.connection_data, discover=False,
is_disconnect_call=False)

@mock.patch.object(iscsi.ISCSIConnector, '_get_iscsi_sessions_full')
@mock.patch.object(iscsi.ISCSIConnector, '_execute')
Expand Down

0 comments on commit d4205bd

Please sign in to comment.