From 3ffcdafe0dbdac60b7e116b506ee5b4580674e75 Mon Sep 17 00:00:00 2001 From: secynic Date: Tue, 15 Sep 2020 14:42:47 -0500 Subject: [PATCH 1/2] Fixed bug in root and sub-entities not getting queried/data (#247) --- CHANGES.rst | 4 + RDAP.rst | 13 +++ UPGRADING.rst | 4 + ipwhois/ipwhois.py | 8 +- ipwhois/rdap.py | 174 +++++++++++++++++++++++++------------ ipwhois/tests/test_rdap.py | 14 +-- 6 files changed, 155 insertions(+), 62 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b6beac5..2b25efe 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -20,6 +20,10 @@ Changelog result was returned (#262 - ameidatou) - Fixed deprecation warnings due to invalid escape sequences (#272 - tirkarthi) +- Fixed bug in root and sub-entities not getting queried/data (#247) +- Added new argument root_ent_check to IPWhois.lookup_rdap and + RDAP.lookup. Set this to False to revert to old functionality - missing data, + but fewer queries (#247) - Added support for Python 3.8 (#267) - Fixed travis build warnings (#268) - Pinned requirements (#274) diff --git a/RDAP.rst b/RDAP.rst index 9d32b50..0f8d0fa 100644 --- a/RDAP.rst +++ b/RDAP.rst @@ -73,6 +73,10 @@ Arguments supported by IPWhois.lookup_rdap(). | | | pulling ASN information via dns, in order to | | | | get the ASN description. Defaults to True. | +--------------------+--------+-----------------------------------------------+ +| root_ent_check | bool | If True, will perform additional RDAP HTTP | +| | | queries for missing entity data at the root | +| | | level. Defaults to True. | ++--------------------+--------+-----------------------------------------------+ .. _rdap-output: @@ -593,3 +597,12 @@ this very low for bulk queries, or disable completely by setting retry_count=0. Note that setting this result too low may cause a larger number of IP lookups to fail. + +root_ent_check +^^^^^^^^^^^^^^ + +When root level entities (depth=0) are missing vcard data, additional +entity specific HTTP lookups are performed. 
In the past, you would expect +depth=0 to mean a single lookup per IP. This was a bug and has been fixed as of +v1.2.0. Set this to False to revert back to the old method, although you will be +missing entity specific data. diff --git a/UPGRADING.rst b/UPGRADING.rst index d689a4f..300da87 100644 --- a/UPGRADING.rst +++ b/UPGRADING.rst @@ -22,6 +22,10 @@ v1.2.0 nir.NIRWhois._get_nets_krnic, nir.NIRWhois._get_contact - Removed deprecated asn_alts parameter - Removed deprecated allow_permutations parameter +- Added new argument root_ent_check to IPWhois.lookup_rdap and + RDAP.lookup. Set this to False to revert to old functionality - missing data, + but fewer queries. If you leave this set to the default of True, you will notice + more queries and potentially more rate-limiting. - Added support for Python 3.8 - Pinned requirements diff --git a/ipwhois/ipwhois.py b/ipwhois/ipwhois.py index 0c3e179..d5368e8 100644 --- a/ipwhois/ipwhois.py +++ b/ipwhois/ipwhois.py @@ -199,7 +199,7 @@ def lookup_rdap(self, inc_raw=False, retry_count=3, depth=0, excluded_entities=None, bootstrap=False, rate_limit_timeout=120, extra_org_map=None, inc_nir=True, nir_field_list=None, asn_methods=None, - get_asn_description=True): + get_asn_description=True, root_ent_check=True): """ The function for retrieving and parsing whois information for an IP address via HTTP (RDAP). @@ -247,6 +247,9 @@ def lookup_rdap(self, inc_raw=False, retry_count=3, depth=0, get_asn_description (:obj:`bool`): Whether to run an additional query when pulling ASN information via dns, in order to get the ASN description. Defaults to True. + root_ent_check (:obj:`bool`): If True, will perform + additional RDAP HTTP queries for missing entity data at the + root level. Defaults to True. 
Returns: dict: The IP RDAP lookup results @@ -305,7 +308,8 @@ def lookup_rdap(self, inc_raw=False, retry_count=3, depth=0, inc_raw=inc_raw, retry_count=retry_count, asn_data=asn_data, depth=depth, excluded_entities=excluded_entities, response=response, bootstrap=bootstrap, - rate_limit_timeout=rate_limit_timeout + rate_limit_timeout=rate_limit_timeout, + root_ent_check=root_ent_check ) # Add the RDAP information to the return dictionary. diff --git a/ipwhois/rdap.py b/ipwhois/rdap.py index d019f6d..1e80338 100644 --- a/ipwhois/rdap.py +++ b/ipwhois/rdap.py @@ -28,6 +28,7 @@ from .net import ip_address import logging import json +from collections import namedtuple log = logging.getLogger(__name__) @@ -688,9 +689,95 @@ def __init__(self, net): raise NetError('The provided net parameter is not an instance of ' 'ipwhois.net.Net') + def _get_entity(self, entity=None, roles=None, inc_raw=False, retry_count=3, + asn_data=None, bootstrap=False, rate_limit_timeout=120): + """ + The function for retrieving and parsing information for an entity via + RDAP (HTTP). + + Args: + entity (:obj:`str`): The entity name to lookup. + roles (:obj:`dict`): The mapping of entity handles to roles. + inc_raw (:obj:`bool`, optional): Whether to include the raw + results in the returned dictionary. Defaults to False. + retry_count (:obj:`int`): The number of times to retry in case + socket errors, timeouts, connection resets, etc. are + encountered. Defaults to 3. + asn_data (:obj:`dict`): Result from + :obj:`ipwhois.asn.IPASN.lookup`. Optional if the bootstrap + parameter is True. + bootstrap (:obj:`bool`): If True, performs lookups via ARIN + bootstrap rather than lookups based on ASN data. Defaults to + False. + rate_limit_timeout (:obj:`int`): The number of seconds to wait + before retrying when a rate limit notice is returned via + rdap+json. Defaults to 120. + + Returns: + namedtuple: + + :result (dict): Consists of the fields listed in the + ipwhois.rdap._RDAPEntity dict. 
The raw result is included for + each object if the inc_raw parameter is True. + :roles (dict): The mapping of entity handles to roles. + """ + + result = {} + + if bootstrap: + entity_url = '{0}/entity/{1}'.format( + BOOTSTRAP_URL, entity) + else: + tmp_reg = asn_data['asn_registry'] + entity_url = RIR_RDAP[tmp_reg]['entity_url'] + entity_url = str(entity_url).format(entity) + + try: + + # RDAP entity query + response = self._net.get_http_json( + url=entity_url, retry_count=retry_count, + rate_limit_timeout=rate_limit_timeout + ) + + # Parse the entity + result_ent = _RDAPEntity(response) + result_ent.parse() + result = result_ent.vars + + result['roles'] = None + try: + + result['roles'] = roles[entity] + + except KeyError: # pragma: no cover + + pass + + try: + + for tmp in response['entities']: + + if tmp['handle'] not in roles: + roles[tmp['handle']] = tmp['roles'] + + except (IndexError, KeyError): + + pass + + if inc_raw: + result['raw'] = response + + except (HTTPLookupError, InvalidEntityObject): + + pass + + return_tuple = namedtuple('return_tuple', ['result', 'roles']) + return return_tuple(result, roles) + def lookup(self, inc_raw=False, retry_count=3, asn_data=None, depth=0, excluded_entities=None, response=None, bootstrap=False, - rate_limit_timeout=120): + rate_limit_timeout=120, root_ent_check=True): """ The function for retrieving and parsing information for an IP address via RDAP (HTTP). @@ -716,6 +803,9 @@ def lookup(self, inc_raw=False, retry_count=3, asn_data=None, depth=0, rate_limit_timeout (:obj:`int`): The number of seconds to wait before retrying when a rate limit notice is returned via rdap+json. Defaults to 120. + root_ent_check (:obj:`bool`): If True, will perform + additional RDAP HTTP queries for missing entity data at the + root level. Defaults to True. 
Returns: dict: The IP RDAP lookup results @@ -792,10 +882,23 @@ def lookup(self, inc_raw=False, retry_count=3, asn_data=None, depth=0, if ent['handle'] not in [results['entities'], excluded_entities]: - result_ent = _RDAPEntity(ent) - result_ent.parse() + if 'vcardArray' not in ent and root_ent_check: + entity_object, roles = self._get_entity( + entity=ent['handle'], + roles=roles, + inc_raw=inc_raw, + retry_count=retry_count, + asn_data=asn_data, + bootstrap=bootstrap, + rate_limit_timeout=rate_limit_timeout + ) + results['objects'][ent['handle']] = entity_object - results['objects'][ent['handle']] = result_ent.vars + else: + result_ent = _RDAPEntity(ent) + result_ent.parse() + + results['objects'][ent['handle']] = result_ent.vars results['entities'].append(ent['handle']) @@ -835,57 +938,18 @@ def lookup(self, inc_raw=False, retry_count=3, asn_data=None, depth=0, list(new_objects.keys()) + excluded_entities): - if bootstrap: - entity_url = '{0}/entity/{1}'.format( - BOOTSTRAP_URL, ent) - else: - tmp_reg = asn_data['asn_registry'] - entity_url = RIR_RDAP[tmp_reg]['entity_url'] - entity_url = str(entity_url).format(ent) - - try: - - # RDAP entity query - response = self._net.get_http_json( - url=entity_url, retry_count=retry_count, - rate_limit_timeout=rate_limit_timeout - ) - - # Parse the entity - result_ent = _RDAPEntity(response) - result_ent.parse() - new_objects[ent] = result_ent.vars - - new_objects[ent]['roles'] = None - try: - - new_objects[ent]['roles'] = roles[ent] - - except KeyError: # pragma: no cover - - pass - - try: - - for tmp in response['entities']: - - if tmp['handle'] not in roles: - - roles[tmp['handle']] = tmp['roles'] - - except (IndexError, KeyError): - - pass - - if inc_raw: - - new_objects[ent]['raw'] = response - - except (HTTPLookupError, InvalidEntityObject): - - pass - - except TypeError: + entity_object, roles = self._get_entity( + entity=ent, + roles=roles, + inc_raw=inc_raw, + retry_count=retry_count, + asn_data=asn_data, + 
bootstrap=bootstrap, + rate_limit_timeout=rate_limit_timeout + ) + new_objects[ent] = entity_object + + except (KeyError, TypeError): pass diff --git a/ipwhois/tests/test_rdap.py b/ipwhois/tests/test_rdap.py index 2ccdb45..6d4277d 100644 --- a/ipwhois/tests/test_rdap.py +++ b/ipwhois/tests/test_rdap.py @@ -82,7 +82,8 @@ def test_lookup(self): 'endAddress': '74.125.225.229' }, asn_data=val['asn_data'], - depth=0), dict) + depth=0, + root_ent_check=False), dict) log.debug('Testing rdap.lookup entitiy checks') net = Net('74.125.225.229') @@ -99,7 +100,8 @@ def test_lookup(self): 'entities': entity }, asn_data=val['asn_data'], - depth=1), dict) + depth=0, + root_ent_check=False), dict) self.assertIsInstance(obj.lookup(response={ 'handle': 'test', @@ -109,9 +111,10 @@ def test_lookup(self): 'entities': entity }, asn_data=val['asn_data'], - depth=1, + depth=0, bootstrap=True, - inc_raw=True), dict) + inc_raw=True, + root_ent_check=False), dict) # No sub entities. This is for coverage, but won't error out. entity = [{'handle': 'test', 'roles': [ @@ -125,7 +128,8 @@ def test_lookup(self): 'entities': entity }, asn_data=val['asn_data'], - depth=1), dict) + depth=0, + root_ent_check=False), dict) class TestRDAPContact(TestCommon): From a54f2ba15f90645f052f36d604392b0bca35aa83 Mon Sep 17 00:00:00 2001 From: secynic Date: Tue, 15 Sep 2020 15:18:25 -0500 Subject: [PATCH 2/2] Removed old permutations check --- ipwhois/asn.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ipwhois/asn.py b/ipwhois/asn.py index 60625e2..e2cda71 100644 --- a/ipwhois/asn.py +++ b/ipwhois/asn.py @@ -447,11 +447,6 @@ def lookup(self, inc_raw=False, retry_count=3, extra_org_map=None, dns_success = False for index, lookup_method in enumerate(lookups): - if index > 0 and not asn_methods: - - raise ASNRegistryError('ASN registry lookup failed. ' - 'Permutations not allowed.') - if lookup_method == 'dns': try: