diff --git a/README.rst b/README.rst index a9fee30..21f7d8a 100755 --- a/README.rst +++ b/README.rst @@ -634,7 +634,7 @@ The following are the interfaces and instructions provided by the SDK: 1.dork_search(dork, page=0, resource="host", facets=None) search the data of the specified page according to dork - 2.multi_page_search(dork, page=1, resource="host", facets=None) + 2.multi_page_search(dork, page=1, start_page=1, resource="host", facets=None) search multiple pages of data according to dork 3.resources_info() get current user information diff --git a/docs/README_CN.md b/docs/README_CN.md index 4ce8217..d7e6c80 100755 --- a/docs/README_CN.md +++ b/docs/README_CN.md @@ -506,8 +506,8 @@ zm = ZoomEye(api_key="01234567-acbd-00000-1111-22222222222") 1.dork_search(dork, page=0, resource="host", facets=None) 根据 dork 搜索指定页的数据 - 2.multi_page_search(dork, page=1, resource="host", facets=None) - 根据 dork 搜索多页数据 + 2.multi_page_search(dork, page=1, start_page=1, resource="host", facets=None) + 根据 dork 搜索多页数据,支持从指定的页数开始下载 3.resources_info() 获取当前用户的信息 4.show_count() @@ -557,7 +557,7 @@ soft********11180040.b***c.net ['126.***.***.40'] {'product': [{'name': '', 'count': 28323128}, {'name': 'BusyBox telnetd', 'count': 10180912}, {'name': 'Linux telnetd', ...... ``` ->`multi_page_search()` 同样也可以进行搜索,当需要获取大量数据时使用该函数,其中 `page` 字段表示获取多少页的数据;而 `dork_search()` 仅获取指定页的数据。 +>`multi_page_search()` 同样也可以进行搜索,当需要获取大量数据时使用该函数,其中 `page` 字段表示获取多少页的数据, `start_page`字段表示从第几页开始获取;而 `dork_search()` 仅获取指定页的数据。 #### 5.数据筛选 在 SDK 中提供了 `dork_filter()` 函数,我们可以更加方便对数据进行筛选,提取指定的数据字段,如下: diff --git a/zoomeye/sdk.py b/zoomeye/sdk.py index 3a9d03b..5aebd0a 100644 --- a/zoomeye/sdk.py +++ b/zoomeye/sdk.py @@ -142,7 +142,8 @@ def _check_header(self): else: headers = {} # add user agent - headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36" + headers[ + "User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36" return headers def dork_search(self, dork, page=0, resource="host", facets=None): @@ -178,8 +179,8 @@ def dork_search(self, dork, page=0, resource="host", facets=None): return result - def multi_page_search(self, dork, page=1, resource="host", - facets=None) -> list: + def multi_page_search(self, dork, page=1, start_page=1, resource="host", + facets=None) -> (list, int, str): """ mainly used to search dork data from zoomeye data. please see: https://www.zoomeye.org/doc#host-search and @@ -189,6 +190,8 @@ def multi_page_search(self, dork, page=1, resource="host", dork to search :param page: int, specify the number of pages to return data, each page contains 20 data + :param start_page: int, + specify the number of start page to search :param resource: str, host search or web search :param facets: list or tuple @@ -207,12 +210,20 @@ def multi_page_search(self, dork, page=1, resource="host", dork_data = [] all_data = [] - for i in range(page): + is_search_done = "done" + for i in range(start_page - 1, page): + print("downloading contents from page{}".format(i+1)) if isinstance(facets, (tuple, list)): facets = ','.join(facets) params = {'query': dork, 'page': i + 1, 'facets': facets} - result = self._request(search_api, params=params, headers=headers) + try: + result = self._request(search_api, params=params, headers=headers) + except Exception as e: + # return the processed data + self.data_list = dork_data + self.raw_data = all_data + return dork_data, i, "search failed, the log as {}".format(e) if result and "matches" in result: self.total = result.get("total") all_data.append(result) @@ -231,7 +242,7 @@ def multi_page_search(self, dork, page=1, resource="host", # i added it to a list for easy viewing of each piece of data self.raw_data = all_data # return processed data - return dork_data + return dork_data, page, is_search_done def resources_info(self) -> dict: """ @@ -357,7 +368,6 @@ def generate_dot(self, q, source=0, page=1): return True, "successful! saving in {}".format(os.getcwd()) - def show_site_ip(data): """ show web search