Pipfile (2 changes: 1 addition & 1 deletion)

@@ -11,7 +11,7 @@ meilisearch = "==0.12.3"
 requests-iap = "==0.2.0"
 
 [dev-packages]
-pylint = "==2.5.3"
+pylint = "==2.6.0"
 
 [requires]
 python_version = "3.8"

Pipfile.lock (132 changes: 47 additions & 85 deletions)

Large diffs are not rendered by default.

scraper/src/config/config_loader.py (4 changes: 2 additions & 2 deletions)

@@ -100,8 +100,8 @@ def _load_config(self, config):
             self.config_content = copy.deepcopy(data)
 
             return data
-        except ValueError:
-            raise ValueError('CONFIG is not a valid JSON')
+        except ValueError as value_error:
+            raise ValueError('CONFIG is not a valid JSON') from value_error
         sys.exit(EXIT_CODE_WRONG_CONFIG)
 
     def _parse(self):

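Context for this hunk and the matching one in js_executor.py below: pylint 2.6.0, bumped in the Pipfile above, added the raise-missing-from check (W0707), and these changes appear to satisfy it by chaining the re-raised exception to the original with raise ... from .... A minimal standalone sketch of the pattern, with illustrative names not taken from this repo:

import json

def load_config(raw):
    # Re-raise with explicit chaining so the original JSONDecodeError
    # stays attached to the new exception as __cause__.
    try:
        return json.loads(raw)
    except ValueError as value_error:
        raise ValueError('CONFIG is not a valid JSON') from value_error

try:
    load_config('{not json')
except ValueError as err:
    print(repr(err.__cause__))  # the original json.JSONDecodeError
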
scraper/src/custom_dupefilter.py (2 changes: 1 addition & 1 deletion)

@@ -57,7 +57,7 @@ def custom_request_fingerprint(self, request, include_headers=None,
         return cache[include_headers]
 
     def __init__(self, path=None, debug=False, use_anchors=False):
-        super(CustomDupeFilter, self).__init__(path=path, debug=debug)
+        super().__init__(path=path, debug=debug)
         # Spread config bool
         self.use_anchors = use_anchors
         self.fingerprints_with_scheme = set()  # This set will not be scheme agnostic

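The remaining hunks swap the Python 2-style two-argument super(Class, self) calls for Python 3's zero-argument super(), which resolves the class and instance from the enclosing scope. A minimal sketch of the equivalence, using hypothetical class names rather than the repo's:

class Base:
    def __init__(self, path=None, debug=False):
        self.path = path
        self.debug = debug

class Child(Base):
    def __init__(self, path=None, debug=False, use_anchors=False):
        # Same behavior as super(Child, self).__init__(...), but it
        # survives renaming the class and avoids repeating its name.
        super().__init__(path=path, debug=debug)
        self.use_anchors = use_anchors

child = Child(use_anchors=True)
assert child.use_anchors and child.path is None
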
scraper/src/documentation_spider.py (4 changes: 2 additions & 2 deletions)

@@ -80,7 +80,7 @@ def __init__(self, config, meilisearch_helper, strategy, *args, **kwargs):
         self.remove_get_params = config.remove_get_params
         self.strict_redirect = config.strict_redirect
         self.nb_hits_max = config.nb_hits_max
-        super(DocumentationSpider, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
 
         # Get rid of scheme consideration
         # start_urls must stay authentic URLs in order to be reached; we build a scheme-agnostic regex based on those URLs
@@ -120,7 +120,7 @@ def __init__(self, config, meilisearch_helper, strategy, *args, **kwargs):
         self.force_sitemap_urls_crawling = config.force_sitemap_urls_crawling
 
         # END __init__ part from SitemapSpider
-        super(DocumentationSpider, self)._compile_rules()
+        super()._compile_rules()
 
     def start_requests(self):
         # We crawl according to the sitemap

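Zero-argument super() is not limited to __init__: the super()._compile_rules() call above dispatches to the next implementation in the method resolution order exactly as the two-argument form did. A small sketch with hypothetical names (not Scrapy's actual API):

class RuleSpiderBase:
    def _compile_rules(self):
        return ['base rule']

class DocsSpider(RuleSpiderBase):
    def _compile_rules(self):
        # Delegates to the parent implementation, then extends it;
        # equivalent to super(DocsSpider, self)._compile_rules().
        return super()._compile_rules() + ['docs rule']

assert DocsSpider()._compile_rules() == ['base rule', 'docs rule']
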
scraper/src/js_executor.py (4 changes: 2 additions & 2 deletions)

@@ -18,5 +18,5 @@ def execute(self, url, js):
         try:
             parsed_result = json.loads(result)
             return parsed_result
-        except ValueError:
-            raise ValueError('CONFIG is not a valid JSON')
+        except ValueError as value_error:
+            raise ValueError('CONFIG is not a valid JSON') from value_error

scraper/src/strategies/default_strategy.py (2 changes: 1 addition & 1 deletion)

@@ -20,7 +20,7 @@ class DefaultStrategy(AbstractStrategy):
     dom = None
 
     def __init__(self, config):
-        super(DefaultStrategy, self).__init__(config)
+        super().__init__(config)
         self.levels = ['lvl0', 'lvl1', 'lvl2', 'lvl3', 'lvl4', 'lvl5', 'lvl6']
         self.global_content = {}
         self.page_rank = {}