Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ meilisearch = "==0.11.0"
requests-iap = "==0.2.0"

[dev-packages]
pylint = "==2.3.1"
pylint = "==2.5.3"

[requires]
python_version = "3.6"
15 changes: 11 additions & 4 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion scraper/src/config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from distutils.util import strtobool
import json
import os
import sys
import copy

from .config_validator import ConfigValidator
Expand Down Expand Up @@ -104,7 +105,7 @@ def _load_config(self, config):
return data
except ValueError:
raise ValueError('CONFIG is not a valid JSON')
exit(EXIT_CODE_WRONG_CONFIG)
sys.exit(EXIT_CODE_WRONG_CONFIG)

def _parse(self):
# Parse Env
Expand Down
3 changes: 2 additions & 1 deletion scraper/src/documentation_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from scrapy.spiders.sitemap import regex
import re
import os
import sys

# End of import for the sitemap behavior

Expand Down Expand Up @@ -156,7 +157,7 @@ def add_records(self, response, from_sitemap):
self.reason_to_stop = "Too much hits, Docs-Scraper only handle {} records".format(
int(self.nb_hits_max))
raise ValueError(self.reason_to_stop)
exit(EXIT_CODE_EXCEEDED_RECORDS)
sys.exit(EXIT_CODE_EXCEEDED_RECORDS)

def parse_from_sitemap(self, response):
if self.reason_to_stop is not None:
Expand Down
4 changes: 2 additions & 2 deletions scraper/src/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import unicodedata
from builtins import input
from cssselect import HTMLTranslator
import json


def confirm(message="Confirm"):
Expand Down Expand Up @@ -29,7 +30,6 @@ def is_number(s):
pass

try:
import unicodedata
unicodedata.numeric(s)
return True
except (TypeError, ValueError):
Expand Down
3 changes: 2 additions & 1 deletion scraper/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Docs-scraper main entry point
"""
import os
import sys
import json
import requests
from requests_iap import IAPAuth
Expand Down Expand Up @@ -109,7 +110,7 @@ def run_config(config):
else:
print('Crawling issue: nbHits 0 for ' + config.index_uid)
# meilisearch_helper.report_crawling_issue()
exit(EXIT_CODE_NO_RECORD)
sys.exit(EXIT_CODE_NO_RECORD)
print("")


Expand Down
3 changes: 2 additions & 1 deletion scraper/src/strategies/default_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ..helpers import to_json
import json
import hashlib
import sys


class DefaultStrategy(AbstractStrategy):
Expand Down Expand Up @@ -63,7 +64,7 @@ def _update_record_with_global_content(self, record, levels):
def get_records_from_dom(self, current_page_url=None):

if self.dom is None:
exit('DefaultStrategy.dom is not defined')
sys.exit('DefaultStrategy.dom is not defined')

# Reset it to be able to have a clean instance when testing
self.global_content = {}
Expand Down
5 changes: 1 addition & 4 deletions scraper/src/tests/config_loader/get_extra_facets_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# coding: utf-8
from ...config.config_loader import ConfigLoader
from .abstract import config

from .mocked_init import MockedInit

class TestGetExtraFacets:
def test_extra_facets_should_be_empty_by_default(self):
Expand All @@ -13,7 +13,6 @@ def test_extra_facets_should_be_empty_by_default(self):

def test_extra_facets_should_be_set_from_start_urls_variables_browser(self,
monkeypatch):
from .mocked_init import MockedInit
monkeypatch.setattr("selenium.webdriver.chrome",
lambda x: MockedInit())
monkeypatch.setattr("time.sleep", lambda x: "")
Expand All @@ -37,7 +36,6 @@ def test_extra_facets_should_be_set_from_start_urls_variables_browser(self,

def test_extra_facets_should_be_set_from_start_urls_variables_with_two_start_url_browser(
self, monkeypatch):
from .mocked_init import MockedInit
monkeypatch.setattr("selenium.webdriver.chrome",
lambda x: MockedInit())
monkeypatch.setattr("time.sleep", lambda x: "")
Expand Down Expand Up @@ -67,7 +65,6 @@ def test_extra_facets_should_be_set_from_start_urls_variables_with_two_start_url

def test_extra_facets_should_be_set_from_start_urls_variables_with_multiple_tags_browser(
self, monkeypatch):
from .mocked_init import MockedInit
monkeypatch.setattr("selenium.webdriver.chrome",
lambda x: MockedInit())
monkeypatch.setattr("time.sleep", lambda x: "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from ...config.config_loader import ConfigLoader
from ...config.browser_handler import BrowserHandler
from .abstract import config
from .mocked_init import MockedInit


class TestOpenSeleniumBrowser:
Expand All @@ -14,7 +15,6 @@ def test_browser_not_needed_by_default(self):
actual.js_render) is False

def test_browser_needed_when_js_render_true(self, monkeypatch):
from .mocked_init import MockedInit
monkeypatch.setattr("selenium.webdriver.chrome",
lambda x: MockedInit())
monkeypatch.setattr("time.sleep", lambda x: "")
Expand All @@ -30,7 +30,6 @@ def test_browser_needed_when_js_render_true(self, monkeypatch):

def test_browser_needed_when_config_contains_automatic_tag(self,
monkeypatch):
from .mocked_init import MockedInit
monkeypatch.setattr("selenium.webdriver.chrome",
lambda x: MockedInit())
monkeypatch.setattr("time.sleep", lambda x: "")
Expand Down
2 changes: 1 addition & 1 deletion scraper/src/tests/config_loader/start_urls_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from ...config.config_loader import ConfigLoader
from .abstract import config
from .mocked_init import MockedInit


class TestStartUrls:
Expand Down Expand Up @@ -70,7 +71,6 @@ def test_start_url_should_be_transform_to_object_if_string(self):

def test_start_urls_should_be_generated_when_there_is_automatic_tagging_browser(
self, monkeypatch):
from .mocked_init import MockedInit
monkeypatch.setattr("selenium.webdriver.chrome",
lambda x: MockedInit())
monkeypatch.setattr("time.sleep", lambda x: "")
Expand Down