Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 1 addition & 16 deletions scraper/src/config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,12 @@
"""

from collections import OrderedDict
from distutils.util import strtobool
import json
import os
import sys
import copy

from .config_validator import ConfigValidator
from .nb_hits_updater import NbHitsUpdater
from .urls_parser import UrlsParser
from .selectors_parser import SelectorsParser
from .browser_handler import BrowserHandler
Expand Down Expand Up @@ -46,7 +44,6 @@ class ConfigLoader:
strategy = 'default'
strict_redirect = True
strip_chars = u".,;:§¶"
update_nb_hits = None
use_anchors = False
user_agent = 'MeiliSearch docs-scraper'
only_content_level = False
Expand Down Expand Up @@ -111,9 +108,7 @@ def _parse(self):
# Parse Env
self.app_id = os.environ.get('MEILISEARCH_HOST_URL', None)
self.api_key = os.environ.get('MEILISEARCH_API_KEY', None)
self.update_nb_hits = os.environ.get('UPDATE_NB_HITS', None)
if self.update_nb_hits is not None:
self.update_nb_hits = bool(strtobool(self.update_nb_hits))

if self.index_uid_tmp is None:
self.index_uid_tmp = os.environ.get('index_uid_TMP', self.index_uid + '_tmp')

Expand All @@ -128,15 +123,5 @@ def _parse(self):
self.allowed_domains = UrlsParser.build_allowed_domains(
self.start_urls, self.stop_urls)

def update_nb_hits_value(self, nb_hits):
    """Forward a new hit count to an ``NbHitsUpdater`` for file-based configs.

    Does nothing when ``self.config_file`` is ``None``. Otherwise builds an
    ``NbHitsUpdater`` from the config file, the config content, the
    ``'nb_hits'`` value currently held in the config content (``None`` when
    the key is absent) and ``nb_hits``, then calls its ``update`` with
    ``self.update_nb_hits``.
    """
    if self.config_file is None:
        return

    # Config was loaded from a file
    if 'nb_hits' in self.config_content:
        stored_hits = self.config_content['nb_hits']
    else:
        stored_hits = None

    updater = NbHitsUpdater(self.config_file, self.config_content,
                            stored_hits, nb_hits)
    updater.update(self.update_nb_hits)

def get_extra_facets(self):
    """Return the result of ``UrlsParser.get_extra_facets`` for ``self.start_urls``."""
    extra_facets = UrlsParser.get_extra_facets(self.start_urls)
    return extra_facets
47 changes: 0 additions & 47 deletions scraper/src/config/nb_hits_updater.py

This file was deleted.

1 change: 0 additions & 1 deletion scraper/src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ def run_config(config):
if DocumentationSpider.NB_INDEXED > 0:
# meilisearch_helper.commit_tmp_index()
print('Nb hits: {}'.format(DocumentationSpider.NB_INDEXED))
config.update_nb_hits_value(DocumentationSpider.NB_INDEXED)
else:
print('Crawling issue: nbHits 0 for ' + config.index_uid)
# meilisearch_helper.report_crawling_issue()
Expand Down