diff --git a/llama-index-integrations/readers/llama-index-readers-web/llama_index/readers/web/trafilatura_web/base.py b/llama-index-integrations/readers/llama-index-readers-web/llama_index/readers/web/trafilatura_web/base.py index 99e7cfffac898..b800ca03b6313 100644 --- a/llama-index-integrations/readers/llama-index-readers-web/llama_index/readers/web/trafilatura_web/base.py +++ b/llama-index-integrations/readers/llama-index-readers-web/llama_index/readers/web/trafilatura_web/base.py @@ -29,6 +29,7 @@ def load_data( include_formatting=False, include_links=False, show_progress=False, + no_ssl=False, **kwargs, ) -> List[Document]: """Load data from the urls. @@ -42,6 +43,7 @@ def load_data( include_formatting (bool, optional): Include formatting in the output. Defaults to False. include_links (bool, optional): Include links in the output. Defaults to False. show_progress (bool, optional): Show progress bar. Defaults to False + no_ssl (bool, optional): Bypass SSL verification. Defaults to False. kwargs: Additional keyword arguments for the `trafilatura.extract` function. Returns: @@ -61,7 +63,7 @@ def load_data( else: iterator = urls for url in iterator: - downloaded = trafilatura.fetch_url(url) + downloaded = trafilatura.fetch_url(url, no_ssl=no_ssl) response = trafilatura.extract( downloaded, include_comments=include_comments, diff --git a/llama-index-integrations/readers/llama-index-readers-web/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-web/pyproject.toml index 8268aa26bc4dc..e327e2d0f381f 100644 --- a/llama-index-integrations/readers/llama-index-readers-web/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-web/pyproject.toml @@ -44,7 +44,7 @@ license = "MIT" maintainers = ["HawkClaws", "Hironsan", "NA", "an-bluecat", "bborn", "jasonwcfan", "kravetsmic", "pandazki", "ruze00", "selamanse", "thejessezhang"] name = "llama-index-readers-web" readme = "README.md" -version = "0.1.18" +version = "0.1.19" [tool.poetry.dependencies] python = ">=3.8.1,<4.0"