Skip to content

Improve link-checking code and upgrade pytest #1216

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits on
Feb 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ py==1.8.1;python_version<"3.5"
py==1.11.0;python_version>="3.5"
pytest==4.6.11;python_version<"3.5"
pytest==6.1.2;python_version>="3.5" and python_version<"3.6"
pytest==7.0.0;python_version>="3.6"
pytest==7.0.1;python_version>="3.6"
pytest-forked==1.3.0;python_version<"3.6"
pytest-forked==1.4.0;python_version>="3.6"
pytest-html==1.22.1;python_version<"3.6"
Expand Down Expand Up @@ -88,7 +88,7 @@ pygments==2.5.2;python_version<"3.5"
pygments==2.11.2;python_version>="3.5"
prompt-toolkit==1.0.18;python_version<"3.5"
prompt-toolkit==2.0.10;python_version>="3.5" and python_version<"3.6"
prompt-toolkit==3.0.27;python_version>="3.6"
prompt-toolkit==3.0.28;python_version>="3.6"
decorator==4.4.2;python_version<"3.5"
decorator==5.1.1;python_version>="3.5"
ipython==5.10.0;python_version<"3.5"
Expand Down
2 changes: 1 addition & 1 deletion seleniumbase/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# seleniumbase package
__version__ = "2.4.11"
__version__ = "2.4.12"
34 changes: 22 additions & 12 deletions seleniumbase/fixtures/base_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def test_anything(self):
from selenium.webdriver.remote.remote_connection import LOGGER
from seleniumbase import config as sb_config
from seleniumbase.config import settings
from seleniumbase.core import download_helper
from seleniumbase.core import log_helper
from seleniumbase.fixtures import constants
from seleniumbase.fixtures import css_to_xpath
Expand Down Expand Up @@ -4561,10 +4562,17 @@ def get_unique_links(self):
links = page_utils._get_unique_links(page_url, soup)
return links

def get_link_status_code(self, link, allow_redirects=False, timeout=5):
def get_link_status_code(
self,
link,
allow_redirects=False,
timeout=5,
verify=False,
):
"""Get the status code of a link.
If the timeout is set to less than 1, it becomes 1.
If the timeout is exceeded by requests.get(), it will return a 404.
If "verify" is False, will ignore certificate errors.
For a list of available status codes, see:
https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
"""
Expand All @@ -4573,7 +4581,10 @@ def get_link_status_code(self, link, allow_redirects=False, timeout=5):
if timeout < 1:
timeout = 1
status_code = page_utils._get_link_status_code(
link, allow_redirects=allow_redirects, timeout=timeout
link,
allow_redirects=allow_redirects,
timeout=timeout,
verify=verify,
)
return status_code

Expand Down Expand Up @@ -4604,10 +4615,12 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
links = []
for link in all_links:
if (
"javascript:" not in link
"data:" not in link
and "mailto:" not in link
and "data:" not in link
and "javascript:" not in link
and "://fonts.gstatic.com" not in link
and "://fonts.googleapis.com" not in link
and "://googleads.g.doubleclick.net" not in link
):
links.append(link)
if timeout:
Expand All @@ -4634,6 +4647,7 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
broken_links.append(link)
self.__requests_timeout = None # Reset the requests.get() timeout
if len(broken_links) > 0:
broken_links = sorted(broken_links)
bad_links_str = "\n".join(broken_links)
if len(broken_links) == 1:
self.fail("Broken link detected:\n%s" % bad_links_str)
Expand Down Expand Up @@ -4681,6 +4695,7 @@ def get_pdf_text(
wrap=False,
nav=False,
override=False,
caching=True,
):
"""Gets text from a PDF file.
PDF can be either a URL or a file path on the local file system.
Expand All @@ -4702,7 +4717,8 @@ def get_pdf_text(
(Not needed because the PDF will be downloaded anyway.)
override - If the PDF file to be downloaded already exists in the
downloaded_files/ folder, that PDF will be used
instead of downloading it again."""
instead of downloading it again.
caching - If resources should be cached via pdfminer."""
import warnings

with warnings.catch_warnings():
Expand All @@ -4716,8 +4732,6 @@ def get_pdf_text(
raise Exception("%s is not a PDF file! (Expecting a .pdf)" % pdf)
file_path = None
if page_utils.is_valid_url(pdf):
from seleniumbase.core import download_helper

downloads_folder = download_helper.get_downloads_folder()
if nav:
if self.get_current_url() != pdf:
Expand Down Expand Up @@ -4750,7 +4764,7 @@ def get_pdf_text(
password="",
page_numbers=page_search,
maxpages=maxpages,
caching=False,
caching=caching,
codec=codec,
)
pdf_text = self.__fix_unicode_conversion(pdf_text)
Expand Down Expand Up @@ -4996,8 +5010,6 @@ def get_downloads_folder(self):
any clicks that download files will also use this folder
rather than using the browser's default "downloads/" path."""
self.__check_scope()
from seleniumbase.core import download_helper

return download_helper.get_downloads_folder()

def get_browser_downloads_folder(self):
Expand All @@ -5020,8 +5032,6 @@ def get_browser_downloads_folder(self):
):
return os.path.join(os.path.expanduser("~"), "downloads")
else:
from seleniumbase.core import download_helper

return download_helper.get_downloads_folder()
return os.path.join(os.path.expanduser("~"), "downloads")

Expand Down
22 changes: 17 additions & 5 deletions seleniumbase/fixtures/page_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,19 +209,31 @@ def _get_unique_links(page_url, soup):
pass
unique_links.append(link)

return unique_links


def _get_link_status_code(link, allow_redirects=False, timeout=5):
links = unique_links
links = list(set(links)) # Make sure all duplicates were removed
links = sorted(links) # Sort all the links alphabetically
return links


def _get_link_status_code(
link,
allow_redirects=False,
timeout=5,
verify=False,
):
"""Get the status code of a link.
If the timeout is exceeded, will return a 404.
If "verify" is False, will ignore certificate errors.
For a list of available status codes, see:
https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
"""
status_code = None
try:
response = requests.get(
link, allow_redirects=allow_redirects, timeout=timeout
link,
allow_redirects=allow_redirects,
timeout=timeout,
verify=verify,
)
status_code = response.status_code
except Exception:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@
'py==1.11.0;python_version>="3.5"',
'pytest==4.6.11;python_version<"3.5"',
'pytest==6.1.2;python_version>="3.5" and python_version<"3.6"',
'pytest==7.0.0;python_version>="3.6"',
'pytest==7.0.1;python_version>="3.6"',
'pytest-forked==1.3.0;python_version<"3.6"',
'pytest-forked==1.4.0;python_version>="3.6"',
'pytest-html==1.22.1;python_version<"3.6"',
Expand Down Expand Up @@ -213,7 +213,7 @@
'pygments==2.11.2;python_version>="3.5"',
'prompt-toolkit==1.0.18;python_version<"3.5"',
'prompt-toolkit==2.0.10;python_version>="3.5" and python_version<"3.6"', # noqa: E501
'prompt-toolkit==3.0.27;python_version>="3.6"',
'prompt-toolkit==3.0.28;python_version>="3.6"',
'decorator==4.4.2;python_version<"3.5"',
'decorator==5.1.1;python_version>="3.5"',
'ipython==5.10.0;python_version<"3.5"',
Expand Down