seleniumbase · mdmintz · Feb 11, 2022 · Feb 11, 2022 · Feb 11, 2022 · Feb 11, 2022
diff --git a/requirements.txt b/requirements.txt
@@ -60,7 +60,7 @@ py==1.8.1;python_version<"3.5"
 py==1.11.0;python_version>="3.5"
 pytest==4.6.11;python_version<"3.5"
 pytest==6.1.2;python_version>="3.5" and python_version<"3.6"
-pytest==7.0.0;python_version>="3.6"
+pytest==7.0.1;python_version>="3.6"
 pytest-forked==1.3.0;python_version<"3.6"
 pytest-forked==1.4.0;python_version>="3.6"
 pytest-html==1.22.1;python_version<"3.6"
@@ -88,7 +88,7 @@ pygments==2.5.2;python_version<"3.5"
 pygments==2.11.2;python_version>="3.5"
 prompt-toolkit==1.0.18;python_version<"3.5"
 prompt-toolkit==2.0.10;python_version>="3.5" and python_version<"3.6"
-prompt-toolkit==3.0.27;python_version>="3.6"
+prompt-toolkit==3.0.28;python_version>="3.6"
 decorator==4.4.2;python_version<"3.5"
 decorator==5.1.1;python_version>="3.5"
 ipython==5.10.0;python_version<"3.5"

diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py
@@ -1,2 +1,2 @@
 # seleniumbase package
-__version__ = "2.4.11"
+__version__ = "2.4.12"
diff --git a/seleniumbase/fixtures/base_case.py b/seleniumbase/fixtures/base_case.py
@@ -54,6 +54,7 @@ def test_anything(self):
 from selenium.webdriver.remote.remote_connection import LOGGER
 from seleniumbase import config as sb_config
 from seleniumbase.config import settings
+from seleniumbase.core import download_helper
 from seleniumbase.core import log_helper
 from seleniumbase.fixtures import constants
 from seleniumbase.fixtures import css_to_xpath
@@ -4561,10 +4562,17 @@ def get_unique_links(self):
         links = page_utils._get_unique_links(page_url, soup)
         return links
 
-    def get_link_status_code(self, link, allow_redirects=False, timeout=5):
+    def get_link_status_code(
+        self,
+        link,
+        allow_redirects=False,
+        timeout=5,
+        verify=False,
+    ):
         """Get the status code of a link.
         If the timeout is set to less than 1, it becomes 1.
         If the timeout is exceeded by requests.get(), it will return a 404.
+        If "verify" is False, will ignore certificate errors.
         For a list of available status codes, see:
         https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
         """
@@ -4573,7 +4581,10 @@ def get_link_status_code(self, link, allow_redirects=False, timeout=5):
         if timeout < 1:
             timeout = 1
         status_code = page_utils._get_link_status_code(
-            link, allow_redirects=allow_redirects, timeout=timeout
+            link,
+            allow_redirects=allow_redirects,
+            timeout=timeout,
+            verify=verify,
         )
         return status_code
 
@@ -4604,10 +4615,12 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
         links = []
         for link in all_links:
             if (
-                "javascript:" not in link
+                "data:" not in link
                 and "mailto:" not in link
-                and "data:" not in link
+                and "javascript:" not in link
                 and "://fonts.gstatic.com" not in link
+                and "://fonts.googleapis.com" not in link
+                and "://googleads.g.doubleclick.net" not in link
             ):
                 links.append(link)
         if timeout:
@@ -4634,6 +4647,7 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
                     broken_links.append(link)
         self.__requests_timeout = None  # Reset the requests.get() timeout
         if len(broken_links) > 0:
+            broken_links = sorted(broken_links)
             bad_links_str = "\n".join(broken_links)
             if len(broken_links) == 1:
                 self.fail("Broken link detected:\n%s" % bad_links_str)
@@ -4681,6 +4695,7 @@ def get_pdf_text(
         wrap=False,
         nav=False,
         override=False,
+        caching=True,
     ):
         """Gets text from a PDF file.
         PDF can be either a URL or a file path on the local file system.
@@ -4702,7 +4717,8 @@ def get_pdf_text(
               (Not needed because the PDF will be downloaded anyway.)
         override - If the PDF file to be downloaded already exists in the
                    downloaded_files/ folder, that PDF will be used
-                   instead of downloading it again."""
+                   instead of downloading it again.
+        caching - If resources should be cached via pdfminer."""
         import warnings
 
         with warnings.catch_warnings():
@@ -4716,8 +4732,6 @@ def get_pdf_text(
             raise Exception("%s is not a PDF file! (Expecting a .pdf)" % pdf)
         file_path = None
         if page_utils.is_valid_url(pdf):
-            from seleniumbase.core import download_helper
-
             downloads_folder = download_helper.get_downloads_folder()
             if nav:
                 if self.get_current_url() != pdf:
@@ -4750,7 +4764,7 @@ def get_pdf_text(
             password="",
             page_numbers=page_search,
             maxpages=maxpages,
-            caching=False,
+            caching=caching,
             codec=codec,
         )
         pdf_text = self.__fix_unicode_conversion(pdf_text)
@@ -4996,8 +5010,6 @@ def get_downloads_folder(self):
           any clicks that download files will also use this folder
           rather than using the browser's default "downloads/" path."""
         self.__check_scope()
-        from seleniumbase.core import download_helper
-
         return download_helper.get_downloads_folder()
 
     def get_browser_downloads_folder(self):
@@ -5020,8 +5032,6 @@ def get_browser_downloads_folder(self):
         ):
             return os.path.join(os.path.expanduser("~"), "downloads")
         else:
-            from seleniumbase.core import download_helper
-
             return download_helper.get_downloads_folder()
         return os.path.join(os.path.expanduser("~"), "downloads")
 

diff --git a/seleniumbase/fixtures/page_utils.py b/seleniumbase/fixtures/page_utils.py
@@ -209,19 +209,31 @@ def _get_unique_links(page_url, soup):
                 pass
             unique_links.append(link)
 
-    return unique_links
-
-
-def _get_link_status_code(link, allow_redirects=False, timeout=5):
+    links = unique_links
+    links = list(set(links))  # Make sure all duplicates were removed
+    links = sorted(links)  # Sort all the links alphabetically
+    return links
+
+
+def _get_link_status_code(
+    link,
+    allow_redirects=False,
+    timeout=5,
+    verify=False,
+):
     """Get the status code of a link.
     If the timeout is exceeded, will return a 404.
+    If "verify" is False, will ignore certificate errors.
     For a list of available status codes, see:
     https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
     """
     status_code = None
     try:
         response = requests.get(
-            link, allow_redirects=allow_redirects, timeout=timeout
+            link,
+            allow_redirects=allow_redirects,
+            timeout=timeout,
+            verify=verify,
         )
         status_code = response.status_code
     except Exception:

diff --git a/setup.py b/setup.py
@@ -185,7 +185,7 @@
         'py==1.11.0;python_version>="3.5"',
         'pytest==4.6.11;python_version<"3.5"',
         'pytest==6.1.2;python_version>="3.5" and python_version<"3.6"',
-        'pytest==7.0.0;python_version>="3.6"',
+        'pytest==7.0.1;python_version>="3.6"',
         'pytest-forked==1.3.0;python_version<"3.6"',
         'pytest-forked==1.4.0;python_version>="3.6"',
         'pytest-html==1.22.1;python_version<"3.6"',
@@ -213,7 +213,7 @@
         'pygments==2.11.2;python_version>="3.5"',
         'prompt-toolkit==1.0.18;python_version<"3.5"',
         'prompt-toolkit==2.0.10;python_version>="3.5" and python_version<"3.6"',  # noqa: E501
-        'prompt-toolkit==3.0.27;python_version>="3.6"',
+        'prompt-toolkit==3.0.28;python_version>="3.6"',
         'decorator==4.4.2;python_version<"3.5"',
         'decorator==5.1.1;python_version>="3.5"',
         'ipython==5.10.0;python_version<"3.5"',