diff --git a/CHANGELOG.md b/CHANGELOG.md index cfdf64e..7bf08c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ Release History -=============== +================ + +1.1.7 (2025-11-23) +------------------ +**Improvements:** + +- Optimized logging. +- Fixed cookie response handling. + +1.1.6 (2025-10-14) +------------------ +**Enhancements:** +This release introduces two major enhancements that significantly improve the library's anti‑detection capabilities and overall robustness: + +**A Smart Rotator System** +- Automatically rotates proxies, headers, and TLS identifiers to mimic authentic traffic. +- Introduced three new rotator classes: `ProxyRotator`, `HeaderRotator`, and `TLSIdentifierRotator`. +- Client and AsyncClient now enable header and TLS identifier rotation by default, using built‑in realistic templates. +- Unified parameters accept a single value, a list, or a pre‑configured Rotator instance. +- Proxy feedback loop (`mark_result`/`amark_result`) optimizes the weighted rotation strategy. + +**Robust Library Management** +- Dependency‑free, self‑managing mechanism for the core `tls-client` C library lifecycle. +- Removed `requests` and `tqdm`; now uses built‑in `urllib` and `json`. +- TLSLibrary is version‑aware, automatically downloading the correct version from GitHub when needed. +- Automatic cleanup of old library files after successful updates. 
1.0.7 (2024-12-14) ------------------- diff --git a/docs/index.md b/docs/index.md index 068f638..c34de97 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,6 +2,13 @@ **A powerful and lightweight Python library for making secure and reliable HTTP/TLS fingerprint requests.** * * * +## Table of Contents + +- [Installation](#installation) +- [Quick Start](#quick-start) +- [Key Benefits](#key-benefits) +- [Cookie Management](#cookie-management) +- [Documentation](#documentation) **Installation** ---------------- @@ -20,6 +27,17 @@ pip install wrapper-tls-requests pip install git+https://github.com/thewebscraping/tls-requests.git ``` +> **Note**: After installation, you can update the TLS library manually using: +> ```bash +> python -m tls_requests.models.libraries +> ``` +> +> **Logging**: The library now uses the standard `logging` module. Configure it in your application, e.g.: +> ```python +> import logging +> logging.basicConfig(level=logging.INFO) +> ``` + ### Quick Start Start using TLS Requests with just a few lines of code: diff --git a/docs/quickstart.md b/docs/quickstart.md index bdc36a8..77d2a87 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -11,6 +11,8 @@ Begin by importing the library: ```pycon >>> import tls_requests +>>> import logging +>>> logging.basicConfig(level=logging.INFO) ``` Making HTTP Requests @@ -24,6 +26,7 @@ Fetch a webpage using a GET request: >>> r = tls_requests.get('https://httpbin.org/get') >>> r +>>> # Cookies now have their domain backfilled from the request URL ``` ### POST Request diff --git a/mkdocs.yml b/mkdocs.yml index 80dcce4..189f3e3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,5 +43,5 @@ markdown_extensions: css_class: highlight - mkautodoc -extra_css: - - css/custom.css +# extra_css: # custom CSS removed because the file is missing +# - css/custom.css diff --git a/tls_requests/__version__.py b/tls_requests/__version__.py index 5d7a62e..29eb95a 100644 --- a/tls_requests/__version__.py +++ 
b/tls_requests/__version__.py @@ -5,5 +5,5 @@ __url__ = "https://github.com/thewebscraping/tls-requests" __author__ = "Tu Pham" __author_email__ = "thetwofarm@gmail.com" -__version__ = "1.1.6" +__version__ = "1.1.7" __license__ = "MIT" diff --git a/tls_requests/models/cookies.py b/tls_requests/models/cookies.py index 061eacd..f8401db 100644 --- a/tls_requests/models/cookies.py +++ b/tls_requests/models/cookies.py @@ -561,7 +561,7 @@ def set(self, name, value, **kwargs) -> Optional[Cookie]: self.cookiejar.set_cookie(cookie) return cookie - def get(self, name, default=None, domain="", path="/") -> str: + def get(self, name, default=None, domain=None, path=None) -> str: return self.cookiejar.get(name, default, domain, path) def delete(self, name: str, domain: str = None, path: str = None) -> None: diff --git a/tls_requests/models/libraries.py b/tls_requests/models/libraries.py index c214890..ad0c1d4 100644 --- a/tls_requests/models/libraries.py +++ b/tls_requests/models/libraries.py @@ -12,6 +12,10 @@ from platform import machine from typing import List, Optional, Tuple +from tls_requests.utils import get_logger + +logger = get_logger("TLSLibrary") + __all__ = ["TLSLibrary"] LATEST_VERSION_TAG_NAME = "v1.11.2" @@ -60,7 +64,6 @@ PATTERN_RE = re.compile(r"%s-%s.*%s" % (PLATFORM, MACHINE, FILE_EXT), re.I) PATTERN_UBUNTU_RE = re.compile(r"%s-%s.*%s" % ("ubuntu", MACHINE, FILE_EXT), re.I) - TLS_LIBRARY_PATH = os.getenv("TLS_LIBRARY_PATH") @@ -131,6 +134,7 @@ class TLSLibrary: """ _PATH: str = None + _LIBRARY: Optional[ctypes.CDLL] = None _STATIC_API_DATA = { "name": "v1.11.2", "tag_name": "v1.11.2", @@ -250,9 +254,9 @@ def cleanup_files(cls, keep_file: str = None): if is_remove: try: os.remove(file_path) - print(f"Removed old library file: {file_path}") + logger.info(f"Removed old library file: {file_path}") except OSError as e: - print(f"Error removing old library file {file_path}: {e}") + logger.error(f"Error removing old library file {file_path}: {e}") @classmethod def 
fetch_api(cls, version: str = None, retries: int = 3): @@ -280,7 +284,7 @@ def _find_release(data, version_: str = None): _find_release(json.loads(content)) break except Exception as ex: - print("Unable to fetch GitHub API: %s" % ex) + logger.error("Unable to fetch GitHub API: %s" % ex) if not asset_urls and not ubuntu_urls: _find_release([cls._STATIC_API_DATA]) @@ -302,10 +306,23 @@ def find(cls) -> str: def find_all(cls) -> List[str]: return [src for src in glob.glob(os.path.join(BIN_DIR, r"*")) if src.lower().endswith(("so", "dll", "dylib"))] + @classmethod + def update(cls): + """Forces a download of the latest library version.""" + logger.info(f"Updating TLS library to version {LATEST_VERSION_TAG_NAME}...") + downloaded_fp = cls.download(version=LATEST_VERSION_TAG_NAME) + if downloaded_fp: + cls.cleanup_files(keep_file=downloaded_fp) + logger.info("Update complete.") + return downloaded_fp + logger.error("Update failed.") + + upgrade = update + @classmethod def download(cls, version: str = None) -> str: try: - print( + logger.info( "System Info - Platform: %s, Machine: %s, File Ext : %s." 
% ( PLATFORM, @@ -319,7 +336,7 @@ def download(cls, version: str = None) -> str: download_url = url break - print("Library Download URL: %s" % download_url) + logger.info("Library Download URL: %s" % download_url) if download_url: destination_name = download_url.split("/")[-1] destination = os.path.join(BIN_DIR, destination_name) @@ -352,13 +369,12 @@ def download(cls, version: str = None) -> str: sys.stdout.write(f"\rDownloading {destination_name}: [{bar}] {percent:.1f}%") sys.stdout.flush() - print() # Newline after download completes return destination except (urllib.error.URLError, urllib.error.HTTPError) as ex: - print("Unable to download file: %s" % ex) + logger.error("Unable to download file: %s" % ex) except Exception as e: - print("An unexpected error occurred during download: %s" % e) + logger.error("An unexpected error occurred during download: %s" % e) @classmethod def set_path(cls, fp: str): @@ -370,22 +386,32 @@ def load(cls): Loads the TLS library. It checks for the correct version, downloads it if the local version is outdated or missing, and then loads it into memory. 
""" + target_version = cls._parse_version(LATEST_VERSION_TAG_NAME) + + if cls._LIBRARY and cls._PATH: + cached_version = cls._parse_version_from_filename(cls._PATH) + if cached_version == target_version: + return cls._LIBRARY def _load_library(fp_): try: lib = ctypes.cdll.LoadLibrary(fp_) cls.set_path(fp_) - print(f"Successfully loaded TLS library: {fp_}") + cls._LIBRARY = lib + logger.info(f"Successfully loaded TLS library: {fp_}") return lib except Exception as ex: - print(f"Unable to load TLS library '{fp_}', details: {ex}") + logger.error(f"Unable to load TLS library '{fp_}', details: {ex}") try: os.remove(fp_) except (FileNotFoundError, PermissionError): pass - target_version = cls._parse_version(LATEST_VERSION_TAG_NAME) - print(f"Required library version: {LATEST_VERSION_TAG_NAME}") + if TLS_LIBRARY_PATH: + logger.info(f"Loading TLS library from environment variable: {TLS_LIBRARY_PATH}") + return _load_library(TLS_LIBRARY_PATH) + + logger.debug(f"Required library version: {LATEST_VERSION_TAG_NAME}") local_files = cls.find_all() newest_local_version = (0, 0, 0) newest_local_file = None @@ -396,17 +422,21 @@ def _load_library(fp_): if file_version > newest_local_version: newest_local_version = file_version newest_local_file = file_path - print( + logger.debug( f"Found newest local library: {newest_local_file} (version {'.'.join(map(str, newest_local_version))})" ) else: - print("No local library found.") + logger.debug("No local library found.") if newest_local_version < target_version: if newest_local_file: - print(f"Local library is outdated. Upgrading to {LATEST_VERSION_TAG_NAME}...") + logger.warning( + f"Local library is outdated (Found: {'.'.join(map(str, newest_local_version))}, " + f"Required: {LATEST_VERSION_TAG_NAME}). " + f"Auto-downloading... 
To manually upgrade, run: `python -m tls_requests.models.libraries`" + ) else: - print(f"Downloading required library version {LATEST_VERSION_TAG_NAME}...") + logger.info(f"Downloading required library version {LATEST_VERSION_TAG_NAME}...") downloaded_fp = cls.download(version=LATEST_VERSION_TAG_NAME) if downloaded_fp: @@ -414,6 +444,11 @@ def _load_library(fp_): library = _load_library(downloaded_fp) if library: return library + + logger.error( + f"Failed to download the required TLS library {LATEST_VERSION_TAG_NAME}. " + "Please check your connection or download it manually from GitHub." + ) raise OSError("Failed to download the required TLS library.") if newest_local_file: @@ -423,3 +458,7 @@ def _load_library(fp_): return library raise OSError("Could not find or load a compatible TLS library.") + + +if __name__ == "__main__": + TLSLibrary.update() diff --git a/tls_requests/models/response.py b/tls_requests/models/response.py index e164c8b..eaacf02 100644 --- a/tls_requests/models/response.py +++ b/tls_requests/models/response.py @@ -98,9 +98,14 @@ def http_version(self) -> str: @property def cookies(self) -> Cookies: - if self._cookies is None: - self._cookies = Cookies() - self._cookies.extract_cookies(self, self.request) + if self._request: + # Fix missing domain in cookies by backfilling from request URL + # Ref: https://github.com/thewebscraping/tls-requests/issues/47 + for cookie in self._cookies.cookiejar: + if not cookie.domain: + cookie.domain = self._request.url.host + cookie.domain_specified = False + cookie.domain_initial_dot = False return self._cookies @property diff --git a/tls_requests/utils.py b/tls_requests/utils.py index 1be6af6..21e2d9c 100644 --- a/tls_requests/utils.py +++ b/tls_requests/utils.py @@ -5,8 +5,8 @@ import logging from typing import Any, AnyStr, Union -FORMAT = "%(levelname)s:%(asctime)s:%(name)s:%(funcName)s:%(lineno)d >>> %(message)s" -DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" +FORMAT = "[%(asctime)s] %(levelname)-8s 
%(name)s:%(funcName)s:%(lineno)d - %(message)s" +DATE_FORMAT = "%Y-%m-%d %H:%M:%S" def import_module(name: Union[str, list[str]]): @@ -30,10 +30,18 @@ def import_module(name: Union[str, list[str]]): jsonlib = json -def get_logger(name: str = "TLSRequests", level: int | str = logging.INFO) -> logging.Logger: - logging.basicConfig(format=FORMAT, datefmt=DATE_FORMAT, level=level) +def get_logger( + name: str = "TLSRequests", level: int | str = logging.INFO +) -> logging.Logger: logger = logging.getLogger(name) logger.setLevel(level) + + if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter(FORMAT, datefmt=DATE_FORMAT) + handler.setFormatter(formatter) + logger.addHandler(handler) + return logger