From accc481d7f7bc94d6b36be85326eaed301113ae1 Mon Sep 17 00:00:00 2001 From: Cursor Date: Sat, 31 Jan 2026 00:03:59 +0000 Subject: [PATCH 1/2] Add cloudscraper extension for proxy header support - Add ProxyCloudScraper class extending cloudscraper.CloudScraper - Add CipherSuiteProxyHeaderAdapter combining TLS and proxy header support - Preserve all CloudScraper features (Cloudflare bypass, stealth mode, etc.) - Add create_scraper() as drop-in replacement for cloudscraper.create_scraper() - Add documentation in docs/cloudscraper.rst --- docs/cloudscraper.rst | 126 ++++++++++++ docs/index.rst | 2 + python_proxy_headers/cloudscraper_proxy.py | 213 +++++++++++++++++++++ 3 files changed, 341 insertions(+) create mode 100644 docs/cloudscraper.rst create mode 100644 python_proxy_headers/cloudscraper_proxy.py diff --git a/docs/cloudscraper.rst b/docs/cloudscraper.rst new file mode 100644 index 0000000..0a95137 --- /dev/null +++ b/docs/cloudscraper.rst @@ -0,0 +1,126 @@ +CloudScraper +============ + +The ``cloudscraper_proxy`` module provides proxy header support for CloudScraper. + +Installation +------------ + +First, install CloudScraper:: + + pip install cloudscraper + +Then you can use the proxy header extension. + +Usage +----- + +Using create_scraper() +~~~~~~~~~~~~~~~~~~~~~~ + +The ``create_scraper()`` function is a drop-in replacement for ``cloudscraper.create_scraper()`` +that adds proxy header capabilities: + +.. 
code-block:: python + + from python_proxy_headers.cloudscraper_proxy import create_scraper + + # Create a scraper with proxy headers + scraper = create_scraper( + proxy_headers={'X-ProxyMesh-Country': 'US'}, + browser='chrome' + ) + + # Set proxy + scraper.proxies = {'https': 'http://user:pass@proxy.example.com:8080'} + + # Make requests - proxy headers are automatically sent + response = scraper.get('https://httpbin.org/ip') + print(response.text) + +Using ProxyCloudScraper Class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also use the ``ProxyCloudScraper`` class directly: + +.. code-block:: python + + from python_proxy_headers.cloudscraper_proxy import ProxyCloudScraper + + scraper = ProxyCloudScraper( + proxy_headers={'X-Custom-Header': 'value'}, + enable_stealth=True + ) + + scraper.proxies = {'https': 'http://proxy.example.com:8080'} + response = scraper.get('https://example.com') + +Updating Proxy Headers +~~~~~~~~~~~~~~~~~~~~~~ + +You can update proxy headers after creating the scraper: + +.. code-block:: python + + scraper = create_scraper(proxy_headers={'X-Header': 'initial'}) + + # Later, update headers + scraper.set_proxy_headers({'X-Header': 'updated', 'X-New': 'value'}) + +All CloudScraper Features Preserved +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The extension preserves all CloudScraper features: + +- Cloudflare bypass (v1, v2, v3, Turnstile) +- Browser emulation and user agent handling +- Cipher suite customization +- Proxy rotation +- Stealth mode +- Session management + +.. code-block:: python + + scraper = create_scraper( + proxy_headers={'X-ProxyMesh-Country': 'US'}, + browser='chrome', + enable_stealth=True, + stealth_options={ + 'min_delay': 1.0, + 'max_delay': 3.0, + 'human_like_delays': True + } + ) + +API Reference +------------- + +create_scraper() +~~~~~~~~~~~~~~~~ + +.. py:function:: create_scraper(proxy_headers=None, sess=None, **kwargs) + + Create a CloudScraper with proxy header support. 
# --- python_proxy_headers/cloudscraper_proxy.py (reconstructed from patch 1/2) ---
"""
CloudScraper extension for sending and receiving proxy headers.

This module provides a CloudScraper subclass that enables:

1. Sending custom headers to proxy servers during CONNECT
2. Capturing response headers from proxy servers

Example usage:

    from python_proxy_headers.cloudscraper_proxy import create_scraper

    scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
    scraper.proxies = {'https': 'http://proxy:8080'}
    response = scraper.get('https://example.com')

    # Access proxy response headers (stored on the response object)
    print(response.proxy_headers)
"""

from typing import Any, Dict, Optional

try:
    import cloudscraper
    from cloudscraper import CipherSuiteAdapter
except ImportError as exc:
    # Re-raise with install instructions, chaining the original failure so
    # the real cause (e.g. a broken dependency of cloudscraper) stays visible.
    raise ImportError(
        "cloudscraper is required for this module. "
        "Install it with: pip install cloudscraper"
    ) from exc

from .urllib3_proxy_manager import proxy_from_url


class CipherSuiteProxyHeaderAdapter(CipherSuiteAdapter):
    """
    Combines CloudScraper's CipherSuiteAdapter with proxy header support.

    This adapter:

    - Maintains CloudScraper's TLS/cipher suite customization
    - Adds the ability to send custom headers to proxy servers
    - Uses our custom ProxyManager that captures proxy response headers
    """

    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
        """
        :param proxy_headers: Headers to send to the proxy server during CONNECT.
        :param kwargs: Passed through to CipherSuiteAdapter unchanged.
        """
        self._proxy_headers = proxy_headers or {}
        super().__init__(**kwargs)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """
        Return a ProxyManager for the given proxy with custom header support.

        Overrides the default proxy_manager_for to use our custom ProxyManager
        that supports sending and receiving proxy headers.  SOCKS proxies are
        delegated to the parent implementation because the CONNECT-header
        mechanism does not apply to them.
        """
        # Reuse a previously built manager for this proxy URL.
        if proxy in self.proxy_manager:
            return self.proxy_manager[proxy]

        if proxy.lower().startswith("socks"):
            # SOCKS proxies don't support custom headers.
            return super().proxy_manager_for(proxy, **proxy_kwargs)

        # Start from the standard proxy headers (e.g. Proxy-Authorization)
        # and merge in our custom ones; custom headers win on conflict.
        merged_headers = self.proxy_headers(proxy)
        if self._proxy_headers:
            merged_headers.update(self._proxy_headers)

        # Forward TLS/socket settings when the adapter carries them.
        if getattr(self, 'ssl_context', None):
            proxy_kwargs['ssl_context'] = self.ssl_context
        if getattr(self, 'source_address', None):
            proxy_kwargs['source_address'] = self.source_address

        manager = self.proxy_manager[proxy] = proxy_from_url(
            proxy,
            proxy_headers=merged_headers,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs,
        )
        return manager


class ProxyCloudScraper(cloudscraper.CloudScraper):
    """
    CloudScraper with proxy header support.

    This class extends CloudScraper to add the ability to:

    - Send custom headers to proxy servers during CONNECT tunneling
    - Receive and access headers from proxy server responses

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        **kwargs: All other arguments passed to CloudScraper

    Example:
        scraper = ProxyCloudScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
        scraper.proxies = {'https': 'http://proxy.example.com:8080'}
        response = scraper.get('https://httpbin.org/ip')
        print(response.proxy_headers)  # Headers from proxy CONNECT response
    """

    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
        self._proxy_headers = proxy_headers or {}
        super().__init__(**kwargs)
        # Replace the adapters CloudScraper mounted with proxy-header-aware
        # ones, preserving the cipher-suite settings chosen by the parent.
        self._mount_proxy_adapters()

    def _build_adapter(self) -> CipherSuiteProxyHeaderAdapter:
        """Create one adapter carrying the current TLS and proxy-header state."""
        return CipherSuiteProxyHeaderAdapter(
            proxy_headers=self._proxy_headers,
            cipherSuite=self.cipherSuite,
            ecdhCurve=getattr(self, 'ecdhCurve', 'prime256v1'),
            server_hostname=getattr(self, 'server_hostname', None),
            source_address=getattr(self, 'source_address', None),
            ssl_context=getattr(self, 'ssl_context', None),
        )

    def _mount_proxy_adapters(self) -> None:
        """
        (Re)mount adapters for both schemes.

        Separate instances per scheme: requests adapters own per-mount
        connection pools and should not be shared between mounts.  HTTP is
        mounted too, though proxy headers mainly matter for HTTPS CONNECT.
        """
        self.mount('https://', self._build_adapter())
        self.mount('http://', self._build_adapter())

    def set_proxy_headers(self, proxy_headers: Dict[str, str]) -> None:
        """
        Update the proxy headers and remount adapters.

        Remounting is required because already-built ProxyManagers cache the
        headers they were created with.

        Args:
            proxy_headers: New proxy headers to use
        """
        self._proxy_headers = proxy_headers
        self._mount_proxy_adapters()


def create_scraper(
    proxy_headers: Optional[Dict[str, str]] = None,
    sess: Optional[Any] = None,
    **kwargs
) -> ProxyCloudScraper:
    """
    Create a CloudScraper with proxy header support.

    This is a drop-in replacement for cloudscraper.create_scraper() that
    adds proxy header capabilities.

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        sess: Existing session to copy attributes from
        **kwargs: All other arguments passed to CloudScraper

    Returns:
        ProxyCloudScraper instance

    Example:
        from python_proxy_headers.cloudscraper_proxy import create_scraper

        scraper = create_scraper(
            proxy_headers={'X-ProxyMesh-Country': 'US'},
            browser='chrome'
        )
        scraper.proxies = {'https': 'http://proxy:8080'}
        response = scraper.get('https://example.com')
    """
    scraper = ProxyCloudScraper(proxy_headers=proxy_headers, **kwargs)

    # Mirror cloudscraper.create_scraper(): copy well-known attributes from
    # an existing session so the new scraper can stand in for it.
    if sess:
        for attr in ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']:
            val = getattr(sess, attr, None)
            if val is not None:
                setattr(scraper, attr, val)

    return scraper


# Convenience alias
session = create_scraper


# --- test_proxy_headers.py addition (reconstructed from patch 2/2) ---

class CloudscraperTest(ModuleTest):
    """Test for cloudscraper extension."""

    name = "cloudscraper"

    def test(self, config: TestConfig) -> TestResult:
        """Run one end-to-end request through the proxy and verify the header."""
        try:
            from python_proxy_headers.cloudscraper_proxy import create_scraper

            # Create scraper with optional proxy headers to send
            scraper = create_scraper(proxy_headers=config.proxy_headers_to_send or None)
            scraper.proxies = {
                'http': config.proxy_url,
                'https': config.proxy_url
            }

            # Make request
            response = scraper.get(config.test_url)

            # Check for proxy header in response
            header_value = self._check_header(dict(response.headers), config.proxy_header)

            if header_value:
                return TestResult(
                    module_name=self.name,
                    success=True,
                    header_value=header_value,
                    response_status=response.status_code
                )
            else:
                return TestResult(
                    module_name=self.name,
                    success=False,
                    error=f"Header '{config.proxy_header}' not found in response",
                    response_status=response.status_code
                )

        except ImportError as e:
            return TestResult(
                module_name=self.name,
                success=False,
                error=f"Import error: {e}"
            )
        except Exception as e:
            return TestResult(
                module_name=self.name,
                success=False,
                error=f"{type(e).__name__}: {e}"
            )

# NOTE(review): patch 2/2 also adds 'cloudscraper': CloudscraperTest to the
# harness's module-test registry alongside 'requests', 'aiohttp', and 'httpx';
# the registry's opening statement is not visible in this chunk.