From d11d4fcca80cb5b747cfe931881baf0c9c9c4cf5 Mon Sep 17 00:00:00 2001 From: Cursor Date: Sat, 31 Jan 2026 00:02:28 +0000 Subject: [PATCH 1/3] Add pycurl extension for proxy header support - Add ProxyCurl class wrapping pycurl with proxy header capabilities - Support sending custom headers to proxy via CURLOPT_PROXYHEADER - Capture proxy CONNECT response headers via HEADERFUNCTION callback - Add convenience functions (get, post, etc.) matching existing API style - Add documentation in docs/pycurl.rst --- docs/index.rst | 2 + docs/pycurl.rst | 135 +++++++++++ python_proxy_headers/pycurl_proxy.py | 351 +++++++++++++++++++++++++++ 3 files changed, 488 insertions(+) create mode 100644 docs/pycurl.rst create mode 100644 python_proxy_headers/pycurl_proxy.py diff --git a/docs/index.rst b/docs/index.rst index 8c97f64..20c778d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,6 +14,7 @@ We currently provide extensions to the following packages: * :doc:`requests ` - Simple HTTP library for Python * :doc:`aiohttp ` - Async HTTP client/server framework * :doc:`httpx ` - Modern HTTP client library +* :doc:`pycurl ` - Python interface to libcurl Purpose ------- @@ -50,6 +51,7 @@ Contents requests aiohttp httpx + pycurl Indices and tables ================== diff --git a/docs/pycurl.rst b/docs/pycurl.rst new file mode 100644 index 0000000..9349e50 --- /dev/null +++ b/docs/pycurl.rst @@ -0,0 +1,135 @@ +PycURL +====== + +The ``pycurl_proxy`` module provides proxy header support for PycURL. + +Installation +------------ + +First, install PycURL:: + + pip install pycurl + +Then you can use the proxy header extension. + +Usage +----- + +Using the ProxyCurl Class +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``ProxyCurl`` class wraps pycurl to provide easy proxy header handling: + +.. 
code-block:: python + + from python_proxy_headers.pycurl_proxy import ProxyCurl + + # Create a ProxyCurl instance with proxy headers + curl = ProxyCurl(proxy_headers={'X-ProxyMesh-Country': 'US'}) + + # Make a request through a proxy + response = curl.get( + 'https://httpbin.org/ip', + proxy='http://user:pass@proxy.example.com:8080' + ) + + # Access the response + print(response.status_code) + print(response.text) + + # Access headers from the proxy's CONNECT response + print(response.proxy_headers) + print(response.proxy_status_code) + +Using Convenience Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For one-off requests, use the module-level functions: + +.. code-block:: python + + from python_proxy_headers import pycurl_proxy + + response = pycurl_proxy.get( + 'https://httpbin.org/ip', + proxy='http://proxy.example.com:8080', + proxy_headers={'X-Custom-Header': 'value'} + ) + + print(response.text) + print(response.proxy_headers) + +API Reference +------------- + +ProxyCurl Class +~~~~~~~~~~~~~~~ + +.. py:class:: ProxyCurl(proxy_headers=None) + + PycURL wrapper with proxy header support. + + :param proxy_headers: Dict of headers to send to the proxy server + + .. py:method:: request(method, url, proxy=None, proxy_headers=None, headers=None, data=None, timeout=None, verify=True) + + Make an HTTP request with proxy header support. + + :param method: HTTP method (GET, POST, etc.) + :param url: Target URL + :param proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080') + :param proxy_headers: Headers to send to the proxy (merged with instance headers) + :param headers: Headers to send to the origin server + :param data: Request body for POST/PUT + :param timeout: Request timeout in seconds + :param verify: Whether to verify SSL certificates + :returns: ProxyResponse object + + .. py:method:: get(url, **kwargs) + .. py:method:: post(url, **kwargs) + .. py:method:: put(url, **kwargs) + .. py:method:: delete(url, **kwargs) + .. py:method:: head(url, **kwargs) + .. 
py:method:: options(url, **kwargs) + .. py:method:: patch(url, **kwargs) + +ProxyResponse Class +~~~~~~~~~~~~~~~~~~~ + +.. py:class:: ProxyResponse + + Response object containing body and headers from both proxy and origin. + + .. py:attribute:: status_code + :type: int + + HTTP status code from the origin server. + + .. py:attribute:: headers + :type: dict + + Headers from the origin server response. + + .. py:attribute:: content + :type: bytes + + Response body as bytes. + + .. py:attribute:: proxy_headers + :type: dict + + Headers from the proxy's CONNECT response (HTTPS only). + + .. py:attribute:: proxy_status_code + :type: int or None + + Status code from the proxy's CONNECT response (HTTPS only). + + .. py:attribute:: text + :type: str + + Response body decoded as UTF-8. + + .. py:method:: raise_for_status() + + Raise an exception if the status code indicates an error. diff --git a/python_proxy_headers/pycurl_proxy.py b/python_proxy_headers/pycurl_proxy.py new file mode 100644 index 0000000..3d9795c --- /dev/null +++ b/python_proxy_headers/pycurl_proxy.py @@ -0,0 +1,351 @@ +""" +PycURL extension for sending and receiving proxy headers. + +This module provides wrappers around pycurl that enable: +1. Sending custom headers to proxy servers during CONNECT +2. 
class ProxyHTTPError(Exception):
    """Raised by ``ProxyResponse.raise_for_status`` for 4xx/5xx responses.

    Subclasses ``Exception`` so existing ``except Exception`` handlers keep
    working; the message text is unchanged ("HTTP Error <code>").
    """

    def __init__(self, status_code: int):
        super().__init__(f"HTTP Error {status_code}")
        self.status_code = status_code


@dataclass
class ProxyResponse:
    """Response object containing body and headers from both proxy and origin."""
    # Status code reported by libcurl for the final (origin) response.
    status_code: int
    # Headers of the last response section seen (the origin response).
    headers: Dict[str, str]
    # Raw response body.
    content: bytes
    # Headers of the proxy's CONNECT response (HTTPS through a proxy only).
    proxy_headers: Dict[str, str] = field(default_factory=dict)
    # Status code of the proxy's CONNECT response, if one was captured.
    proxy_status_code: Optional[int] = None

    @property
    def text(self) -> str:
        """Return the response body decoded as UTF-8 (invalid bytes replaced)."""
        return self.content.decode('utf-8', errors='replace')

    def raise_for_status(self):
        """Raise ``ProxyHTTPError`` if the status code is 400 or above."""
        if self.status_code >= 400:
            raise ProxyHTTPError(self.status_code)


class ProxyCurl:
    """
    PycURL wrapper with proxy header support.

    Provides:
    - Sending custom headers to proxy servers (CURLOPT_PROXYHEADER)
    - Receiving headers from proxy CONNECT responses

    Every call to ``request`` uses its own short-lived ``pycurl.Curl`` handle,
    so the instance itself only stores the default proxy headers.

    Args:
        proxy_headers: Dict of headers to send to the proxy server

    Example:
        curl = ProxyCurl(proxy_headers={'X-ProxyMesh-Country': 'US'})
        response = curl.get('https://httpbin.org/ip', proxy='http://proxy:8080')
        print(response.proxy_headers)
    """

    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None):
        self._proxy_headers = proxy_headers or {}
        # No handle is allocated up front: request() never used it, so
        # creating one here only wasted a native libcurl handle.
        self._curl = None

    def close(self):
        """Close the instance-level curl handle, if one exists.

        Kept for API compatibility (context-manager use); per-request handles
        are always closed by ``request`` itself.
        """
        if self._curl:
            self._curl.close()
            self._curl = None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @staticmethod
    def _apply_method(c, method: str, data: Optional[bytes]) -> None:
        """Configure the HTTP verb and request body on handle ``c``."""
        verb = method.upper()
        if verb == 'GET':
            c.setopt(pycurl.HTTPGET, 1)
        elif verb == 'HEAD':
            c.setopt(pycurl.NOBODY, 1)
        elif verb == 'POST':
            c.setopt(pycurl.POST, 1)
            # Always provide a body: POST=1 without POSTFIELDS makes libcurl
            # pull the body from its read callback instead.
            c.setopt(pycurl.POSTFIELDS, data if data is not None else b'')
        elif verb == 'PUT':
            # UPLOAD expects a read callback and POSTFIELDS implies POST, so
            # the two must not be combined; use a custom verb with a
            # POSTFIELDS body (empty body when no data is given).
            c.setopt(pycurl.CUSTOMREQUEST, 'PUT')
            c.setopt(pycurl.POSTFIELDS, data if data is not None else b'')
        else:
            # DELETE, OPTIONS, PATCH and any other verb: send the verb as-is
            # (previously unknown verbs silently became GET) and attach a
            # body only when the caller supplied one.
            c.setopt(pycurl.CUSTOMREQUEST, verb)
            if data is not None:
                c.setopt(pycurl.POSTFIELDS, data)

    @staticmethod
    def _apply_proxy_headers(c, proxy_headers: Dict[str, str]) -> None:
        """Send ``proxy_headers`` to the proxy only (not to the origin)."""
        if not proxy_headers:
            return
        header_list = [f"{k}: {v}" for k, v in proxy_headers.items()]
        try:
            c.setopt(pycurl.PROXYHEADER, header_list)
        except AttributeError:
            # pycurl build does not expose the constant;
            # CURLOPT_PROXYHEADER is option 10228 in libcurl.
            c.setopt(10228, header_list)

        # CURLOPT_HEADEROPT (229) = CURLHEADER_SEPARATE (1) keeps proxy
        # headers and origin headers apart.
        try:
            c.setopt(pycurl.HEADEROPT, pycurl.HEADER_SEPARATE)
        except AttributeError:
            try:
                c.setopt(229, 1)
            except pycurl.error:
                pass  # Option may not be available in older versions

    @staticmethod
    def _parse_header_sections(header_lines: List[bytes]) -> List[tuple]:
        """Split raw header lines into ``(status, headers)`` response sections.

        A new section starts at every ``HTTP/...`` status line. ``status`` is
        ``None`` when the status line carries no parseable integer code.
        Lines without a colon (blank separators) are ignored.
        """
        sections: List[tuple] = []
        current = None  # [status, headers] of the section being built

        for raw in header_lines:
            line = raw.decode('utf-8', errors='replace').strip()

            if line.startswith('HTTP/'):
                if current is not None:
                    sections.append((current[0], current[1]))
                # Status line looks like: HTTP/1.1 200 OK
                parts = line.split(' ', 2)
                status = None
                if len(parts) >= 2:
                    try:
                        status = int(parts[1])
                    except ValueError:
                        status = None
                current = [status, {}]
            elif ':' in line:
                if current is None:
                    # Header seen before any status line; keep it anyway.
                    current = [None, {}]
                key, value = line.split(':', 1)
                current[1][key.strip()] = value.strip()

        if current is not None:
            sections.append((current[0], current[1]))
        return sections

    def request(
        self,
        method: str,
        url: str,
        proxy: Optional[str] = None,
        proxy_headers: Optional[Dict[str, str]] = None,
        headers: Optional[Dict[str, str]] = None,
        data: Optional[bytes] = None,
        timeout: Optional[int] = None,
        verify: bool = True,
        **kwargs
    ) -> ProxyResponse:
        """
        Make an HTTP request with proxy header support.

        Args:
            method: HTTP method (GET, POST, etc.); case-insensitive
            url: Target URL
            proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080')
            proxy_headers: Headers to send to the proxy (merged with instance
                headers; request-level values win)
            headers: Headers to send to the origin server
            data: Request body (POST and PUT send an empty body when omitted)
            timeout: Request timeout in seconds
            verify: Whether to verify SSL certificates
            **kwargs: Accepted and ignored, as before

        Returns:
            ProxyResponse with body, headers, and proxy_headers

        Raises:
            pycurl.error: If the transfer fails.
        """
        c = pycurl.Curl()

        try:
            body_buffer = BytesIO()
            header_lines: List[bytes] = []

            # A CONNECT response is only expected for HTTPS via a proxy.
            is_https_via_proxy = bool(proxy) and urlparse(url).scheme == 'https'

            def header_callback(header_line: bytes) -> int:
                header_lines.append(header_line)
                return len(header_line)

            c.setopt(pycurl.URL, url)
            c.setopt(pycurl.WRITEFUNCTION, body_buffer.write)
            c.setopt(pycurl.HEADERFUNCTION, header_callback)

            self._apply_method(c, method, data)

            if headers:
                c.setopt(pycurl.HTTPHEADER, [f"{k}: {v}" for k, v in headers.items()])

            if proxy:
                c.setopt(pycurl.PROXY, proxy)
                merged = {**self._proxy_headers}
                if proxy_headers:
                    merged.update(proxy_headers)
                self._apply_proxy_headers(c, merged)

            if timeout:
                c.setopt(pycurl.TIMEOUT, timeout)

            if not verify:
                c.setopt(pycurl.SSL_VERIFYPEER, 0)
                c.setopt(pycurl.SSL_VERIFYHOST, 0)

            c.perform()
            status_code = c.getinfo(pycurl.RESPONSE_CODE)

            sections = self._parse_header_sections(header_lines)

            origin_headers: Dict[str, str] = {}
            proxy_response_headers: Dict[str, str] = {}
            proxy_status = None

            # HTTPS through a proxy: first section is the CONNECT response,
            # last section is the origin's response.
            if is_https_via_proxy and len(sections) >= 2:
                proxy_status, proxy_response_headers = sections[0]
                _, origin_headers = sections[-1]
            elif sections:
                _, origin_headers = sections[-1]

            return ProxyResponse(
                status_code=status_code,
                headers=origin_headers,
                content=body_buffer.getvalue(),
                proxy_headers=proxy_response_headers,
                proxy_status_code=proxy_status
            )

        finally:
            c.close()

    def get(self, url: str, **kwargs) -> ProxyResponse:
        """Make a GET request."""
        return self.request('GET', url, **kwargs)

    def post(self, url: str, **kwargs) -> ProxyResponse:
        """Make a POST request."""
        return self.request('POST', url, **kwargs)

    def put(self, url: str, **kwargs) -> ProxyResponse:
        """Make a PUT request."""
        return self.request('PUT', url, **kwargs)

    def delete(self, url: str, **kwargs) -> ProxyResponse:
        """Make a DELETE request."""
        return self.request('DELETE', url, **kwargs)

    def head(self, url: str, **kwargs) -> ProxyResponse:
        """Make a HEAD request."""
        return self.request('HEAD', url, **kwargs)

    def options(self, url: str, **kwargs) -> ProxyResponse:
        """Make an OPTIONS request."""
        return self.request('OPTIONS', url, **kwargs)

    def patch(self, url: str, **kwargs) -> ProxyResponse:
        """Make a PATCH request."""
        return self.request('PATCH', url, **kwargs)


def request(
    method: str,
    url: str,
    proxy: Optional[str] = None,
    proxy_headers: Optional[Dict[str, str]] = None,
    **kwargs
) -> ProxyResponse:
    """
    Make a one-off request with proxy header support.

    Args:
        method: HTTP method
        url: Target URL
        proxy: Proxy URL
        proxy_headers: Headers to send to the proxy
        **kwargs: Additional arguments passed to ProxyCurl.request

    Returns:
        ProxyResponse object
    """
    with ProxyCurl(proxy_headers=proxy_headers) as curl:
        return curl.request(method, url, proxy=proxy, **kwargs)


def get(url: str, **kwargs) -> ProxyResponse:
    """Make a GET request with proxy header support."""
    return request('GET', url, **kwargs)


def post(url: str, **kwargs) -> ProxyResponse:
    """Make a POST request with proxy header support."""
    return request('POST', url, **kwargs)


def put(url: str, **kwargs) -> ProxyResponse:
    """Make a PUT request with proxy header support."""
    return request('PUT', url, **kwargs)


def delete(url: str, **kwargs) -> ProxyResponse:
    """Make a DELETE request with proxy header support."""
    return request('DELETE', url, **kwargs)


def head(url: str, **kwargs) -> ProxyResponse:
    """Make a HEAD request with proxy header support."""
    return request('HEAD', url, **kwargs)


def options(url: str, **kwargs) -> ProxyResponse:
    """Make an OPTIONS request with proxy header support."""
    return request('OPTIONS', url, **kwargs)


def patch(url: str, **kwargs) -> ProxyResponse:
    """Make a PATCH request with proxy header support."""
    return request('PATCH', url, **kwargs)
class PycurlTest(ModuleTest):
    """Harness check for the pycurl extension.

    Sends one request through the configured proxy with ``ProxyCurl`` and
    succeeds when the expected proxy header shows up in either the origin
    headers or the proxy CONNECT headers of the response.
    """

    name = "pycurl"

    def test(self, config: TestConfig) -> TestResult:
        try:
            from python_proxy_headers.pycurl_proxy import ProxyCurl

            # An empty header mapping is passed on as None.
            outgoing = config.proxy_headers_to_send or None
            with ProxyCurl(proxy_headers=outgoing) as client:
                reply = client.get(config.test_url, proxy=config.proxy_url)

                # Origin headers are consulted first; the CONNECT headers are
                # only checked when the first lookup finds nothing.
                found = (
                    self._check_header(reply.headers, config.proxy_header)
                    or self._check_header(reply.proxy_headers, config.proxy_header)
                )

                if not found:
                    return TestResult(
                        module_name=self.name,
                        success=False,
                        error=f"Header '{config.proxy_header}' not found in response",
                        response_status=reply.status_code
                    )

                return TestResult(
                    module_name=self.name,
                    success=True,
                    header_value=found,
                    response_status=reply.status_code
                )

        except ImportError as import_failure:
            return TestResult(
                module_name=self.name,
                success=False,
                error=f"Import error: {import_failure}"
            )
        except Exception as failure:
            # Harness boundary: any failure is reported, never propagated.
            return TestResult(
                module_name=self.name,
                success=False,
                error=f"{type(failure).__name__}: {failure}"
            )
============================================================================= @@ -411,6 +463,7 @@ def test(self, config: TestConfig) -> TestResult: 'requests': RequestsTest, 'aiohttp': AiohttpTest, 'httpx': HttpxTest, + 'pycurl': PycurlTest, } From 84f4ded505febed94fa5326dc19056e494436cfd Mon Sep 17 00:00:00 2001 From: Cursor Date: Tue, 3 Feb 2026 14:17:07 +0000 Subject: [PATCH 3/3] Simplify pycurl module with low-level helpers Rewrite pycurl_proxy to provide: 1. Low-level helpers for existing pycurl code: - set_proxy_headers(curl, headers) - sets PROXYHEADER on any Curl instance - HeaderCapture(curl) - captures and parses proxy/origin headers 2. High-level convenience functions (get, post, etc.) for simple use cases This approach is more "pycurl-native" - users can add proxy header support to existing code with just 2 lines instead of rewriting their code. --- docs/pycurl.rst | 152 ++++--- python_proxy_headers/pycurl_proxy.py | 568 ++++++++++++++------------- test_proxy_headers.py | 52 +-- 3 files changed, 427 insertions(+), 345 deletions(-) diff --git a/docs/pycurl.rst b/docs/pycurl.rst index 9349e50..61e8385 100644 --- a/docs/pycurl.rst +++ b/docs/pycurl.rst @@ -15,90 +15,142 @@ Then you can use the proxy header extension. Usage ----- -Using the ProxyCurl Class -~~~~~~~~~~~~~~~~~~~~~~~~~ +Low-Level Helpers (for existing pycurl code) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``ProxyCurl`` class wraps pycurl to provide easy proxy header handling: +If you already have pycurl code, you can add proxy header support with minimal changes: .. 
code-block:: python - from python_proxy_headers.pycurl_proxy import ProxyCurl + import pycurl + from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture - # Create a ProxyCurl instance with proxy headers - curl = ProxyCurl(proxy_headers={'X-ProxyMesh-Country': 'US'}) + c = pycurl.Curl() + c.setopt(pycurl.URL, 'https://httpbin.org/ip') + c.setopt(pycurl.PROXY, 'http://proxy.example.com:8080') - # Make a request through a proxy - response = curl.get( - 'https://httpbin.org/ip', - proxy='http://user:pass@proxy.example.com:8080' - ) + # Add custom headers to send to the proxy + set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'}) - # Access the response - print(response.status_code) - print(response.text) + # Capture response headers (installs HEADERFUNCTION callback) + capture = HeaderCapture(c) + + c.perform() # Access headers from the proxy's CONNECT response - print(response.proxy_headers) - print(response.proxy_status_code) + print(capture.proxy_headers) # {'X-ProxyMesh-IP': '1.2.3.4', ...} + print(capture.proxy_status) # 200 + + # Access headers from the origin server + print(capture.origin_headers) # {'Content-Type': 'application/json', ...} -Using Convenience Functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~ + c.close() -For one-off requests, use the module-level functions: +High-Level Convenience Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For simpler use cases, use the module-level functions: .. code-block:: python - from python_proxy_headers import pycurl_proxy + from python_proxy_headers.pycurl_proxy import get - response = pycurl_proxy.get( + response = get( 'https://httpbin.org/ip', proxy='http://proxy.example.com:8080', - proxy_headers={'X-Custom-Header': 'value'} + proxy_headers={'X-ProxyMesh-Country': 'US'} ) + print(response.status_code) print(response.text) print(response.proxy_headers) API Reference ------------- -ProxyCurl Class -~~~~~~~~~~~~~~~ +Low-Level Functions +~~~~~~~~~~~~~~~~~~~ -.. py:class:: ProxyCurl(proxy_headers=None) +.. 
py:function:: set_proxy_headers(curl, headers) - PycURL wrapper with proxy header support. + Set custom headers to send to the proxy server during CONNECT. - :param proxy_headers: Dict of headers to send to the proxy server + :param curl: A pycurl.Curl instance + :param headers: Dict of headers to send to the proxy - .. py:method:: request(method, url, proxy=None, proxy_headers=None, headers=None, data=None, timeout=None, verify=True) +.. py:class:: HeaderCapture(curl=None) - Make an HTTP request with proxy header support. + Captures and parses HTTP response headers from pycurl requests. - :param method: HTTP method (GET, POST, etc.) - :param url: Target URL - :param proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080') - :param proxy_headers: Headers to send to the proxy (merged with instance headers) - :param headers: Headers to send to the origin server - :param data: Request body for POST/PUT - :param timeout: Request timeout in seconds - :param verify: Whether to verify SSL certificates - :returns: ProxyResponse object + :param curl: Optional pycurl.Curl instance. If provided, automatically + installs the HEADERFUNCTION callback. - .. py:method:: get(url, **kwargs) - .. py:method:: post(url, **kwargs) - .. py:method:: put(url, **kwargs) - .. py:method:: delete(url, **kwargs) - .. py:method:: head(url, **kwargs) - .. py:method:: options(url, **kwargs) - .. py:method:: patch(url, **kwargs) + .. py:method:: install(curl) -ProxyResponse Class -~~~~~~~~~~~~~~~~~~~ + Install the header callback on a pycurl.Curl instance. + + :param curl: A pycurl.Curl instance + :returns: self, for chaining + + .. py:method:: reset() + + Clear captured headers for reuse. + + .. py:attribute:: proxy_headers + :type: dict + + Headers from the proxy's CONNECT response. + + .. py:attribute:: proxy_status + :type: int or None + + Status code from the proxy's CONNECT response. + + .. py:attribute:: origin_headers + :type: dict + + Headers from the origin server's response. + + .. 
py:attribute:: origin_status + :type: int or None + + Status code from the origin server's response. + + .. py:attribute:: all_headers + :type: dict + + All headers merged (proxy headers first, then origin). + +High-Level Functions +~~~~~~~~~~~~~~~~~~~~ + +.. py:function:: request(method, url, proxy=None, proxy_headers=None, headers=None, data=None, timeout=None, verify=True) + + Make an HTTP request with proxy header support. + + :param method: HTTP method (GET, POST, etc.) + :param url: Target URL + :param proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080') + :param proxy_headers: Headers to send to the proxy + :param headers: Headers to send to the origin server + :param data: Request body for POST/PUT/PATCH + :param timeout: Request timeout in seconds + :param verify: Whether to verify SSL certificates + :returns: Response object + +.. py:function:: get(url, **kwargs) +.. py:function:: post(url, **kwargs) +.. py:function:: put(url, **kwargs) +.. py:function:: delete(url, **kwargs) +.. py:function:: head(url, **kwargs) +.. py:function:: patch(url, **kwargs) + +Response Class +~~~~~~~~~~~~~~ -.. py:class:: ProxyResponse +.. py:class:: Response - Response object containing body and headers from both proxy and origin. + Response object from high-level API. .. py:attribute:: status_code :type: int @@ -120,7 +172,7 @@ ProxyResponse Class Headers from the proxy's CONNECT response (HTTPS only). - .. py:attribute:: proxy_status_code + .. py:attribute:: proxy_status :type: int or None Status code from the proxy's CONNECT response (HTTPS only). diff --git a/python_proxy_headers/pycurl_proxy.py b/python_proxy_headers/pycurl_proxy.py index 3d9795c..ca60d73 100644 --- a/python_proxy_headers/pycurl_proxy.py +++ b/python_proxy_headers/pycurl_proxy.py @@ -1,28 +1,42 @@ """ PycURL extension for sending and receiving proxy headers. -This module provides wrappers around pycurl that enable: -1. Sending custom headers to proxy servers during CONNECT -2. 
Capturing response headers from proxy servers +This module provides helper functions and classes for working with proxy headers +in pycurl. It can be used in two ways: -Example usage: - from python_proxy_headers.pycurl_proxy import ProxyCurl, request +1. Low-level helpers for existing pycurl code: - # Using the ProxyCurl class - curl = ProxyCurl(proxy_headers={'X-ProxyMesh-Country': 'US'}) - response = curl.get('https://example.com', proxy='http://proxy:8080') - print(response.proxy_headers) # Headers from proxy CONNECT response + import pycurl + from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture + + c = pycurl.Curl() + c.setopt(pycurl.URL, 'https://example.com') + c.setopt(pycurl.PROXY, 'http://proxy:8080') + + # Add proxy headers + set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'}) + + # Capture response headers (including proxy CONNECT headers) + capture = HeaderCapture(c) + + c.perform() + + print(capture.proxy_headers) # Headers from proxy CONNECT response + print(capture.origin_headers) # Headers from origin server + +2. 
High-level convenience functions: - # Using convenience function - response = request('GET', 'https://example.com', - proxy='http://proxy:8080', - proxy_headers={'X-Custom': 'value'}) + from python_proxy_headers.pycurl_proxy import get + + response = get('https://example.com', + proxy='http://proxy:8080', + proxy_headers={'X-ProxyMesh-Country': 'US'}) + print(response.proxy_headers) """ from io import BytesIO from dataclasses import dataclass, field -from typing import Dict, List, Optional, Any -from urllib.parse import urlparse +from typing import Dict, List, Optional, Tuple try: import pycurl @@ -33,263 +47,217 @@ ) -@dataclass -class ProxyResponse: - """Response object containing body and headers from both proxy and origin.""" - status_code: int - headers: Dict[str, str] - content: bytes - proxy_headers: Dict[str, str] = field(default_factory=dict) - proxy_status_code: Optional[int] = None - - @property - def text(self) -> str: - """Return response body as text.""" - return self.content.decode('utf-8', errors='replace') - - def raise_for_status(self): - """Raise an exception if status code indicates an error.""" - if self.status_code >= 400: - raise Exception(f"HTTP Error {self.status_code}") - +# ============================================================================= +# Low-level helper functions +# ============================================================================= -class ProxyCurl: +def set_proxy_headers(curl, headers: Dict[str, str]) -> None: """ - PycURL wrapper with proxy header support. - - This class wraps pycurl.Curl to provide easy access to: - - Sending custom headers to proxy servers - - Receiving headers from proxy CONNECT responses + Set custom headers to send to the proxy server during CONNECT. 
Args: - proxy_headers: Dict of headers to send to the proxy server + curl: A pycurl.Curl instance + headers: Dict of headers to send to the proxy Example: - curl = ProxyCurl(proxy_headers={'X-ProxyMesh-Country': 'US'}) - response = curl.get('https://httpbin.org/ip', proxy='http://proxy:8080') - print(response.proxy_headers) + c = pycurl.Curl() + c.setopt(pycurl.PROXY, 'http://proxy:8080') + set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'}) + c.perform() """ + if not headers: + return - def __init__(self, proxy_headers: Optional[Dict[str, str]] = None): - self._proxy_headers = proxy_headers or {} - self._curl = pycurl.Curl() + header_list = [f"{k}: {v}" for k, v in headers.items()] - def close(self): - """Close the underlying curl handle.""" - if self._curl: - self._curl.close() - self._curl = None + # Set CURLOPT_PROXYHEADER + try: + curl.setopt(pycurl.PROXYHEADER, header_list) + except AttributeError: + # Fallback to numeric option (10228) if not exposed + curl.setopt(10228, header_list) - def __enter__(self): - return self + # Set CURLOPT_HEADEROPT to CURLHEADER_SEPARATE so proxy headers + # are only sent to the proxy, not the origin + try: + curl.setopt(pycurl.HEADEROPT, pycurl.HEADER_SEPARATE) + except AttributeError: + try: + curl.setopt(229, 1) # CURLOPT_HEADEROPT = 229, CURLHEADER_SEPARATE = 1 + except pycurl.error: + pass # Option may not be available in older libcurl versions + + +class HeaderCapture: + """ + Captures and parses HTTP response headers from pycurl requests. 
- def __exit__(self, *args): - self.close() + For HTTPS requests through a proxy, this separates: + - proxy_headers: Headers from the proxy's CONNECT response + - origin_headers: Headers from the origin server's response - def request( - self, - method: str, - url: str, - proxy: Optional[str] = None, - proxy_headers: Optional[Dict[str, str]] = None, - headers: Optional[Dict[str, str]] = None, - data: Optional[bytes] = None, - timeout: Optional[int] = None, - verify: bool = True, - **kwargs - ) -> ProxyResponse: + Example: + c = pycurl.Curl() + c.setopt(pycurl.URL, 'https://example.com') + c.setopt(pycurl.PROXY, 'http://proxy:8080') + + capture = HeaderCapture(c) # Installs HEADERFUNCTION callback + + c.perform() + + print(capture.proxy_headers) # {'X-ProxyMesh-IP': '1.2.3.4', ...} + print(capture.origin_headers) # {'Content-Type': 'text/html', ...} + print(capture.proxy_status) # 200 + """ + + def __init__(self, curl=None): """ - Make an HTTP request with proxy header support. + Initialize header capture. Args: - method: HTTP method (GET, POST, etc.) - url: Target URL - proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080') - proxy_headers: Headers to send to the proxy (merged with instance headers) - headers: Headers to send to the origin server - data: Request body for POST/PUT - timeout: Request timeout in seconds - verify: Whether to verify SSL certificates + curl: Optional pycurl.Curl instance. If provided, automatically + installs the HEADERFUNCTION callback. + """ + self._header_lines: List[bytes] = [] + self._parsed = False + self._sections: List[Tuple[Optional[int], Dict[str, str]]] = [] + + if curl is not None: + self.install(curl) + + def install(self, curl) -> 'HeaderCapture': + """ + Install the header callback on a pycurl.Curl instance. 
+ + Args: + curl: A pycurl.Curl instance Returns: - ProxyResponse with body, headers, and proxy_headers + self, for chaining """ - c = pycurl.Curl() + curl.setopt(pycurl.HEADERFUNCTION, self._header_callback) + return self + + def _header_callback(self, header_line: bytes) -> int: + """Callback for pycurl HEADERFUNCTION.""" + self._header_lines.append(header_line) + self._parsed = False # Invalidate cache + return len(header_line) + + def _parse(self) -> None: + """Parse collected header lines into sections.""" + if self._parsed: + return - try: - # Response buffers - body_buffer = BytesIO() - header_lines: List[bytes] = [] - - # Track if we're in CONNECT phase (for HTTPS through proxy) - parsed_url = urlparse(url) - is_https_via_proxy = proxy and parsed_url.scheme == 'https' - - # Header callback to capture all headers - def header_callback(header_line: bytes) -> int: - header_lines.append(header_line) - return len(header_line) - - # Basic options - c.setopt(pycurl.URL, url) - c.setopt(pycurl.WRITEFUNCTION, body_buffer.write) - c.setopt(pycurl.HEADERFUNCTION, header_callback) - - # HTTP method - if method.upper() == 'GET': - c.setopt(pycurl.HTTPGET, 1) - elif method.upper() == 'POST': - c.setopt(pycurl.POST, 1) - if data: - c.setopt(pycurl.POSTFIELDS, data) - elif method.upper() == 'PUT': - c.setopt(pycurl.UPLOAD, 1) - if data: - c.setopt(pycurl.POSTFIELDS, data) - elif method.upper() == 'DELETE': - c.setopt(pycurl.CUSTOMREQUEST, 'DELETE') - elif method.upper() == 'HEAD': - c.setopt(pycurl.NOBODY, 1) - elif method.upper() == 'OPTIONS': - c.setopt(pycurl.CUSTOMREQUEST, 'OPTIONS') - elif method.upper() == 'PATCH': - c.setopt(pycurl.CUSTOMREQUEST, 'PATCH') - if data: - c.setopt(pycurl.POSTFIELDS, data) - - # Request headers for origin - if headers: - header_list = [f"{k}: {v}" for k, v in headers.items()] - c.setopt(pycurl.HTTPHEADER, header_list) + self._sections = [] + current_headers: Dict[str, str] = {} + current_status: Optional[int] = None + + for line in 
self._header_lines: + line_str = line.decode('utf-8', errors='replace').strip() - # Proxy configuration - if proxy: - c.setopt(pycurl.PROXY, proxy) - - # Merge instance proxy headers with request-specific ones - all_proxy_headers = {**self._proxy_headers} - if proxy_headers: - all_proxy_headers.update(proxy_headers) - - # Set proxy headers (CURLOPT_PROXYHEADER) - if all_proxy_headers: - proxy_header_list = [f"{k}: {v}" for k, v in all_proxy_headers.items()] - # PROXYHEADER option value is 10228 in libcurl - # pycurl may expose it as PROXYHEADER or we use the numeric value + if line_str.startswith('HTTP/'): + # New response section - save previous if exists + if current_headers or current_status is not None: + self._sections.append((current_status, current_headers)) + current_headers = {} + # Parse status line: HTTP/1.1 200 OK + parts = line_str.split(' ', 2) + if len(parts) >= 2: try: - c.setopt(pycurl.PROXYHEADER, proxy_header_list) - except AttributeError: - # Fallback to numeric option if not exposed - c.setopt(10228, proxy_header_list) - - # Enable header sending to proxy for CONNECT - # CURLOPT_HEADEROPT = 229 - try: - c.setopt(pycurl.HEADEROPT, pycurl.HEADER_SEPARATE) - except AttributeError: - # CURLHEADER_SEPARATE = 1 - try: - c.setopt(229, 1) - except pycurl.error: - pass # Option may not be available in older versions - - # Timeout - if timeout: - c.setopt(pycurl.TIMEOUT, timeout) - - # SSL verification - if not verify: - c.setopt(pycurl.SSL_VERIFYPEER, 0) - c.setopt(pycurl.SSL_VERIFYHOST, 0) - - # Perform the request - c.perform() - - # Get status code - status_code = c.getinfo(pycurl.RESPONSE_CODE) - - # Parse headers - origin_headers = {} - proxy_response_headers = {} - proxy_status = None - - # Parse header lines - # For HTTPS via proxy, we get headers from both CONNECT response and final response - # They're separated by blank lines (HTTP/1.1 ... headers ... blank ... HTTP/1.1 ... 
headers) - current_headers = {} - current_status = None - header_sections = [] - - for line in header_lines: - line_str = line.decode('utf-8', errors='replace').strip() - - if line_str.startswith('HTTP/'): - # New response section - if current_headers or current_status: - header_sections.append((current_status, current_headers)) - current_headers = {} - # Parse status line: HTTP/1.1 200 OK - parts = line_str.split(' ', 2) - if len(parts) >= 2: - try: - current_status = int(parts[1]) - except ValueError: - current_status = None - elif ':' in line_str: - key, value = line_str.split(':', 1) - current_headers[key.strip()] = value.strip() - - # Don't forget the last section - if current_headers or current_status: - header_sections.append((current_status, current_headers)) - - # For HTTPS through proxy: - # - First section is CONNECT response (from proxy) - # - Last section is actual response (from origin) - if is_https_via_proxy and len(header_sections) >= 2: - proxy_status, proxy_response_headers = header_sections[0] - status_code_from_headers, origin_headers = header_sections[-1] - elif header_sections: - _, origin_headers = header_sections[-1] - - return ProxyResponse( - status_code=status_code, - headers=origin_headers, - content=body_buffer.getvalue(), - proxy_headers=proxy_response_headers, - proxy_status_code=proxy_status - ) - - finally: - c.close() + current_status = int(parts[1]) + except ValueError: + current_status = None + else: + current_status = None + elif ':' in line_str: + key, value = line_str.split(':', 1) + current_headers[key.strip()] = value.strip() + + # Don't forget the last section + if current_headers or current_status is not None: + self._sections.append((current_status, current_headers)) + + self._parsed = True - def get(self, url: str, **kwargs) -> ProxyResponse: - """Make a GET request.""" - return self.request('GET', url, **kwargs) + def reset(self) -> None: + """Clear captured headers for reuse.""" + self._header_lines.clear() + 
self._sections.clear() + self._parsed = False + + @property + def proxy_headers(self) -> Dict[str, str]: + """ + Headers from the proxy's CONNECT response. + + Returns empty dict if not an HTTPS-via-proxy request or no headers captured. + """ + self._parse() + if len(self._sections) >= 2: + return self._sections[0][1] + return {} - def post(self, url: str, **kwargs) -> ProxyResponse: - """Make a POST request.""" - return self.request('POST', url, **kwargs) + @property + def proxy_status(self) -> Optional[int]: + """ + Status code from the proxy's CONNECT response. + + Returns None if not an HTTPS-via-proxy request. + """ + self._parse() + if len(self._sections) >= 2: + return self._sections[0][0] + return None - def put(self, url: str, **kwargs) -> ProxyResponse: - """Make a PUT request.""" - return self.request('PUT', url, **kwargs) + @property + def origin_headers(self) -> Dict[str, str]: + """Headers from the origin server's response.""" + self._parse() + if self._sections: + return self._sections[-1][1] + return {} - def delete(self, url: str, **kwargs) -> ProxyResponse: - """Make a DELETE request.""" - return self.request('DELETE', url, **kwargs) + @property + def origin_status(self) -> Optional[int]: + """Status code from the origin server's response.""" + self._parse() + if self._sections: + return self._sections[-1][0] + return None - def head(self, url: str, **kwargs) -> ProxyResponse: - """Make a HEAD request.""" - return self.request('HEAD', url, **kwargs) + @property + def all_headers(self) -> Dict[str, str]: + """All headers merged (later responses override earlier ones, so origin headers take precedence for conflicts).""" + self._parse() + merged = {} + for _, headers in self._sections: + merged.update(headers) + return merged + + +# ============================================================================= +# High-level convenience API +# ============================================================================= + +@dataclass +class Response: + """Response object from high-level API.""" 
+ status_code: int + headers: Dict[str, str] + content: bytes + proxy_headers: Dict[str, str] = field(default_factory=dict) + proxy_status: Optional[int] = None - def options(self, url: str, **kwargs) -> ProxyResponse: - """Make an OPTIONS request.""" - return self.request('OPTIONS', url, **kwargs) + @property + def text(self) -> str: + """Response body as text.""" + return self.content.decode('utf-8', errors='replace') - def patch(self, url: str, **kwargs) -> ProxyResponse: - """Make a PATCH request.""" - return self.request('PATCH', url, **kwargs) + def raise_for_status(self) -> None: + """Raise exception if status code indicates error.""" + if self.status_code >= 400: + raise Exception(f"HTTP Error {self.status_code}") def request( @@ -297,55 +265,115 @@ def request( url: str, proxy: Optional[str] = None, proxy_headers: Optional[Dict[str, str]] = None, - **kwargs -) -> ProxyResponse: + headers: Optional[Dict[str, str]] = None, + data: Optional[bytes] = None, + timeout: Optional[int] = None, + verify: bool = True, +) -> Response: """ - Make a one-off request with proxy header support. + Make an HTTP request with proxy header support. Args: - method: HTTP method + method: HTTP method (GET, POST, etc.) 
url: Target URL - proxy: Proxy URL + proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080') proxy_headers: Headers to send to the proxy - **kwargs: Additional arguments passed to ProxyCurl.request + headers: Headers to send to the origin server + data: Request body for POST/PUT/PATCH + timeout: Request timeout in seconds + verify: Whether to verify SSL certificates Returns: - ProxyResponse object + Response object with body, headers, and proxy_headers """ - with ProxyCurl(proxy_headers=proxy_headers) as curl: - return curl.request(method, url, proxy=proxy, **kwargs) + c = pycurl.Curl() + body = BytesIO() + capture = HeaderCapture(c) + + try: + c.setopt(pycurl.URL, url) + c.setopt(pycurl.WRITEFUNCTION, body.write) + + # HTTP method + method = method.upper() + if method == 'GET': + c.setopt(pycurl.HTTPGET, 1) + elif method == 'POST': + c.setopt(pycurl.POST, 1) + if data: + c.setopt(pycurl.POSTFIELDS, data) + elif method == 'PUT': + c.setopt(pycurl.CUSTOMREQUEST, 'PUT') + if data: + c.setopt(pycurl.POSTFIELDS, data) + elif method == 'DELETE': + c.setopt(pycurl.CUSTOMREQUEST, 'DELETE') + elif method == 'HEAD': + c.setopt(pycurl.NOBODY, 1) + elif method == 'PATCH': + c.setopt(pycurl.CUSTOMREQUEST, 'PATCH') + if data: + c.setopt(pycurl.POSTFIELDS, data) + else: + c.setopt(pycurl.CUSTOMREQUEST, method) + + # Request headers + if headers: + c.setopt(pycurl.HTTPHEADER, [f"{k}: {v}" for k, v in headers.items()]) + + # Proxy + if proxy: + c.setopt(pycurl.PROXY, proxy) + if proxy_headers: + set_proxy_headers(c, proxy_headers) + + # Timeout + if timeout: + c.setopt(pycurl.TIMEOUT, timeout) + + # SSL + if not verify: + c.setopt(pycurl.SSL_VERIFYPEER, 0) + c.setopt(pycurl.SSL_VERIFYHOST, 0) + + c.perform() + + return Response( + status_code=c.getinfo(pycurl.RESPONSE_CODE), + headers=capture.origin_headers, + content=body.getvalue(), + proxy_headers=capture.proxy_headers, + proxy_status=capture.proxy_status, + ) + finally: + c.close() -def get(url: str, **kwargs) -> ProxyResponse: 
- """Make a GET request with proxy header support.""" +def get(url: str, **kwargs) -> Response: + """Make a GET request.""" return request('GET', url, **kwargs) -def post(url: str, **kwargs) -> ProxyResponse: - """Make a POST request with proxy header support.""" +def post(url: str, **kwargs) -> Response: + """Make a POST request.""" return request('POST', url, **kwargs) -def put(url: str, **kwargs) -> ProxyResponse: - """Make a PUT request with proxy header support.""" +def put(url: str, **kwargs) -> Response: + """Make a PUT request.""" return request('PUT', url, **kwargs) -def delete(url: str, **kwargs) -> ProxyResponse: - """Make a DELETE request with proxy header support.""" +def delete(url: str, **kwargs) -> Response: + """Make a DELETE request.""" return request('DELETE', url, **kwargs) -def head(url: str, **kwargs) -> ProxyResponse: - """Make a HEAD request with proxy header support.""" +def head(url: str, **kwargs) -> Response: + """Make a HEAD request.""" return request('HEAD', url, **kwargs) -def options(url: str, **kwargs) -> ProxyResponse: - """Make an OPTIONS request with proxy header support.""" - return request('OPTIONS', url, **kwargs) - - -def patch(url: str, **kwargs) -> ProxyResponse: - """Make a PATCH request with proxy header support.""" +def patch(url: str, **kwargs) -> Response: + """Make a PATCH request.""" return request('PATCH', url, **kwargs) diff --git a/test_proxy_headers.py b/test_proxy_headers.py index ccb5153..225c0fb 100755 --- a/test_proxy_headers.py +++ b/test_proxy_headers.py @@ -412,32 +412,34 @@ class PycurlTest(ModuleTest): def test(self, config: TestConfig) -> TestResult: try: - from python_proxy_headers.pycurl_proxy import ProxyCurl + from python_proxy_headers.pycurl_proxy import get - # Create ProxyCurl with optional proxy headers to send - with ProxyCurl(proxy_headers=config.proxy_headers_to_send or None) as curl: - # Make request through proxy - response = curl.get(config.test_url, proxy=config.proxy_url) - - # Check for 
proxy header in response headers (merged) or proxy_headers - header_value = self._check_header(response.headers, config.proxy_header) - if not header_value: - header_value = self._check_header(response.proxy_headers, config.proxy_header) - - if header_value: - return TestResult( - module_name=self.name, - success=True, - header_value=header_value, - response_status=response.status_code - ) - else: - return TestResult( - module_name=self.name, - success=False, - error=f"Header '{config.proxy_header}' not found in response", - response_status=response.status_code - ) + # Make request using high-level API + response = get( + config.test_url, + proxy=config.proxy_url, + proxy_headers=config.proxy_headers_to_send or None + ) + + # Check for proxy header in response headers or proxy_headers + header_value = self._check_header(response.headers, config.proxy_header) + if not header_value: + header_value = self._check_header(response.proxy_headers, config.proxy_header) + + if header_value: + return TestResult( + module_name=self.name, + success=True, + header_value=header_value, + response_status=response.status_code + ) + else: + return TestResult( + module_name=self.name, + success=False, + error=f"Header '{config.proxy_header}' not found in response", + response_status=response.status_code + ) except ImportError as e: return TestResult(