diff --git a/Makefile b/Makefile index 83bc5c5..07252d3 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ fmt: lint: @command -v pylint || $(PYTHON) -m pip install -r requirements.txt - $(shell pylint ja3requests) + @pylint ja3requests .PHONY: dist dist: diff --git a/README.md b/README.md index 057723e..83b388d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ + + # ja3requests An http request library that can customize ja3 fingerprints. diff --git a/ja3requests/__version__.py b/ja3requests/__version__.py index 9358665..122502c 100644 --- a/ja3requests/__version__.py +++ b/ja3requests/__version__.py @@ -8,7 +8,7 @@ __title__ = "ja3requests" __description__ = "An http request library that can customize ja3 fingerprints." __url__ = "https://github.com/lxjmaster/ja3requests" -__version__ = "0.0.3" +__version__ = "1.0.0" __author__ = "Mast Luo" __author_email__ = "379501669@qq.com" __license__ = "Apache-2.0 license" diff --git a/ja3requests/base/_connection.py b/ja3requests/base/_connection.py index db1526f..16f5238 100644 --- a/ja3requests/base/_connection.py +++ b/ja3requests/base/_connection.py @@ -7,9 +7,11 @@ class BaseHttpConnection: + """ + Basic HTTP Connection + """ def __init__(self): - self._scheme = None self._host = None self._port = None @@ -25,96 +27,204 @@ def __init__(self): @property def scheme(self): + """ + Scheme + :return: + """ return self._scheme @scheme.setter def scheme(self, attr): + """ + Set Scheme + :param attr: + :return: + """ self._scheme = attr @property def host(self): + """ + Host + :return: + """ return self._host @host.setter def host(self, attr): + """ + Set Host + :param attr: + :return: + """ self._host = attr @property def port(self): + """ + Port + :return: + """ return self._port @port.setter def port(self, attr): + """ + Set Port + :param attr: + :return: + """ self._port = attr @property def source_address(self): + """ + Source Address + :return: + """ return self._source_address @source_address.setter def source_address(self, attr): + """ + Set Source Address + :param attr: + :return: + """ self._source_address = attr @property def destination_address(self): + """ + Destination Address + :return: + """ return self._destination_address @destination_address.setter def destination_address(self, attr): + """ + Set Destination Address + :param attr: + :return: + """ self._destination_address = attr @property def path(self): + """ + Path + :return: + """ return self._path @path.setter def path(self, attr): + """ + Set Path + :param attr: + :return: + """ self._path = attr @property def timeout(self): + """ + Timeout + :return: + """ return self._timeout @timeout.setter def timeout(self, attr): + """ + Set Timeout + :param attr: + :return: + """ self._timeout = attr @property def proxy(self): + """ + Proxy + :return: + """ return self._proxy @proxy.setter def proxy(self, attr): + """ + Set Proxy + :param attr: + :return: + """ self._proxy = attr @property def proxy_username(self): + """ + Proxy username + :return: + """ return self._proxy_username @proxy_username.setter def proxy_username(self, attr): + """ + Set Proxy Username + :param attr: + :return: + """ self._proxy_username = attr @property def proxy_password(self): + """ + Proxy Password + :return: + """ return self._proxy_password @proxy_password.setter def proxy_password(self, attr): + """ + Set Proxy Password + :param attr: + :return: + """ self._proxy_password = attr @property def connection(self): + """ + Connection + :return: + """ return self._connection @connection.setter def connection(self, attr): + """ + Set Connection + :param attr: + :return: + """ self._connection = attr @property def is_close(self): + """ + Connection is closed + :return: + """ return self._is_close @is_close.setter def is_close(self, attr): + """ + Set connection close + :param attr: + :return: + """ self._is_close = attr diff --git a/ja3requests/base/_context.py b/ja3requests/base/_context.py index 34eb044..6660d05 100644 --- a/ja3requests/base/_context.py +++ b/ja3requests/base/_context.py @@ -7,6 +7,9 @@ class BaseContext: + """ + Basic connection context. + """ def __init__(self): self._protocol = None @@ -19,56 +22,119 @@ def __init__(self): @property def protocol(self): + """ + Protocol + :return: + """ return self._protocol @protocol.setter def protocol(self, attr): + """ + Set protocol + :param attr: + :return: + """ self._protocol = attr @property def version(self): + """ + Version + :return: + """ return self._version @version.setter def version(self, attr): + """ + Set version + :param attr: + :return: + """ self._version = attr @property def start_line(self): + """ + Start line + :return: + """ return self._start_line @start_line.setter def start_line(self, attr): + """ + Set start line + :param attr: + :return: + """ self._start_line = attr @property def method(self): + """ + Method + :return: + """ return self._method @method.setter def method(self, attr): + """ + Set method + :param attr: + :return: + """ self._method = attr @property def headers(self): + """ + Headers + :return: + """ return self._headers @headers.setter def headers(self, attr): + """ + Set headers + :param attr: + :return: + """ self._headers = attr @property def body(self): + """ + Body + :return: + """ return self._body @body.setter def body(self, attr): + """ + Set body + :param attr: + :return: + """ self._body = attr @property def message(self): + """ + Message + :return: + """ return self._message @message.setter def message(self, attr): + """ + Set message + :param attr: + :return: + """ self._message = attr diff --git a/ja3requests/base/_request.py b/ja3requests/base/_request.py index ff5815b..4000390 100644 --- a/ja3requests/base/_request.py +++ b/ja3requests/base/_request.py @@ -7,9 +7,11 @@ class BaseRequest: + """ + The basic request. + """ def __init__(self): - self._method = None self._source = None self._url = None @@ -27,118 +29,259 @@ def __init__(self): @property def method(self): + """ + Request method + >>> "GET" + :return: + """ return self._method @method.setter def method(self, attr): + """ + Set request method. + :param attr: + :return: + """ self._method = attr @property def source(self): + """ + Source Address. + :return: + """ return self._source @source.setter def source(self, attr): + """ + Set source address. + :param attr: + :return: + """ self._source = attr @property def url(self): + """ + Request url. + :return: + """ return self._url @url.setter def url(self, attr): + """ + Set request url. + :param attr: + :return: + """ self._url = attr @property def scheme(self): + """ + Request Scheme. eg. HTTP, HTTPS + :return: + """ return self._scheme @scheme.setter def scheme(self, attr): + """ + Set scheme. + :param attr: + :return: + """ self._scheme = attr @property def port(self): + """ + Remote address port. + :return: + """ return self._port @port.setter def port(self, attr): + """ + Set port. + :param attr: + :return: + """ self._port = attr @property def headers(self): + """Headers + Request headers. + >>> {"Host": "www.example.com", "Accept": "*/*"} + :return: + """ return self._headers @headers.setter def headers(self, attr): + """ + Set request headers. + :param attr: + :return: + """ self._headers = attr @property def params(self): + """ + Request params. eg. ?page=1&page_size=10&desc=1 + >>> [("page", 1), ("page_size", 10),] + :return: + """ return self._params @params.setter def params(self, attr): + """ + Set params. + :param attr: + :return: + """ self._params = attr @property def data(self): + """ + Post request data. + >>> {"username": "admin", "password": "admin"} + :return: + """ return self._data @data.setter def data(self, attr): + """ + Set post request data. + :param attr: + :return: + """ self._data = attr @property def cookies(self): + """ + Request cookies. + >>> {"UUID": "xxxxxxx"} + :return: + """ return self._cookies @cookies.setter def cookies(self, attr): + """ + Set request cookies. + :param attr: + :return: + """ self._cookies = attr @property def files(self): + """ + Request files. + :return: + """ return self._files @files.setter def files(self, attr): + """ + Set files. + :param attr: + :return: + """ self._files = attr @property def auth(self): + """ + Request Authorization. + >>> {"username": "admin", "password": "admin"} + :return: + """ return self._auth @auth.setter def auth(self, attr): + """ + Set authorization. + :param attr: + :return: + """ self._auth = attr @property def json(self): + """ + Post json. + :return: + """ return self._json @json.setter def json(self, attr): + """ + Set json for post request. + :param attr: + :return: + """ self._json = attr @property def timeout(self): + """ + Request timeout. + :return: + """ return self._timeout @timeout.setter def timeout(self, attr): + """ + Set request timeout. + :param attr: + :return: + """ self._timeout = attr @property def proxies(self): + """ + Request proxies. + >>> {"http": "username:password@host:port", "https": "username:password@host:port"} + :return: + """ return self._proxies @proxies.setter def proxies(self, attr): + """ + Set proxies. + :param attr: + :return: + """ self._proxies = attr def is_http(self): + """ + Is http request. + :return: + """ return self._scheme == "http" def is_https(self): + """ + Is https request. + :return: + """ return self._scheme == "https" diff --git a/ja3requests/base/_response.py b/ja3requests/base/_response.py index eb9a62e..9b7917e 100644 --- a/ja3requests/base/_response.py +++ b/ja3requests/base/_response.py @@ -7,9 +7,11 @@ class BaseResponse: + """ + The basic response. + """ def __init__(self): - self._raw = None self._protocol_version = None self._status_code = None @@ -19,48 +21,106 @@ def __init__(self): @property def raw(self): + """Raw Response + Receive from remote connection. + >>> b"HTTP/1.1 200 OK..." + :return: + """ return self._raw @raw.setter def raw(self, attr): + """ + Set raw response. + :param attr: + :return: + """ self._raw = attr @property def protocol_version(self): + """ + Protocol Version + >>> b"HTTP/1.1" + :return: + """ return self._protocol_version @protocol_version.setter def protocol_version(self, attr): + """ + Set protocol version + :param attr: + :return: + """ self._protocol_version = attr @property def status_code(self): + """STATUS CODE + The response status code, e.g(200, 203, 400, 404...) + >>> b"200" + :return: + """ return self._status_code @status_code.setter def status_code(self, attr): + """ + Set response status code. + :param attr: + :return: + """ self._status_code = attr @property def status_text(self): + """ + Response status text. eg. HTTP/1.1 200 OK + >>> b"OK" + :return: + """ return self._status_text @status_text.setter def status_text(self, attr): + """ + Set response status text. + :param attr: + :return: + """ self._status_text = attr @property def headers(self): + """Headers + Response headers + :return: + """ return self._headers @headers.setter def headers(self, attr): + """ + Set response headers. + :param attr: + :return: + """ self._headers = attr @property def body(self): + """ + Response Body + :return: + """ return self._body @body.setter def body(self, attr): + """ + Set response body. + :param attr: + :return: + """ self._body = attr diff --git a/ja3requests/base/_sessions.py b/ja3requests/base/_sessions.py index bf318ce..ef09db9 100644 --- a/ja3requests/base/_sessions.py +++ b/ja3requests/base/_sessions.py @@ -8,7 +8,7 @@ class BaseSession: """ - The base request session. + The basic request session. """ def __init__(self): diff --git a/ja3requests/connections.py b/ja3requests/connections.py index 15141b0..740443d 100644 --- a/ja3requests/connections.py +++ b/ja3requests/connections.py @@ -10,20 +10,17 @@ from .exceptions import InvalidHost from .base import BaseHttpConnection from .protocol.sockets import create_connection -from .protocol.exceptions import SocketTimeout, ConnectTimeoutError, ReadTimeout - - -DEFAULT_HTTP_SCHEME = "http" -DEFAULT_HTTPS_SCHEME = "https" - -DEFAULT_HTTP_PORT = 80 -DEFAULT_HTTPS_PORT = 443 +from .const import DEFAULT_HTTP_SCHEME +from .const import DEFAULT_HTTP_PORT +from .protocol.exceptions import SocketTimeout, ConnectTimeoutError class HTTPConnection(BaseHttpConnection): + """ + HTTP connection. + """ def __init__(self): - super().__init__() self.scheme = DEFAULT_HTTP_SCHEME self.port = DEFAULT_HTTP_PORT @@ -44,10 +41,10 @@ def _new_conn(self): self.timeout, self.source_address, ) - except SocketTimeout: + except SocketTimeout as err: raise ConnectTimeoutError( f"Connection to {self.destination_address} timeout out. timeout={self.timeout}" - ) + ) from err return conn @@ -57,14 +54,27 @@ def _ready_connect(self, **kwargs): :param kwargs: :return: """ - if kwargs.get("scheme", None): - self.scheme = kwargs["scheme"] - - if kwargs.get("port", None): - self.port = kwargs["port"] - - if kwargs.get("source_address", None): - self.source_address = kwargs["source_address"] + self.scheme = kwargs["scheme"] if kwargs.get("scheme", None) else self.scheme + self.port = kwargs["port"] if kwargs.get("port", None) else self.port + self.source_address = ( + kwargs["source_address"] + if kwargs.get("source_address", None) + else self.source_address + ) + self.timeout = ( + kwargs["timeout"] if kwargs.get("timeout", None) else self.timeout + ) + self.proxy = kwargs["proxy"] if kwargs.get("proxy", None) else self.proxy + self.proxy_username = ( + kwargs["proxy_username"] + if kwargs.get("proxy_username", None) + else self.proxy_username + ) + self.proxy_password = ( + kwargs["proxy_password"] + if kwargs.get("proxy_password", None) + else self.proxy_password + ) if kwargs.get("host", None): host = kwargs["host"].replace("http://", "").split("/") @@ -78,32 +88,33 @@ def _ready_connect(self, **kwargs): else: self.destination_address = self.host else: - raise InvalidHost(f"Invalid Host: {kwargs['host']!r}, can not parse destination address or path.") - - if kwargs.get("timeout", None): - self.timeout = kwargs["timeout"] - - if kwargs.get("proxy", None): - self.proxy = kwargs["proxy"] - - if kwargs.get("proxy_username", None): - self.proxy_username = kwargs["proxy_username"] - - if kwargs.get("proxy_password", None): - self.proxy_password = kwargs["proxy_password"] + raise InvalidHost( + f"Invalid Host: {kwargs['host']!r}, can not parse destination address or path." + ) def connect( - self, - scheme=None, - port=None, - source_address=None, - host=None, - timeout=None, - proxy=None, - proxy_username=None, - proxy_password=None, + self, + scheme=None, + port=None, + source_address=None, + host=None, + timeout=None, + proxy=None, + proxy_username=None, + proxy_password=None, ): - + """ + Create an http connection. + :param scheme: + :param port: + :param source_address: + :param host: + :param timeout: + :param proxy: + :param proxy_username: + :param proxy_password: + :return: + """ self._ready_connect( scheme=scheme, port=port, @@ -122,44 +133,17 @@ def send(self, context): Send socket. :return: """ - self.connection.sendall( - context.message - ) + self.connection.sendall(context.message) - data = self.receive() - response = HTTPResponse(data) + response = HTTPResponse(sock=self.connection, method=context.method) response.begin() return response - # response_data = b"" - # # - # self.connection.settimeout(3) - # try: - # while True: - # data = self.connection.recv(2048) - # if not data: - # break - # response_data += data - # except TimeoutError: - # pass - # - # print(response_data) - # return response_data - - def receive(self): - - response_data = bytes() - while True: - data = self.connection.recv(2048) - if not data: - self.is_close = True - break - - response_data += data - yield response_data - def close(self): - + """ + Close connection. + :return: + """ if self.connection: self.connection.close() diff --git a/ja3requests/const.py b/ja3requests/const.py index 6f27e96..1fef383 100644 --- a/ja3requests/const.py +++ b/ja3requests/const.py @@ -31,6 +31,13 @@ def __setattr__(self, key, value): const = _Const() +const.MAX_LINE = 65536 +const.MAX_HEADERS = 100 +const.DEFAULT_CHUNKED_SIZE = 2048 +const.DEFAULT_HTTP_SCHEME = "http" +const.DEFAULT_HTTPS_SCHEME = "https" +const.DEFAULT_HTTP_PORT = 80 +const.DEFAULT_HTTPS_PORT = 443 const.DEFAULT_REDIRECT_LIMIT = 8 # max redirect sys.modules[__name__] = const diff --git a/ja3requests/context.py b/ja3requests/context.py index 74bbff5..1276415 100644 --- a/ja3requests/context.py +++ b/ja3requests/context.py @@ -14,9 +14,11 @@ class HTTPContext(BaseContext): + """ + HTTPContext + """ def __init__(self, connection): - super().__init__() self.protocol = DEFAULT_HTTP_CONTEXT_PROTOCOL self.version = DEFAULT_HTTP_VERSION @@ -24,27 +26,31 @@ def __init__(self, connection): @property def message(self): - self.start_line = " ".join( - [self.method, self.connection.path, self.version] - ) - self._message = "\r\n".join( - [self.start_line, self.put_headers()] - ) + """ + HTTP Context message to send + :return: + """ + self.start_line = " ".join([self.method, self.connection.path, self.version]) + self._message = "\r\n".join([self.start_line, self.put_headers()]) self._message += "\r\n\r\n" return self._message.encode() - def set_payload( - self, - **kwargs - ): - + def set_payload(self, **kwargs): + """ + Set context payload + :param kwargs: + :return: + """ for k, v in kwargs.items(): if hasattr(self, k): setattr(self, k, v) def put_headers(self): - + """ + Set context headers + :return: + """ headers = "" if self.headers is not None: if not self.headers.get("host", None): diff --git a/ja3requests/exceptions.py b/ja3requests/exceptions.py index d992349..573c7aa 100644 --- a/ja3requests/exceptions.py +++ b/ja3requests/exceptions.py @@ -62,4 +62,10 @@ class InvalidStatusLine(RequestException, ValueError): class InvalidResponseHeaders(RequestException, ValueError): """ Raised it when cant receive response headers. - """ \ No newline at end of file + """ + + +class IssueError(ValueError): + """ + This situation may not be considered yet, please issue it + """ diff --git a/ja3requests/protocol/exceptions.py b/ja3requests/protocol/exceptions.py index 9aa77bc..5fce210 100644 --- a/ja3requests/protocol/exceptions.py +++ b/ja3requests/protocol/exceptions.py @@ -13,7 +13,7 @@ class SocketException(Exception): class SocketTimeout(OSError): - """ Timeout expired. """ + """Timeout expired.""" class LocationParseError(SocketException, ValueError): @@ -37,4 +37,4 @@ class ConnectTimeoutError(SocketTimeoutError): class ReadTimeout(SocketTimeoutError): """ Raised when socket receive timeout. - """ \ No newline at end of file + """ diff --git a/ja3requests/protocol/sockets.py b/ja3requests/protocol/sockets.py index aa9bd45..77f3122 100644 --- a/ja3requests/protocol/sockets.py +++ b/ja3requests/protocol/sockets.py @@ -1,3 +1,4 @@ +# pylint: skip-file """ ja3requests.protocol.sockets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -10,11 +11,19 @@ def create_connection( - address, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, - socket_options=None, + address, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, + socket_options=None, ): + """ + Create a socket connection. + :param address: + :param timeout: + :param source_address: + :param socket_options: + :return: + """ if socket_options is None: socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] @@ -56,7 +65,6 @@ def create_connection( def _set_socket_options(sock, options): - if options is None: return @@ -65,7 +73,6 @@ def _set_socket_options(sock, options): def allowed_gai_family(): - family = socket.AF_INET if HAS_IPV6: family = socket.AF_UNSPEC diff --git a/ja3requests/request.py b/ja3requests/request.py index 9f51a6b..26c5512 100644 --- a/ja3requests/request.py +++ b/ja3requests/request.py @@ -4,29 +4,45 @@ This module create a request struct and ready request object. """ -from .base import BaseRequest -from .utils import default_headers -from .context import HTTPContext -from .connections import HTTPConnection -from .exceptions import NotAllowedRequestMethod, MissingScheme, NotAllowedScheme, InvalidParams + + import warnings from http.cookiejar import CookieJar from urllib.parse import urlparse, urlencode from typing import Any, AnyStr, Dict, List, Union, ByteString, Tuple +from .base import BaseRequest +from .utils import default_headers +from .context import HTTPContext +from .connections import HTTPConnection +from .exceptions import ( + NotAllowedRequestMethod, + MissingScheme, + NotAllowedScheme, + InvalidParams, +) class ReadyRequest(BaseRequest): + """ + Ready a request, e.g.(check url, check params) + """ def __init__( - self, - method: AnyStr, - url: AnyStr, - params: Union[Dict[Any, Any], List[Tuple[Any, Any]], Tuple[Tuple[Any, Any]], ByteString, AnyStr] = None, - data: Union[Dict[AnyStr, Any], List, Tuple, ByteString] = None, - headers: Dict[AnyStr, AnyStr] = None, - cookies: Union[Dict[AnyStr, AnyStr], CookieJar] = None, - auth: Tuple = None, - json: Dict[AnyStr, AnyStr] = None, + self, + method: AnyStr, + url: AnyStr, + params: Union[ + Dict[Any, Any], + List[Tuple[Any, Any]], + Tuple[Tuple[Any, Any]], + ByteString, + AnyStr, + ] = None, + data: Union[Dict[AnyStr, Any], List, Tuple, ByteString] = None, + headers: Dict[AnyStr, AnyStr] = None, + cookies: Union[Dict[AnyStr, AnyStr], CookieJar] = None, + auth: Tuple = None, + json: Dict[AnyStr, AnyStr] = None, ): super().__init__() self.method = method @@ -83,9 +99,7 @@ def ready_url(self): # Just allow http or https if parse.scheme not in ["http", "https"]: - raise NotAllowedScheme( - f"Schema: {parse.scheme} not allowed." - ) + raise NotAllowedScheme(f"Schema: {parse.scheme} not allowed.") self.scheme = parse.scheme if self.scheme == "https": @@ -144,7 +158,10 @@ def ready_headers(self): for k, v in self.headers.items(): header = k.lower() if header in header_list: - warnings.warn(f"Duplicate header: {k}, you should check the request headers.", RuntimeWarning) + warnings.warn( + f"Duplicate header: {k}, you should check the request headers.", + RuntimeWarning, + ) header_list.append(header) new_headers[header] = v @@ -186,7 +203,10 @@ def ready(self): self.ready_json() def request(self): - + """ + Create a Request object. + :return: + """ req = Request() req.clone(self) @@ -194,12 +214,19 @@ def request(self): class Request(BaseRequest): + """ + Request object to send. + """ def __repr__(self): return f"" def clone(self, ready_request: ReadyRequest): - + """ + Clone arguments from ReadyRequest + :param ready_request: + :return: + """ for k, v in ready_request.__dict__.items(): setattr(self, k, v) @@ -219,7 +246,7 @@ def send(self): self.timeout, proxy, proxy_username, - proxy_password + proxy_password, ) context = HTTPContext(conn) context.set_payload( @@ -241,9 +268,11 @@ def create_connect(self): elif self.is_https(): # TODO: HTTPS # conn = HTTPSConnection() - raise NotImplementedError("HTTPSConnection not implemented yet.") + raise NotImplementedError("HTTPS not implemented yet.") else: - raise MissingScheme(f"Scheme: {self.scheme}, parse scheme failed, can't create connection.") + raise MissingScheme( + f"Scheme: {self.scheme}, parse scheme failed, can't create connection." + ) return conn diff --git a/ja3requests/response.py b/ja3requests/response.py index 178617a..043cc88 100644 --- a/ja3requests/response.py +++ b/ja3requests/response.py @@ -6,94 +6,188 @@ """ +import json +import gzip +import zlib +import brotli from .base import BaseResponse -from .exceptions import InvalidStatusLine, InvalidResponseHeaders +from .const import MAX_LINE, MAX_HEADERS +from .exceptions import InvalidStatusLine, InvalidResponseHeaders, IssueError class HTTPResponse(BaseResponse): + """ + An HTTP response from socket connection. + """ - def __init__(self, response=None): + def __init__(self, sock, method=None): super().__init__() - self.response = response + self.fp = sock.makefile("rb") + self._method = method + self._chunked = False + self._content_encoding = None + self._content_length = 0 def __repr__(self): + return ( + f"" + ) + + def _close_conn(self): + fp = self.fp + self.fp = None + fp.close() + + def _read_status_line(self): + line = self.fp.readline(MAX_LINE + 1) + if len(line) > MAX_LINE: + raise InvalidStatusLine( + f"The status line is too long, exceeding the {MAX_LINE} Max limit" + ) - return f"" - - def _seek(self): - - if self.raw is not None and self.raw.endswith(b"\r\n\r\n"): - return self.raw + if not line: + raise InvalidStatusLine( + f"The remote servers return an invalid response status line: {line!r}" + ) - data = b"" try: - data = next(self.response) - self.raw = data - except StopIteration: - pass + protocol_version, status_code, status_text = line.split(None, 2) + self.protocol_version = protocol_version + self.status_code = status_code + self.status_text = status_text.strip() + except ValueError as err: + raise InvalidStatusLine(f"Can't parse status line: {line!r}") from err - return data + if not self.protocol_version.startswith(b"HTTP/"): + self._close_conn() + raise InvalidStatusLine(f"The status line version not support: {line!r}") - def _get_lines(self): + return protocol_version, status_code, status_text - lines = self._seek().split(b"\r\n", 1) - if len(lines) > 0: - self.protocol_version, self.status_code, self.status_text = status_lines = lines[0].split(b" ", 2) - else: - raise InvalidStatusLine(f"Invalid response status line: {lines!r}") + def _read_headers(self): + headers = [] + while True: + line = self.fp.readline(MAX_LINE + 1) + if len(line) > MAX_LINE: + raise InvalidResponseHeaders( + f"The response headers is too long, exceeding the {MAX_LINE} Max limit" + ) + + headers.append(line) + if len(headers) > MAX_HEADERS: + raise InvalidResponseHeaders( + f"The response headers is too long, exceeding the {MAX_LINE} Max limit" + ) + + if line in (b"\r\n", b"\n", b""): + headers.pop() + break - return status_lines + return headers - def _get_headers(self): + def _parse_headers(self, headers_list=None): + headers = {} + headers_list = headers_list if headers_list is not None else self.headers + if headers_list is None: + raise ValueError("Required headers to parse.") - lines = self._seek().split(b"\r\n\r\n", 1) - if len(lines) > 0: - self.headers = headers = lines[0].split(b"\r\n", 1)[1] - else: - raise InvalidResponseHeaders(f"Invalid response headers: {lines!r}") + self.headers = b"" + for header in headers_list[1:]: + self.headers += header + name, value = header.strip().split(b": ") + headers.setdefault(name.lower(), value) return headers - def _get_body(self): + def read_body(self): + """ + Read body from remote connection. + :return: + """ + body = b"" + + if self.fp is None: + return body - data = self._seek() - chunk = data.endswith(b"\r\n\r\n") - while not chunk: - data = self._seek() - chunk = data.endswith(b"\r\n\r\n") + if self._method == "HEAD": + self._close_conn() + return body - lines = data.split(b"\r\n\r\n", 1) - if len(lines) > 1: - if lines[1] == b"": - self.body = body = b"" - return body + if self._chunked: + body = self._read_chunked() - self.body = body = lines[1].split(b"\r\n", 1)[1] - else: - raise InvalidResponseHeaders(f"Invalid response headers: {lines!r}") + if self._content_length > 0: + body = self.fp.read(self._content_length) + + if self._content_encoding is not None or self._content_encoding != b"": + if self._content_encoding == b"gzip": + body = gzip.decompress(body) + elif self._content_encoding == b"deflate": + try: + body = zlib.decompress(body, -zlib.MAX_WBITS) + except zlib.error: + body = zlib.decompress(body) + elif self._content_encoding == b"br": + body = brotli.decompress(body) return body + def _read_chunked(self): + chunked_data = b"" + while True: + chunked_size = self.fp.readline(MAX_LINE + 1).strip() + if chunked_size == b"": + continue + if chunked_size == b"0": + break + size = int(chunked_size, 16) + chunked_data += self.fp.read(size) + + return chunked_data + def begin(self): + """ + Receive data from remote connection and begin parse message. + :return: + """ + if self.headers is not None: + return + + self._read_status_line() + self.headers = self._read_headers() + headers = self._parse_headers() + + self._content_encoding = headers.get(b"content-encoding", b"") + + transfer_encoding = headers.get(b"transfer-encoding", b"") + if transfer_encoding == b"chunked": + self._chunked = True + elif transfer_encoding != b"": + raise IssueError( + "This situation may not be considered yet, please issue it" + ) - self._get_lines() - self._get_headers() - self._get_body() + self._content_length = int(headers.get(b"content-length", 0)) class Response(BaseResponse): + """Response + + """ def __init__(self, response=None): super().__init__() self.response = response def __repr__(self): - return f"" @property def headers(self): - + """ + Response Headers. + :return: + """ headers = [] if self.response is None: return headers @@ -101,28 +195,32 @@ def headers(self): headers_raw = self.response.headers.decode() header_list = headers_raw.split("\r\n") for header_item in header_list: - name, value = header_item.split(":", 1) - headers.append( - { - name.strip(): value.strip() - } - ) + if header_item == "": + continue + name, value = header_item.split(": ", 1) + headers.append({name.strip(): value.strip()}) return headers @property def body(self): - + """ + Response Body. + :return: + """ body = b"" if self.response is None: return body - return self.response.body + return self.response.read_body() @property def status_code(self): - - status_code = 400 + """ + Response Status Code + :return: + """ + status_code = -1 if self.response is None: return status_code @@ -130,9 +228,23 @@ def status_code(self): @property def content(self): + """ + Response Content + :return: + """ + return self.body - content = self.body.split(b"\r\n\r\n", 1) - if len(content) > 0: - return content[0] - - return b"" + @property + def text(self): + """ + Response Text + :return: + """ + return self.content.decode("utf8") + + def json(self): + """ + Response JSON + :return: + """ + return json.loads(self.body) diff --git a/ja3requests/sessions.py b/ja3requests/sessions.py index 5964a22..72b0871 100644 --- a/ja3requests/sessions.py +++ b/ja3requests/sessions.py @@ -34,17 +34,7 @@ def __init__(self): self.headers = default_headers() self.max_redirects = DEFAULT_REDIRECT_LIMIT - def ready( - self, - method, - url, - params, - data, - headers, - cookies, - auth, - json - ): + def ready(self, method, url, params, data, headers, cookies, auth, json): """ Ready to send request. :return: @@ -102,7 +92,7 @@ def request( headers=headers, cookies=cookies, auth=auth, - json=json + json=json, ) req = ready_request.request() diff --git a/main.py b/main.py index 6aa60f0..f135ac2 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,74 @@ from ja3requests.sessions import Session + +headers = { + "connection": "keep-alive", + "Accept-Encoding": "deflate, br, gzip" +} with Session() as session: - response = session.get("http://www.baidu.com") + # response = session.get("http://127.0.0.1:8080", headers=headers) + # response = session.get("http://www.baidu.com", headers=headers) + response = session.get("http://www.aliyun.com") + print(response.headers) print(response) - print(response.status_code) - print(response.content) + # print(response.status_code) + # print(response.content) + print(response.text) + # print(response.json()) + + # with open("baidu.html", "w+") as f: + # f.write(response.text) + + +# import requests +# +# requests.get("http://127.0.0.1:8080", headers=headers) + +# import socket +# +# # create a socket object +# client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +# +# # connect to the server +# host = 'www.baidu.com' +# port = 80 +# client_socket.connect((host, port)) +# +# request = "GET / HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n".format(host) +# client_socket.send(request.encode()) +# +# response = b'' +# while True: +# data = client_socket.recv(1024) +# if not data: +# break +# response += data +# +# # decode the response headers +# response_headers = response.decode().split('\r\n') +# +# content_length = None +# transfer_encoding = None +# +# for header in response_headers: +# if 'Content-Length' in header: +# content_length = int(header.split(': ')[1]) +# elif 'Transfer-Encoding' in header: +# transfer_encoding = header.split(': ')[1] +# +# print(content_length, transfer_encoding) +# if transfer_encoding == 'chunked': +# response_body = b'' +# while True: +# chunk_size = int(response[:response.find(b'\r\n')], 16) +# if chunk_size == 0: +# break +# response = response[response.find(b'\r\n')+2:] +# response_body += response[:chunk_size] +# response = response[chunk_size+2:] +# else: +# # read the response body using content length +# response_body = client_socket.recv(content_length) +# +# print(response[len(response)-content_length:]) +# client_socket.close()