From 6e8c1aa82b7271dad78b909f2eeb844b6ee5ff76 Mon Sep 17 00:00:00 2001 From: tiedu Date: Fri, 2 Nov 2018 12:02:03 +0800 Subject: [PATCH 1/5] Support chunk upload --- qcloud_cos/cos_client.py | 6 ++--- qcloud_cos/cos_comm.py | 54 ++++++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py index 2e932a47..2f8c3e2d 100644 --- a/qcloud_cos/cos_client.py +++ b/qcloud_cos/cos_client.py @@ -260,7 +260,6 @@ def put_object(self, Bucket, Body, Key, EnableMD5=False, **kwargs): logger.info("put object, url=:{url} ,headers=:{headers}".format( url=url, headers=headers)) - Body = deal_with_empty_file_stream(Body) if EnableMD5: md5_str = get_content_md5(Body) if md5_str: @@ -639,7 +638,6 @@ def upload_part(self, Bucket, Key, Body, PartNumber, UploadId, EnableMD5=False, url=url, headers=headers, params=params)) - Body = deal_with_empty_file_stream(Body) if EnableMD5: md5_str = get_content_md5(Body) if md5_str: @@ -2480,19 +2478,19 @@ def append_object(self, Bucket, Key, Position, Data, **kwargs): :kwargs(dict): 设置上传的headers. :return(dict): 上传成功返回的结果,包含ETag等信息. """ + check_object_content_length(Data) headers = mapped(kwargs) params = {'append': '', 'position': Position} url = self._conf.uri(bucket=Bucket, path=Key) logger.info("append object, url=:{url} ,headers=:{headers}".format( url=url, headers=headers)) - Body = deal_with_empty_file_stream(Data) rt = self.send_request( method='POST', url=url, bucket=Bucket, auth=CosS3Auth(self._conf._secret_id, self._conf._secret_key, Key, params=params), - data=Body, + data=Data, headers=headers, params=params) response = rt.headers diff --git a/qcloud_cos/cos_comm.py b/qcloud_cos/cos_comm.py index d1d30a45..f5ad6029 100644 --- a/qcloud_cos/cos_comm.py +++ b/qcloud_cos/cos_comm.py @@ -16,7 +16,8 @@ from .cos_exception import CosClientError from .cos_exception import CosServiceError -SINGLE_UPLOAD_LENGTH = 5*1024*1024*1024 # 单次上传文件最大为5G +SINGLE_UPLOAD_LENGTH = 5*1024*1024*1024 # 单次上传文件最大为5GB +DEFAULT_CHUNK_SIZE = 1024*1024 # 计算MD5值时,文件单次读取的块大小为1MB LOGGING_UIN = 'id="qcs::cam::uin/100001001014:uin/100001001014"' # kwargs中params到http headers的映射 maplist = { @@ -102,9 +103,20 @@ def get_content_md5(body): return get_md5(body) elif hasattr(body, 'tell') and hasattr(body, 'seek') and hasattr(body, 'read'): file_position = body.tell() # 记录文件当前位置 - md5_str = get_md5(body.read()) - body.seek(file_position) # 恢复初始的文件位置 + # avoid OOM + md5 = hashlib.md5('') + chunk = body.read(DEFAULT_CHUNK_SIZE) + while chunk: + md5.update(chunk) + chunk = body.read(DEFAULT_CHUNK_SIZE) + md5_str = base64.standard_b64encode(md5.digest()) + try: + body.seek(file_position) # 恢复初始的文件位置 + except Exception as e: + raise CosClientError('seek unsupported to calculate md5!') return md5_str + else: + raise CosClientError('unsupported body type to calculate md5!') return None @@ -328,35 +340,35 @@ def gen_copy_source_range(begin_range, end_range): return range +def get_file_like_object_length(data): + try: + total_length = os.fstat(data.fileno()).st_size + except IOError: + if hasattr(data, '__len__'): + total_length = len(data) + else: + # support BytesIO file-like object + total_length = len(data.getvalue()) + current_position = data.tell() + content_len = total_length - current_position + return content_len + + def check_object_content_length(data): """put_object接口和upload_part接口的文件大小不允许超过5G""" content_len = 0 if type(data) is string_types: content_len = len(to_bytes(data)) elif hasattr(data, 'fileno') and hasattr(data, 'tell'): - fileno = data.fileno() - total_length = os.fstat(fileno).st_size - current_position = data.tell() - content_len = total_length - current_position + content_len = get_file_like_object_length(data) + else: + # can not get the content-length, use chunked to upload the file + pass if content_len > SINGLE_UPLOAD_LENGTH: raise CosClientError('The object size you upload can not be larger than 5GB in put_object or upload_part') return None -def deal_with_empty_file_stream(data): - """对于文件流的剩余长度为0的情况下,返回空字节流""" - if hasattr(data, 'fileno') and hasattr(data, 'tell'): - try: - fileno = data.fileno() - total_length = os.fstat(fileno).st_size - current_position = data.tell() - if total_length - current_position == 0: - return b"" - except io.UnsupportedOperation: - return b"" - return data - - def format_dict(data, key_lst): """转换返回dict中的可重复字段为list""" for key in key_lst: From c2827fb9903db97dc6ceffb2cd2d412565cdcd60 Mon Sep 17 00:00:00 2001 From: tiedu Date: Fri, 2 Nov 2018 13:32:58 +0800 Subject: [PATCH 2/5] Fix md5 calc --- qcloud_cos/cos_comm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qcloud_cos/cos_comm.py b/qcloud_cos/cos_comm.py index f5ad6029..ccdf4c59 100644 --- a/qcloud_cos/cos_comm.py +++ b/qcloud_cos/cos_comm.py @@ -104,7 +104,7 @@ def get_content_md5(body): elif hasattr(body, 'tell') and hasattr(body, 'seek') and hasattr(body, 'read'): file_position = body.tell() # 记录文件当前位置 # avoid OOM - md5 = hashlib.md5('') + md5 = hashlib.md5() chunk = body.read(DEFAULT_CHUNK_SIZE) while chunk: md5.update(chunk) From 26ac40857bf7785bc881c3721ab0cac3394652cb Mon Sep 17 00:00:00 2001 From: tiedu Date: Fri, 2 Nov 2018 21:37:03 +0800 Subject: [PATCH 3/5] modify ut for chunked upload and add param for autocompress --- qcloud_cos/cos_client.py | 62 ++++++++++++++++++++++++++++++++-------- qcloud_cos/streambody.py | 18 +++++++++--- ut/test.py | 29 ++++++++++++++++++- 3 files changed, 92 insertions(+), 17 deletions(-) diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py index 81330a9a..a6276f80 100644 --- a/qcloud_cos/cos_client.py +++ b/qcloud_cos/cos_client.py @@ -12,7 +12,7 @@ import xml.etree.ElementTree from requests import Request, Session from datetime import datetime -from six.moves.urllib.parse import quote, unquote +from six.moves.urllib.parse import quote, unquote, urlencode from hashlib import md5 from dicttoxml import dicttoxml from .streambody import StreamBody @@ -130,6 +130,16 @@ def set_ip_port(self, IP, Port=None): self._ip = to_unicode(IP) self._port = Port + def set_credential(self, SecretId, SecretKey, Token=None): + """设置访问的身份,包括secret_id,secret_key,临时秘钥token默认为空 + :param SecretId(string): 秘钥SecretId. + :param SecretKey(string): 秘钥SecretKey. + :param Token(string): 临时秘钥使用的token. + """ + self._secret_id = to_unicode(SecretId) + self._secret_key = to_unicode(SecretKey) + self._token = to_unicode(Token) + class CosS3Client(object): """cos客户端类,封装相应请求""" @@ -164,7 +174,7 @@ def get_auth(self, Method, Bucket, Key, Expired=300, Headers={}, Params={}): client = CosS3Client(config) # 获取上传请求的签名 auth_string = client.get_auth( - Method='PUT' + Method='PUT,' Bucket='bucket', Key='test.txt', Expired=600, @@ -289,7 +299,7 @@ def get_object(self, Bucket, Key, **kwargs): client = CosS3Client(config) # 下载cos上的文件到本地 response = client.get_object( - Bucket='bucket' + Bucket='bucket', Key='test.txt' ) response['Body'].get_stream_to_file('local_file.txt') @@ -328,12 +338,43 @@ def get_object(self, Bucket, Key, **kwargs): return response - def get_presigned_download_url(self, Bucket, Key, Expired=300): + def get_presigned_url(self, Bucket, Key, Method, Expired=300, Params={}, Headers={}): + """生成预签名的url + + :param Bucket(string): 存储桶名称. + :param Key(string): COS路径. + :param Method(string): HTTP请求的方法, 'PUT'|'POST'|'GET'|'DELETE'|'HEAD' + :param Expired(int): 签名过期时间. + :param Params(dict): 签入签名的参数 + :param Headers(dict): 签入签名的头部 + :return(string): 预先签名的URL. + + .. code-block:: python + + config = CosConfig(Region=region, SecretId=secret_id, SecretKey=secret_key, Token=token) # 获取配置对象 + client = CosS3Client(config) + # 获取预签名链接 + response = client.get_presigned_download_url( + Bucket='bucket', + Key='test.txt', + Method='PUT' + ) + """ + url = self._conf.uri(bucket=Bucket, path=Key) + sign = self.get_auth(Method=Method, Bucket=Bucket, Key=Key, Expired=Expired, Headers=Headers, Params=Params) + url = url + '?sign=' + quote(sign) + if Params: + url = url + '&' + urlencode(Params) + return url + + def get_presigned_download_url(self, Bucket, Key, Expired=300, Params={}, Headers={}): """生成预签名的下载url :param Bucket(string): 存储桶名称. :param Key(string): COS路径. :param Expired(int): 签名过期时间. + :param Params(dict): 签入签名的参数 + :param Headers(dict): 签入签名的头部 :return(string): 预先签名的下载URL. .. code-block:: python @@ -342,14 +383,11 @@ def get_presigned_download_url(self, Bucket, Key, Expired=300): client = CosS3Client(config) # 获取预签名文件下载链接 response = client.get_presigned_download_url( - Bucket='bucket' + Bucket='bucket', Key='test.txt' ) """ - url = self._conf.uri(bucket=Bucket, path=Key) - sign = self.get_auth(Method='GET', Bucket=Bucket, Key=Key, Expired=Expired) - url = url + '?sign=' + quote(sign) - return url + return self.get_presigned_url(Bucket, Key, 'GET', Expired, Params, Headers) def delete_object(self, Bucket, Key, **kwargs): """单文件删除接口 @@ -365,7 +403,7 @@ def delete_object(self, Bucket, Key, **kwargs): client = CosS3Client(config) # 删除一个文件 response = client.delete_object( - Bucket='bucket' + Bucket='bucket', Key='test.txt' ) """ @@ -413,7 +451,7 @@ def delete_objects(self, Bucket, Delete={}, **kwargs): ] } response = client.delete_objects( - Bucket='bucket' + Bucket='bucket', Delete=objects ) """ @@ -454,7 +492,7 @@ def head_object(self, Bucket, Key, **kwargs): client = CosS3Client(config) # 查询文件属性 response = client.head_object( - Bucket='bucket' + Bucket='bucket', Key='test.txt' ) """ diff --git a/qcloud_cos/streambody.py b/qcloud_cos/streambody.py index 0330eb7a..65e5fbf5 100644 --- a/qcloud_cos/streambody.py +++ b/qcloud_cos/streambody.py @@ -11,7 +11,7 @@ def get_raw_stream(self): def get_stream(self, chunk_size=1024): return self._rt.iter_content(chunk_size=chunk_size) - def get_stream_to_file(self, file_name): + def get_stream_to_file(self, file_name, auto_decompress=False): use_chunked = False if 'Content-Length' in self._rt.headers: content_len = int(self._rt.headers['Content-Length']) @@ -19,12 +19,22 @@ def get_stream_to_file(self, file_name): use_chunked = True else: raise IOError("download failed without Content-Length header or Transfer-Encoding header") + use_encoding = False + if 'Content-Encoding' in self._rt.headers: + use_encoding = True file_len = 0 with open(file_name, 'wb') as fp: - for chunk in self._rt.iter_content(chunk_size=1024): - if chunk: + if use_encoding and not auto_decompress: + chunk = self._rt.raw.read(1024) + while chunk: file_len += len(chunk) fp.write(chunk) - if not use_chunked and file_len != content_len: + chunk = self._rt.raw.read(1024) + else: + for chunk in self._rt.iter_content(chunk_size=1024): + if chunk: + file_len += len(chunk) + fp.write(chunk) + if not use_chunked and not (use_encoding and auto_decompress) and file_len != content_len: raise IOError("download failed with incomplete file") diff --git a/ut/test.py b/ut/test.py index 8b67c6bf..a507e89e 100644 --- a/ut/test.py +++ b/ut/test.py @@ -94,7 +94,7 @@ def test_put_get_delete_object_10MB(): ) assert etag == put_response['ETag'] # head object - head_response = client.get_object( + head_response = client.head_object( Bucket=test_bucket, Key=file_name ) @@ -869,6 +869,31 @@ def test_put_get_bucket_policy(): ) +def test_put_file_like_object(): + """利用BytesIo来模拟文件上传""" + import io + input = io.BytesIO(b"123456") + rt = client.put_object( + Bucket=test_bucket, + Key='test_file_like_object', + Body=input, + EnableMD5=True + ) + assert rt + + +def test_put_chunked_object(): + """利用BytesIo来模拟文件上传""" + import requests + input = requests.get(client.get_presigned_download_url(test_bucket, test_object)) + rt = client.put_object( + Bucket=test_bucket, + Key='test_chunked_object', + Body=input + ) + assert rt + + if __name__ == "__main__": setUp() test_put_object_enable_md5() @@ -886,4 +911,6 @@ def test_put_get_bucket_policy(): test_put_get_bucket_logging() test_put_get_delete_website() test_put_get_bucket_policy() + test_put_file_like_object() + test_put_chunked_object() tearDown() From 7a4047e977e47cfe8bf2710e787495a2da8df18d Mon Sep 17 00:00:00 2001 From: tiedu Date: Mon, 5 Nov 2018 11:26:38 +0800 Subject: [PATCH 4/5] release 1.6.1 --- qcloud_cos/cos_client.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py index a6276f80..ea2b8afe 100644 --- a/qcloud_cos/cos_client.py +++ b/qcloud_cos/cos_client.py @@ -193,7 +193,7 @@ def send_request(self, method, url, bucket, timeout=30, **kwargs): if self._conf._timeout is not None: # 用户自定义超时时间 timeout = self._conf._timeout - kwargs['headers']['User-Agent'] = 'cos-python-sdk-v5.1.5.7' + kwargs['headers']['User-Agent'] = 'cos-python-sdk-v5.1.6.1' if self._conf._token is not None: kwargs['headers']['x-cos-security-token'] = self._conf._token if bucket is not None: diff --git a/setup.py b/setup.py index 852f787f..0bed04a1 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def long_description(): setup( name='cos-python-sdk-v5', - version='1.5.7', + version='1.6.1', url='https://www.qcloud.com/', license='MIT', author='tiedu, lewzylu, channingliu', From 100ba9be8b03369ae434d4661b4353259f88812c Mon Sep 17 00:00:00 2001 From: tiedu Date: Mon, 5 Nov 2018 12:00:15 +0800 Subject: [PATCH 5/5] modify ut and sample --- qcloud_cos/cos_client.py | 4 ++-- qcloud_cos/cos_comm.py | 2 +- ut/test.py | 17 ++++++++++++++++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py index ea2b8afe..3e447604 100644 --- a/qcloud_cos/cos_client.py +++ b/qcloud_cos/cos_client.py @@ -174,7 +174,7 @@ def get_auth(self, Method, Bucket, Key, Expired=300, Headers={}, Params={}): client = CosS3Client(config) # 获取上传请求的签名 auth_string = client.get_auth( - Method='PUT,' + Method='PUT', Bucket='bucket', Key='test.txt', Expired=600, @@ -354,7 +354,7 @@ def get_presigned_url(self, Bucket, Key, Method, Expired=300, Params={}, Headers config = CosConfig(Region=region, SecretId=secret_id, SecretKey=secret_key, Token=token) # 获取配置对象 client = CosS3Client(config) # 获取预签名链接 - response = client.get_presigned_download_url( + response = client.get_presigned_url( Bucket='bucket', Key='test.txt', Method='PUT' diff --git a/qcloud_cos/cos_comm.py b/qcloud_cos/cos_comm.py index ccdf4c59..b4d9bed1 100644 --- a/qcloud_cos/cos_comm.py +++ b/qcloud_cos/cos_comm.py @@ -107,7 +107,7 @@ def get_content_md5(body): md5 = hashlib.md5() chunk = body.read(DEFAULT_CHUNK_SIZE) while chunk: - md5.update(chunk) + md5.update(to_bytes(chunk)) chunk = body.read(DEFAULT_CHUNK_SIZE) md5_str = base64.standard_b64encode(md5.digest()) try: diff --git a/ut/test.py b/ut/test.py index a507e89e..ddc265a9 100644 --- a/ut/test.py +++ b/ut/test.py @@ -883,7 +883,7 @@ def test_put_file_like_object(): def test_put_chunked_object(): - """利用BytesIo来模拟文件上传""" + """支持网络流来支持chunk上传""" import requests input = requests.get(client.get_presigned_download_url(test_bucket, test_object)) rt = client.put_object( @@ -894,6 +894,21 @@ def test_put_chunked_object(): assert rt +def test_put_get_gzip_file(): + """上传文件时,带上ContentEncoding,下载时默认不解压""" + rt = client.put_object( + Bucket=test_bucket, + Key='test_gzip_file', + Body='123456', + ContentEncoding='gzip', + ) + rt = client.get_object( + Bucket=test_bucket, + Key='test_gzip_file' + ) + rt['Body'].get_stream_to_file('test_gzip_file.local') + + if __name__ == "__main__": setUp() test_put_object_enable_md5()