From aac36859fd5fba14fdd99b2873806956a3202b52 Mon Sep 17 00:00:00 2001 From: degangliu Date: Tue, 30 Nov 2021 15:44:31 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E5=AE=A1=E6=A0=B8sdk=20array=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E5=80=BC=E7=9A=84=E4=BC=98=E5=8C=96=E5=85=BC=E5=AE=B9?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qcloud_cos/cos_client.py | 120 ++++++++++++++++++++++++++++++++++++--- qcloud_cos/cos_comm.py | 17 +++++- 2 files changed, 128 insertions(+), 9 deletions(-) diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py index b000d856..0bb0ad13 100644 --- a/qcloud_cos/cos_client.py +++ b/qcloud_cos/cos_client.py @@ -511,7 +511,25 @@ def get_object_sensitive_content_recognition(self, Bucket, Key, DetectType, Inte params=params, headers=headers) + logging.debug("get object sensitive content recognition rsp:%s", rt.content) data = xml_to_dict(rt.content) + # format res + if 'PornInfo' in data: + if 'OcrResults' in data['PornInfo']: + format_dict_or_list(data['PornInfo']['OcrResults'], ['Keywords']) + format_dict(data['PornInfo'], ['OcrResults', 'ObjectResults']) + if 'TerroristInfo' in data: + if 'OcrResults' in data['TerroristInfo']: + format_dict_or_list(data['TerroristInfo']['OcrResults'], ['Keywords']) + format_dict(data['TerroristInfo'], ['OcrResults', 'ObjectResults']) + if 'PoliticsInfo' in data: + if 'OcrResults' in data['PoliticsInfo']: + format_dict_or_list(data['PoliticsInfo']['OcrResults'], ['Keywords']) + format_dict(data['PoliticsInfo'], ['OcrResults', 'ObjectResults']) + if 'AdsInfo' in data: + if 'OcrResults' in data['AdsInfo']: + format_dict_or_list(data['AdsInfo']['OcrResults'], ['Keywords']) + format_dict(data['AdsInfo'], ['OcrResults', 'ObjectResults']) return data @@ -4505,6 +4523,7 @@ def ci_auditing_submit_common(self, Bucket, Key, DetectType, Type, Url=None, Biz params=params, headers=headers) + logging.debug("ci auditing rsp:%s", rt.content) data = 
xml_to_dict(rt.content) return data @@ -4557,6 +4576,7 @@ def ci_auditing_query_common(self, Bucket, Type, JobID, **kwargs): params=params, headers=headers) + logging.debug("query ci auditing:%s", rt.content) data = xml_to_dict(rt.content) return data @@ -4641,13 +4661,50 @@ def ci_auditing_video_query(self, Bucket, JobID, **kwargs): print response """ - return self.ci_auditing_query_common( + data = self.ci_auditing_query_common( Bucket=Bucket, JobID=JobID, Type='video', **kwargs ) + if 'JobsDetail' in data: + format_dict(data['JobsDetail'], ['Snapshot', 'AudioSection']) + if 'Snapshot' in data['JobsDetail']: + for snapshot in data['JobsDetail']['Snapshot']: + if 'PornInfo' in snapshot: + format_dict(snapshot['PornInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in snapshot['PornInfo']: + for ocrResult in snapshot['PornInfo']['OcrResults']: + format_dict(ocrResult, ['Keywords']) + if 'TerrorismInfo' in snapshot: + format_dict(snapshot['TerrorismInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in snapshot['TerrorismInfo']: + for ocrResult in snapshot['TerrorismInfo']['OcrResults']: + format_dict(ocrResult, ['Keywords']) + if 'PoliticsInfo' in snapshot: + format_dict(snapshot['PoliticsInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in snapshot['PoliticsInfo']: + for ocrResult in snapshot['PoliticsInfo']['OcrResults']: + format_dict(ocrResult, ['Keywords']) + if 'AdsInfo' in snapshot: + format_dict(snapshot['AdsInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in snapshot['AdsInfo']: + for ocrResult in snapshot['AdsInfo']['OcrResults']: + format_dict(ocrResult, ['Keywords']) + if 'AudioSection' in data['JobsDetail']: + for audioSection in data['JobsDetail']['AudioSection']: + if 'PornInfo' in audioSection: + format_dict(audioSection['PornInfo'], ['Keywords']) + if 'TerrorismInfo' in audioSection: + format_dict(audioSection['TerrorismInfo'], ['Keywords']) + if 'PoliticsInfo' in audioSection: + 
format_dict(audioSection['PoliticsInfo'], ['Keywords']) + if 'AdsInfo' in audioSection: + format_dict(audioSection['AdsInfo'], ['Keywords']) + + return data + def ci_auditing_audio_submit(self, Bucket, Key, DetectType, Url=None, Callback=None, CallbackVersion='Simple', BizType=None, **kwargs): """提交音频审核任务接口 https://cloud.tencent.com/document/product/460/53395 @@ -4714,12 +4771,26 @@ def ci_auditing_audio_query(self, Bucket, JobID, **kwargs): print response """ - return self.ci_auditing_query_common( + data = self.ci_auditing_query_common( Bucket=Bucket, JobID=JobID, Type='audio', **kwargs ) + if 'JobsDetail' in data: + format_dict(data['JobsDetail'], ['Section']) + if 'Section' in data['JobsDetail']: + for section in data['JobsDetail']['Section']: + if 'PornInfo' in section: + format_dict(section['PornInfo'], ['Keywords']) + if 'TerrorismInfo' in section: + format_dict(section['TerrorismInfo'], ['Keywords']) + if 'PoliticsInfo' in section: + format_dict(section['PoliticsInfo'], ['Keywords']) + if 'AdsInfo' in section: + format_dict(section['AdsInfo'], ['Keywords']) + + return data def ci_auditing_text_submit(self, Bucket, Key, DetectType, Content=None, Callback=None, BizType=None, **kwargs): """提交文本审核任务接口 https://cloud.tencent.com/document/product/460/56285 @@ -4758,7 +4829,7 @@ def ci_auditing_text_submit(self, Bucket, Key, DetectType, Content=None, Callbac if Callback: conf['Callback'] = Callback - return self.ci_auditing_submit_common( + data = self.ci_auditing_submit_common( Bucket=Bucket, Key=Key, Type='text', @@ -4769,6 +4840,11 @@ def ci_auditing_text_submit(self, Bucket, Key, DetectType, Content=None, Callbac **kwargs ) + if 'JobsDetail' in data: + format_dict(data['JobsDetail'], ['Section']) + + return data + def ci_auditing_text_query(self, Bucket, JobID, **kwargs): """查询文本审核任务接口 https://cloud.tencent.com/document/product/460/56284 @@ -4789,19 +4865,23 @@ def ci_auditing_text_query(self, Bucket, JobID, **kwargs): print response """ - return 
self.ci_auditing_query_common( + data = self.ci_auditing_query_common( Bucket=Bucket, JobID=JobID, Type='text', **kwargs ) + if 'JobsDetail' in data: + format_dict(data['JobsDetail'], ['Section']) + return data - def ci_auditing_document_submit(self, Bucket, Url, DetectType, Type=None, Callback=None, BizType=None, **kwargs): + def ci_auditing_document_submit(self, Bucket, Url, DetectType, Key=None, Type=None, Callback=None, BizType=None, **kwargs): """提交文档审核任务接口 https://cloud.tencent.com/document/product/460/59380 :param Bucket(string): 存储桶名称. :param Url(string): 文档文件的链接地址,例如 http://www.example.com/doctest.doc :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads + :param Key(string): 存储在 COS 存储桶中的文件名称,例如在目录 test 中的文件test.doc,则文件名称为 test/test.doc. Key 和 Url 只能选择其中一种。 :param Type(string): 指定文档文件的类型,如未指定则默认以文件的后缀为类型。 如果文件没有后缀,该字段必须指定,否则会审核失败。例如:doc、docx、ppt、pptx 等 :param Callback(string): 回调地址,以http://或者https://开头的地址。 @@ -4822,7 +4902,11 @@ def ci_auditing_document_submit(self, Bucket, Url, DetectType, Type=None, Callba print response """ - Input = {'Url': Url} + Input = {} + if Url is not None: + Input['Url'] = Url + if Key is not None: + Input['Object'] = Key if Type: Input['Type'] = Type @@ -4863,13 +4947,35 @@ def ci_auditing_document_query(self, Bucket, JobID, **kwargs): print response """ - return self.ci_auditing_query_common( + data = self.ci_auditing_query_common( Bucket=Bucket, JobID=JobID, Type='document', **kwargs ) + if 'JobsDetail' in data and 'PageSegment' in data['JobsDetail'] and 'Results' in data['JobsDetail']['PageSegment']: + format_dict(data['JobsDetail']['PageSegment'], ['Results']) + for resultsItem in data['JobsDetail']['PageSegment']['Results']: + if 'PornInfo' in resultsItem: + format_dict(resultsItem['PornInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in resultsItem['PornInfo']: + format_dict_or_list(resultsItem['PornInfo']['OcrResults'], ['Keywords']) + if 'TerrorismInfo' in resultsItem: +
format_dict(resultsItem['TerrorismInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in resultsItem['TerrorismInfo']: + format_dict_or_list(resultsItem['TerrorismInfo']['OcrResults'], ['Keywords']) + if 'PoliticsInfo' in resultsItem: + format_dict(resultsItem['PoliticsInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in resultsItem['PoliticsInfo']: + format_dict_or_list(resultsItem['PoliticsInfo']['OcrResults'], ['Keywords']) + if 'AdsInfo' in resultsItem: + format_dict(resultsItem['AdsInfo'], ['OcrResults', 'ObjectResults']) + if 'OcrResults' in resultsItem['AdsInfo']: + format_dict_or_list(resultsItem['AdsInfo']['OcrResults'], ['Keywords']) + + return data + def ci_get_media_queue(self, Bucket, **kwargs): """查询媒体处理队列接口 https://cloud.tencent.com/document/product/436/54045 diff --git a/qcloud_cos/cos_comm.py b/qcloud_cos/cos_comm.py index 35cb9acc..fe165bf7 100644 --- a/qcloud_cos/cos_comm.py +++ b/qcloud_cos/cos_comm.py @@ -12,7 +12,7 @@ import xml.dom.minidom import xml.etree.ElementTree from datetime import datetime -from dicttoxml import dicttoxml +from dicttoxml import dicttoxml, unicode_me from .xml2dict import Xml2Dict from .cos_exception import CosClientError from .cos_exception import CosServiceError @@ -396,13 +396,26 @@ def format_dict(data, key_lst): return data for key in key_lst: # 将dict转为list,保持一致 - if key in data and (isinstance(data[key], dict) or isinstance(data[key], str)): + if key in data and (isinstance(data[key], dict) or isinstance(data[key], string_types)): lst = [] lst.append(data[key]) data[key] = lst return data +def format_dict_or_list(data, key_lst): + """转换返回dict或list中的可重复字段为list""" + if not ((isinstance(data, list) or isinstance(data, dict)) and isinstance(key_lst, list)): + return data + if isinstance(data, dict): + return format_dict(data, key_lst) + + for data_item in data: + format_dict(data_item, key_lst) + + return data + + def decode_result(data, key_lst, multi_key_list): """decode结果中的字段""" for key in key_lst: 
From fba9f463c289ce57f5bb177f0b6528c88f756808 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 30 Nov 2021 15:52:43 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E8=AF=AF=E6=B7=BB=E5=8A=A0=E7=9A=84?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E5=88=A0=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qcloud_cos/cos_comm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qcloud_cos/cos_comm.py b/qcloud_cos/cos_comm.py index fe165bf7..7d5f7360 100644 --- a/qcloud_cos/cos_comm.py +++ b/qcloud_cos/cos_comm.py @@ -12,7 +12,7 @@ import xml.dom.minidom import xml.etree.ElementTree from datetime import datetime -from dicttoxml import dicttoxml, unicode_me +from dicttoxml import dicttoxml from .xml2dict import Xml2Dict from .cos_exception import CosClientError from .cos_exception import CosServiceError From 09cb73bccbec047939db64f1022fdee288060ab7 Mon Sep 17 00:00:00 2001 From: degangliu Date: Fri, 3 Dec 2021 10:42:10 +0800 Subject: [PATCH 3/4] detect type add abuse and illegal --- qcloud_cos/cos_client.py | 2 +- qcloud_cos/cos_comm.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py index 0bb0ad13..d508a3c4 100644 --- a/qcloud_cos/cos_client.py +++ b/qcloud_cos/cos_client.py @@ -4452,7 +4452,7 @@ def ci_auditing_submit_common(self, Bucket, Key, DetectType, Type, Url=None, Biz :param Bucket(string): 存储桶名称. :param Key(string): COS路径. 
- :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads + :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads, 16: Ileegal, 32:Abuse :param Type(string): 审核类型,video:视频,text:文本,audio:音频,docment:文档。 :param Url(string): Url, 支持非cos上的文件 :param Conf(dic): 审核的个性化配置 diff --git a/qcloud_cos/cos_comm.py b/qcloud_cos/cos_comm.py index 7d5f7360..b8afd8ee 100644 --- a/qcloud_cos/cos_comm.py +++ b/qcloud_cos/cos_comm.py @@ -484,6 +484,8 @@ class CiDetectType(): TERRORIST = 2 POLITICS = 4 ADS = 8 + ILLEGAL = 16 + ABUSE = 32 @staticmethod def get_detect_type_str(DetectType): @@ -503,6 +505,14 @@ def get_detect_type_str(DetectType): if len(detect_type) > 0: detect_type += ',' detect_type += 'Ads' + if DetectType & CiDetectType.ILLEGAL > 0: + if len(detect_type) > 0: + detect_type += ',' + detect_type += 'Illegal' + if DetectType & CiDetectType.ABUSE > 0: + if len(detect_type) > 0: + detect_type += ',' + detect_type += 'Abuse' return detect_type From 7f1926d8de177979825d4b218f775f12afba5e06 Mon Sep 17 00:00:00 2001 From: degangliu Date: Fri, 3 Dec 2021 10:44:07 +0800 Subject: [PATCH 4/4] some text modify --- qcloud_cos/cos_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py index d508a3c4..ca8d5a6a 100644 --- a/qcloud_cos/cos_client.py +++ b/qcloud_cos/cos_client.py @@ -4452,7 +4452,7 @@ def ci_auditing_submit_common(self, Bucket, Key, DetectType, Type, Url=None, Biz :param Bucket(string): 存储桶名称. :param Key(string): COS路径. - :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads, 16: Ileegal, 32:Abuse + :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads, 16: Illegal, 32:Abuse :param Type(string): 审核类型,video:视频,text:文本,audio:音频,docment:文档。 :param Url(string): Url, 支持非cos上的文件 :param Conf(dic): 审核的个性化配置