|
| 1 | +/* Copyright 2017 R. Thomas |
| 2 | + * Copyright 2017 Quarkslab |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +#include "encoding.hpp" |
| 18 | + |
| 19 | +py::object safe_string_converter(const std::string& str) { |
| 20 | + auto global = py::dict(py::module::import("__main__").attr("__dict__")); |
| 21 | + auto local = py::dict(); |
| 22 | + py::bytes name_bytes = py::bytes(str); |
| 23 | + local["name_bytes"] = name_bytes; |
| 24 | + |
| 25 | +#if PY_MAJOR_VERSION >= 3 |
| 26 | + py::eval<py::eval_statements>(R"( |
| 27 | +encodings = ["big5", "big5hkscs", "cp037", "cp424", "cp437", "cp500", "cp737", "cp775", "cp850", "cp852", "cp855", |
| 28 | + "cp856", "cp857", "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp869", "cp874", "cp875", "cp932", "cp949", |
| 29 | + "cp950", "cp1006", "cp1026", "cp1140", "cp1250", "cp1251", "cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", |
| 30 | + "euc_jp", "euc_jis_2004", "euc_jisx0213", "euc_kr", "gb2312", "gbk", "gb18030", "hz", "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", |
| 31 | + "iso2022_jp_2004", "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr", "latin_1", "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", |
| 32 | + "iso8859_6", "iso8859_7", "iso8859_8", "iso8859_9", "iso8859_10", "iso8859_13", "iso8859_14", "iso8859_15", "johab", "koi8_r", "koi8_u", |
| 33 | + "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2", "mac_roman", "mac_turkish", "ptcp154", "shift_jis", "shift_jis_2004", |
| 34 | + "shift_jisx0213", "utf_32", "utf_32_be", "utf_32_le", "utf_16", "utf_16_be", "utf_16_le", "utf_7", "utf_8_sig" ] |
| 35 | +for e in encodings: |
| 36 | + try: |
| 37 | + name_str = name_bytes.decode(e) |
| 38 | + break |
| 39 | + except (UnicodeEncodeError, UnicodeDecodeError) as e: |
| 40 | + continue |
| 41 | +name_str = name_bytes.decode('ascii', 'backslashreplace') |
| 42 | + )", global, local); |
| 43 | +#else |
| 44 | + py::eval<py::eval_statements>(R"( |
| 45 | +def handler(err): |
| 46 | + start = err.start |
| 47 | + end = err.end |
| 48 | + return (u"".join([u"\\x{0:02x}".format(ord(err.object[i])) for i in range(start,end)]),end) |
| 49 | +import codecs |
| 50 | +codecs.register_error('backslashreplace_', handler) |
| 51 | +name_str = name_bytes.decode('ascii', 'backslashreplace_') |
| 52 | + )", global, local); |
| 53 | +#endif |
| 54 | + return local["name_str"]; |
| 55 | +} |
0 commit comments