From 0d59859c670b9de37bffa8a6e536497c88d9f25d Mon Sep 17 00:00:00 2001
From: Guolin Ke
Date: Fri, 19 Jul 2019 01:16:50 +0800
Subject: [PATCH] throw error when meet non ascii (#2229)

* throw error when meet non ascii

* check ascii for config strings.
---
 include/LightGBM/dataset.h      | 5 +++++
 include/LightGBM/utils/common.h | 9 +++++++++
 src/io/config.cpp               | 3 +++
 3 files changed, 17 insertions(+)

diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h
index 92cc201cc62..e688522fbb1 100644
--- a/include/LightGBM/dataset.h
+++ b/include/LightGBM/dataset.h
@@ -8,6 +8,7 @@
 #include <LightGBM/config.h>
 #include <LightGBM/feature_group.h>
 #include <LightGBM/meta.h>
+#include <LightGBM/utils/common.h>
 #include <LightGBM/utils/openmp_wrapper.h>
 #include <LightGBM/utils/random.h>
 #include <LightGBM/utils/text_reader.h>
@@ -555,6 +556,10 @@ class Dataset {
     // replace ' ' in feature_names with '_'
     bool spaceInFeatureName = false;
     for (auto& feature_name : feature_names_) {
+      // check ascii
+      if (!Common::CheckASCII(feature_name)) {
+        Log::Fatal("Do not support non-ascii characters in feature name.");
+      }
       if (feature_name.find(' ') != std::string::npos) {
         spaceInFeatureName = true;
         std::replace(feature_name.begin(), feature_name.end(), ' ', '_');
diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h
index f42aa8295e3..8ad82245411 100644
--- a/include/LightGBM/utils/common.h
+++ b/include/LightGBM/utils/common.h
@@ -895,6 +895,15 @@ static T SafeLog(T x) {
   }
 }
 
+inline bool CheckASCII(const std::string& s) {
+  for (auto c : s) {
+    if (static_cast<unsigned char>(c) > 127) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace Common
 
 }  // namespace LightGBM
diff --git a/src/io/config.cpp b/src/io/config.cpp
index d0924ac0576..446b4971d92 100644
--- a/src/io/config.cpp
+++ b/src/io/config.cpp
@@ -17,6 +17,9 @@ void Config::KV2Map(std::unordered_map<std::string, std::string>& params, const
   if (tmp_strs.size() == 2) {
     std::string key = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[0]));
     std::string value = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[1]));
+    if (!Common::CheckASCII(key) || !Common::CheckASCII(value)) {
+      Log::Fatal("Do not support non-ascii characters in config.");
+    }
     if (key.size() > 0) {
       auto value_search = params.find(key);
       if (value_search == params.end()) {  // not set