diff --git a/README.md b/README.md
index 0ce3c206ca9..e575cd9d25a 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,8 @@ MMLSpark (Spark-package): https://github.com/Azure/mmlspark
 
 ML.NET (.NET/C#-package): https://github.com/dotnet/machinelearning
 
+LightGBM.NET (.NET/C#-package): https://github.com/rca22/LightGBM.Net
+
 Dask-LightGBM (distributed and parallel Python-package): https://github.com/dask/dask-lightgbm
 
 Get Started and Documentation
diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index 624aa14e8dc..53939b2a921 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -822,6 +822,37 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle,
                                                 int64_t* out_len,
                                                 double* out_result);
 
+/*!
+* \brief make prediction for a new data set whose rows are given as separate pointers
+*        Note: should pre-allocate memory for out_result,
+*              for normal and raw score: its length is equal to num_class * num_data
+*              for leaf index, its length is equal to num_class * num_data * num_iteration
+* \param handle handle
+* \param data pointer to an array of row pointers, one pointer per row of the data
+* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
+* \param nrow number of rows
+* \param ncol number of columns
+* \param predict_type
+*          C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
+*          C_API_PREDICT_RAW_SCORE: raw score
+*          C_API_PREDICT_LEAF_INDEX: leaf index
+* \param num_iteration number of iterations for prediction, <= 0 means no limit
+* \param parameter Other parameters for prediction, e.g. early stopping for prediction.
+* \param out_len length of output result
+* \param out_result used to set a pointer to array, should allocate memory before calling this function
+* \return 0 when succeed, -1 when failure happens
+*/
+LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMats(BoosterHandle handle,
+                                                 const void** data,
+                                                 int data_type,
+                                                 int32_t nrow,
+                                                 int32_t ncol,
+                                                 int predict_type,
+                                                 int num_iteration,
+                                                 const char* parameter,
+                                                 int64_t* out_len,
+                                                 double* out_result);
+
 /*!
 * \brief save model into file
 * \param handle handle
diff --git a/src/c_api.cpp b/src/c_api.cpp
index 200aab6566a..c30fdea2e75 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -389,6 +389,9 @@ RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_
 std::function<std::vector<std::pair<int, double>>(int row_idx)>
 RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major);
 
+std::function<std::vector<std::pair<int, double>>(int row_idx)>
+RowPairFunctionFromDenseRows(const void** data, int num_col, int data_type);
+
 std::function<std::vector<std::pair<int, double>>(int idx)>
 RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
                    const void* data, int data_type, int64_t nindptr, int64_t nelem);
@@ -1416,6 +1419,30 @@ int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle,
 }
 
 
+int LGBM_BoosterPredictForMats(BoosterHandle handle,
+                               const void** data,
+                               int data_type,
+                               int32_t nrow,
+                               int32_t ncol,
+                               int predict_type,
+                               int num_iteration,
+                               const char* parameter,
+                               int64_t* out_len,
+                               double* out_result) {
+  API_BEGIN();
+  auto param = Config::Str2Map(parameter);
+  Config config;
+  config.Set(param);
+  if (config.num_threads > 0) {
+    omp_set_num_threads(config.num_threads);
+  }
+  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
+  auto get_row_fun = RowPairFunctionFromDenseRows(data, ncol, data_type);
+  ref_booster->Predict(num_iteration, predict_type, nrow, get_row_fun,
+                       config, out_result, out_len);
+  API_END();
+}
+
 int LGBM_BoosterSaveModel(BoosterHandle handle,
                           int start_iteration,
                           int num_iteration,
@@ -1589,6 +1616,22 @@
RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int d
   return nullptr;
 }
 
+// data is an array of pointers to individual rows; each call returns one row's (column, value) pairs
+std::function<std::vector<std::pair<int, double>>(int row_idx)>
+RowPairFunctionFromDenseRows(const void** data, int num_col, int data_type) {
+  return [=](int row_idx) {
+    // treat the selected row as a 1 x num_col row-major matrix and fetch its only row
+    auto raw_values = RowFunctionFromDenseMatric(data[row_idx], 1, num_col, data_type, /* is_row_major */ true)(0);
+    std::vector<std::pair<int, double>> ret;
+    for (int i = 0; i < static_cast<int>(raw_values.size()); ++i) {
+      if (std::fabs(raw_values[i]) > kZeroThreshold || std::isnan(raw_values[i])) {
+        ret.emplace_back(i, raw_values[i]);
+      }
+    }
+    return ret;
+  };
+}
+
 std::function<std::vector<std::pair<int, double>>(int idx)>
 RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices, const void* data, int data_type, int64_t , int64_t ) {
   if (data_type == C_API_DTYPE_FLOAT32) {