diff --git a/.ci/ci.yml b/.ci/ci.yml index dd6357a32b1..ddf8e9a005a 100644 --- a/.ci/ci.yml +++ b/.ci/ci.yml @@ -104,6 +104,12 @@ jobs: CXX: "clang++" cmakeOptions: '$(commonSWIGCMakeFlags) -DINTERFACE_RUBY=ON' interfaceName: 'ruby' + coreml: + CC: "clang" + CXX: "clang++" + cmakeOptions: '$(commonSWIGCMakeFlags) -DINTERFACE_COREML=ON' + interfaceName: 'coreml' + variables: testRunTitle: '$(build.sourceBranchName)-debian' diff --git a/cmake/ShogunUtils.cmake b/cmake/ShogunUtils.cmake index 4fcfb533c53..39867997e16 100644 --- a/cmake/ShogunUtils.cmake +++ b/cmake/ShogunUtils.cmake @@ -315,3 +315,39 @@ function(ADD_SHOGUN_BENCHMARK REL_BENCHMARK_NAME) set_tests_properties(${BENCHMARK_NAME} PROPERTIES ${ARGN}) endif() endfunction() + +function(ADD_SHOGUN_UNITTEST) + set(options) + set(oneValueArgs TARGET) + set(multiValueArgs LABELS) + cmake_parse_arguments(ADD_SHOGUN_UNITTEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT ENABLE_TESTING) + return() + endif() + get_filename_component(UNITTEST_NAME ${ADD_SHOGUN_UNITTEST_TARGET} NAME_WE) + + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${ADD_SHOGUN_UNITTEST_TARGET}.cc) + # This benchmark has a corresponding .cc file, set it up as an executable. + add_executable(${UNITTEST_NAME} "${ADD_SHOGUN_UNITTEST_TARGET}.cc") + set_target_properties (${UNITTEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + set_target_properties (${UNITTEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/bin) + set_target_properties (${UNITTEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/bin) + target_link_libraries(${UNITTEST_NAME} ${SHOGUN_UNITTEST_LINK_LIBS}) + add_dependencies(${UNITTEST_NAME} GoogleMock shogun::shogun) + target_include_directories(${UNITTEST_NAME} PRIVATE ${source_dir}/googlemock/include ${source_dir}/googletest/include) + set(NO_COLOR "--color_print=false") + endif() + + add_test(${UNITTEST_NAME} ${CMAKE_BINARY_DIR}/bin/${UNITTEST_NAME} ${NO_COLOR}) + if (ADD_SHOGUN_UNITTEST_LABELS) + set_tests_properties(${UNITTEST_NAME} PROPERTIES LABELS ${ADD_SHOGUN_UNITTEST_LABELS}) + else () + set_tests_properties(${UNITTEST_NAME} PROPERTIES LABELS "unit") + endif() + + if(ARGN) + set_tests_properties(${UNITTEST_NAME} PROPERTIES ${ARGN}) + endif() +endfunction() + diff --git a/cmake/external/GoogleTestNMock.cmake b/cmake/external/GoogleTestNMock.cmake index 8bec01282f1..f5e6230b0de 100644 --- a/cmake/external/GoogleTestNMock.cmake +++ b/cmake/external/GoogleTestNMock.cmake @@ -1,44 +1,46 @@ -MergeCFLAGS() -include(ExternalProject) +IF (NOT TARGET GoogleMock) + MergeCFLAGS() + include(ExternalProject) -IF (MSVC) - SET (CUSTOM_CMAKE_ARGS -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock - -DCMAKE_LIBRARY_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock - -DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}${CMAKE_DEFINITIONS} - -DCMAKE_CXX_FLAGS_RELEASE:STRING=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DISTRIBUTION:STRING=${CMAKE_CXX_FLAGS_DISTRIBUTION} - -DCMAKE_CXX_FLAGS_DEBUG:STRING=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER} - ) -ELSE () - SET(MERGED_CXX_FLAGS "${MERGED_CXX_FLAGS} -fPIC") - SET (CUSTOM_CMAKE_ARGS -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock - -DCMAKE_LIBRARY_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock - -DCMAKE_CXX_FLAGS:STRING=${MERGED_CXX_FLAGS}${CMAKE_DEFINITIONS} - -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER} - ) -ENDIF() + IF (MSVC) + SET (CUSTOM_CMAKE_ARGS -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock + -DCMAKE_LIBRARY_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock + -DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}${CMAKE_DEFINITIONS} + -DCMAKE_CXX_FLAGS_RELEASE:STRING=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DISTRIBUTION:STRING=${CMAKE_CXX_FLAGS_DISTRIBUTION} + -DCMAKE_CXX_FLAGS_DEBUG:STRING=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER} + ) + ELSE () + SET(MERGED_CXX_FLAGS "${MERGED_CXX_FLAGS} -fPIC") + SET (CUSTOM_CMAKE_ARGS -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock + -DCMAKE_LIBRARY_OUTPUT_DIRECTORY:PATH=${THIRD_PARTY_DIR}/libs/gmock + -DCMAKE_CXX_FLAGS:STRING=${MERGED_CXX_FLAGS}${CMAKE_DEFINITIONS} + -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER} + ) + ENDIF() -IF(EXISTS /usr/src/googletest) - ExternalProject_Add( - GoogleMock - DOWNLOAD_COMMAND "" - SOURCE_DIR /usr/src/googletest - PREFIX ${CMAKE_BINARY_DIR}/GoogleMock - INSTALL_COMMAND "" - CMAKE_ARGS ${CUSTOM_CMAKE_ARGS} - ) -ELSE() - ExternalProject_Add( - GoogleMock - URL https://github.com/google/googletest/archive/release-1.8.1.tar.gz - URL_MD5 2e6fbeb6a91310a16efe181886c59596 - TIMEOUT 10 - PREFIX ${CMAKE_BINARY_DIR}/GoogleMock - DOWNLOAD_DIR ${THIRD_PARTY_DIR}/GoogleMock - INSTALL_COMMAND "" - CMAKE_ARGS ${CUSTOM_CMAKE_ARGS} - ) + IF(EXISTS /usr/src/googletest) + ExternalProject_Add( + GoogleMock + DOWNLOAD_COMMAND "" + SOURCE_DIR /usr/src/googletest + PREFIX ${CMAKE_BINARY_DIR}/GoogleMock + INSTALL_COMMAND "" + CMAKE_ARGS ${CUSTOM_CMAKE_ARGS} + ) + ELSE() + ExternalProject_Add( + GoogleMock + URL https://github.com/google/googletest/archive/release-1.8.1.tar.gz + URL_MD5 2e6fbeb6a91310a16efe181886c59596 + TIMEOUT 10 + PREFIX ${CMAKE_BINARY_DIR}/GoogleMock + DOWNLOAD_DIR ${THIRD_PARTY_DIR}/GoogleMock + INSTALL_COMMAND "" + CMAKE_ARGS ${CUSTOM_CMAKE_ARGS} + ) + ENDIF() ENDIF() diff --git a/src/interfaces/coreml/CMakeLists.txt b/src/interfaces/coreml/CMakeLists.txt index 3d0c0acc03d..77fb1050bb0 100644 --- a/src/interfaces/coreml/CMakeLists.txt +++ b/src/interfaces/coreml/CMakeLists.txt @@ -1,23 +1,56 @@ -IF (PROTOBUF_FOUND AND ENABLE_PROTOBUF) - FILE(GLOB COREML_SRC "${CMAKE_CURRENT_SOURCE_DIR}/*.proto") - FILE(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") - FOREACH (SRC_FILE ${COREML_SRC}) - GET_FILENAME_COMPONENT(SRC_FILE_WE ${SRC_FILE} NAME_WE) - SET(COMPILED_SRC "${CMAKE_CURRENT_BINARY_DIR}/coreml/${SRC_FILE_WE}") - SET(COMPILED_SRC_H "${COMPILED_SRC}.pb.h") - SET(COMPILED_SRC_CPP "${COMPILED_SRC}.pb.cpp") - ADD_CUSTOM_COMMAND( - OUTPUT "${COMPILED_SRC_H}" "${COMPILED_SRC_CPP}" - COMMAND "${PROTOBUF_PROTOC_EXECUTABLE}" - "${SRC_FILE}" - --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" - --proto_path "${CMAKE_CURRENT_SOURCE_DIR}" - COMMENT "Compiling CoreML Protobuf for ${SRC_FILE_WE}" - ) - SET(PROTO_TARGET_NAME "CoreML_${SRC_FILE_WE}") - ADD_CUSTOM_TARGET("${PROTO_TARGET_NAME}" DEPENDS "${COMPILED_SRC_H}" "${COMPILED_SRC_CPP}") - ADD_DEPENDENCIES(libshogun "${PROTO_TARGET_NAME}") - ENDFOREACH() -ELSE() - MESSAGE(FATAL_ERROR "Protobuf is required for CoreML") -ENDIF() +set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(shogun_coreml_version 0.1.0) + +add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/format") + +set(HEADER_FILES ShogunCoreML.h CoreMLModel.h) +add_library(shogun-coreml SHARED + CoreMLModel.cc + internal/CoreMLConverter.cc + internal/GLMRegressorConverter.cc + internal/GLMClassifierConverter.cc + internal/KernelConverter.cc + internal/SVMClassifierConverter.cc + internal/SVMRegressorConverter.cc) +set_property(TARGET shogun-coreml PROPERTY POSITION_INDEPENDENT_CODE ON) +set_target_properties( + shogun-coreml + PROPERTIES + SOVERSION 0 + VERSION "${shogun_coreml_version}" + PUBLIC_HEADER "${HEADER_FILES}" + ARCHIVE_OUTPUT_DIRECTORY "lib" + LIBRARY_OUTPUT_DIRECTORY "lib" +) +target_link_libraries(shogun-coreml PUBLIC shogun::shogun ${Protobuf_LITE_LIBRARIES}) +target_link_libraries(shogun-coreml PRIVATE coreml) +target_include_directories(shogun-coreml PUBLIC + $ + $ + $ +) +add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/internal") + +set(SHOGUN_COREML_LIB_INSTALL lib) +file(TO_CMAKE_PATH ${SHOGUN_COREML_LIB_INSTALL}/cmake/shogun-coreml CONFIG_PACKAGE_DIR) + +install( + TARGETS shogun-coreml + EXPORT ShogunCoreMLTarget + ARCHIVE DESTINATION ${SHOGUN_COREML_LIB_INSTALL} + RUNTIME DESTINATION ${SHOGUN_COREML_LIB_INSTALL} + LIBRARY DESTINATION ${SHOGUN_COREML_LIB_INSTALL} + INCLUDES DESTINATION include/shogun-coreml + PUBLIC_HEADER DESTINATION include/shogun-coreml +) + +export(EXPORT ShogunCoreMLTarget + FILE "${CMAKE_CURRENT_BINARY_DIR}/ShogunCoreMLTarget.cmake" + NAMESPACE shogun:: +) + +install(EXPORT ShogunCoreMLTarget + FILE ShogunCoreMLTarget.cmake + NAMESPACE shogun:: + DESTINATION ${CONFIG_PACKAGE_DIR} +) diff --git a/src/interfaces/coreml/CoreMLModel.cc b/src/interfaces/coreml/CoreMLModel.cc new file mode 100644 index 00000000000..4d007607ab5 --- /dev/null +++ b/src/interfaces/coreml/CoreMLModel.cc @@ -0,0 +1,53 @@ +#include "CoreMLModel.h" + +#include + +#include + +#include "format/Model.pb.h" + +using namespace shogun::coreml; + +CoreMLModel::CoreMLModel(): + m_spec(std::make_shared()) +{ +} + +CoreMLModel::CoreMLModel(std::shared_ptr spec): + m_spec(spec) +{ +} + +CoreMLModel::~CoreMLModel() +{ + m_spec.reset(); +} + +void CoreMLModel::save(const std::string& filename) const +{ + std::fstream out(filename, std::ios::binary | std::ios::out); + this->save(out); + out.close(); +} + +void CoreMLModel::save(std::ostream& out) const +{ + ::google::protobuf::io::OstreamOutputStream pb_out(&out); + if (!m_spec->SerializeToZeroCopyStream(&pb_out)) + throw std::runtime_error("could not save"); +} + +void CoreMLModel::set_short_description(const std::string& desc) +{ + m_spec->mutable_description()->mutable_metadata()->set_shortdescription(desc); +} + +void CoreMLModel::set_license(const std::string& license) +{ + m_spec->mutable_description()->mutable_metadata()->set_license(license); +} + +void CoreMLModel::set_author(const std::string& author) +{ + m_spec->mutable_description()->mutable_metadata()->set_author(author); +} diff --git a/src/interfaces/coreml/CoreMLModel.h b/src/interfaces/coreml/CoreMLModel.h new file mode 100644 index 00000000000..596ec38b318 --- /dev/null +++ b/src/interfaces/coreml/CoreMLModel.h @@ -0,0 +1,38 @@ +#ifndef __COREML_MODEL_H__ +#define __COREML_MODEL_H__ + +#include +#include +#include + +namespace CoreML +{ + namespace Specification + { + class Model; + } +} + +namespace shogun +{ + namespace coreml + { + class CoreMLModel + { + public: + CoreMLModel(); + CoreMLModel(std::shared_ptr); + ~CoreMLModel(); + + void save(const std::string& filename) const; + void save(std::ostream& out) const; + void set_short_description(const std::string& desc); + void set_license(const std::string& license); + void set_author(const std::string& author); + + private: + std::shared_ptr m_spec; + }; + } +} +#endif diff --git a/src/interfaces/coreml/ShogunCoreML.h b/src/interfaces/coreml/ShogunCoreML.h new file mode 100644 index 00000000000..9686eec118a --- /dev/null +++ b/src/interfaces/coreml/ShogunCoreML.h @@ -0,0 +1,16 @@ +#ifndef __SHOGUN_COREML_H__ +#define __SHOGUN_COREML_H__ + +#include + +#include "CoreMLModel.h" + +namespace shogun +{ + namespace coreml + { + std::shared_ptr convert(const CMachine* m) noexcept(false); + } +} + +#endif diff --git a/src/interfaces/coreml/ArrayFeatureExtractor.proto b/src/interfaces/coreml/format/ArrayFeatureExtractor.proto similarity index 100% rename from src/interfaces/coreml/ArrayFeatureExtractor.proto rename to src/interfaces/coreml/format/ArrayFeatureExtractor.proto diff --git a/src/interfaces/coreml/BayesianProbitRegressor.proto b/src/interfaces/coreml/format/BayesianProbitRegressor.proto similarity index 100% rename from src/interfaces/coreml/BayesianProbitRegressor.proto rename to src/interfaces/coreml/format/BayesianProbitRegressor.proto diff --git a/src/interfaces/coreml/format/CMakeLists.txt b/src/interfaces/coreml/format/CMakeLists.txt new file mode 100644 index 00000000000..65d0db606ad --- /dev/null +++ b/src/interfaces/coreml/format/CMakeLists.txt @@ -0,0 +1,25 @@ +IF (PROTOBUF_FOUND AND ENABLE_PROTOBUF) + FILE(GLOB COREML_SRC "${CMAKE_CURRENT_SOURCE_DIR}/*.proto") + FILE(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") + SET(COREML_SOURCES) + FOREACH (SRC_FILE ${COREML_SRC}) + GET_FILENAME_COMPONENT(SRC_FILE_WE ${SRC_FILE} NAME_WE) + SET(COMPILED_SRC "${CMAKE_CURRENT_BINARY_DIR}/${SRC_FILE_WE}") + SET(COMPILED_SRC_H "${COMPILED_SRC}.pb.h") + SET(COMPILED_SRC_CPP "${COMPILED_SRC}.pb.cc") + ADD_CUSTOM_COMMAND( + OUTPUT "${COMPILED_SRC_H}" "${COMPILED_SRC_CPP}" + COMMAND "${PROTOBUF_PROTOC_EXECUTABLE}" + "${SRC_FILE}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + --proto_path "${CMAKE_CURRENT_SOURCE_DIR}" + COMMENT "Compiling CoreML Protobuf for ${SRC_FILE_WE}" + ) + SET(PROTO_TARGET_NAME "CoreML_${SRC_FILE_WE}") + ADD_CUSTOM_TARGET("${PROTO_TARGET_NAME}" DEPENDS "${COMPILED_SRC_H}" "${COMPILED_SRC_CPP}") + LIST(APPEND COREML_SOURCES ${COMPILED_SRC_CPP}) + ENDFOREACH() + add_library(coreml STATIC ${COREML_SOURCES}) +ELSE() + MESSAGE(FATAL_ERROR "Protobuf is required for CoreML") +ENDIF() \ No newline at end of file diff --git a/src/interfaces/coreml/CategoricalMapping.proto b/src/interfaces/coreml/format/CategoricalMapping.proto similarity index 100% rename from src/interfaces/coreml/CategoricalMapping.proto rename to src/interfaces/coreml/format/CategoricalMapping.proto diff --git a/src/interfaces/coreml/CustomModel.proto b/src/interfaces/coreml/format/CustomModel.proto similarity index 100% rename from src/interfaces/coreml/CustomModel.proto rename to src/interfaces/coreml/format/CustomModel.proto diff --git a/src/interfaces/coreml/DataStructures.proto b/src/interfaces/coreml/format/DataStructures.proto similarity index 100% rename from src/interfaces/coreml/DataStructures.proto rename to src/interfaces/coreml/format/DataStructures.proto diff --git a/src/interfaces/coreml/DictVectorizer.proto b/src/interfaces/coreml/format/DictVectorizer.proto similarity index 100% rename from src/interfaces/coreml/DictVectorizer.proto rename to src/interfaces/coreml/format/DictVectorizer.proto diff --git a/src/interfaces/coreml/FeatureTypes.proto b/src/interfaces/coreml/format/FeatureTypes.proto similarity index 100% rename from src/interfaces/coreml/FeatureTypes.proto rename to src/interfaces/coreml/format/FeatureTypes.proto diff --git a/src/interfaces/coreml/FeatureVectorizer.proto b/src/interfaces/coreml/format/FeatureVectorizer.proto similarity index 100% rename from src/interfaces/coreml/FeatureVectorizer.proto rename to src/interfaces/coreml/format/FeatureVectorizer.proto diff --git a/src/interfaces/coreml/GLMClassifier.proto b/src/interfaces/coreml/format/GLMClassifier.proto similarity index 100% rename from src/interfaces/coreml/GLMClassifier.proto rename to src/interfaces/coreml/format/GLMClassifier.proto diff --git a/src/interfaces/coreml/GLMRegressor.proto b/src/interfaces/coreml/format/GLMRegressor.proto similarity index 100% rename from src/interfaces/coreml/GLMRegressor.proto rename to src/interfaces/coreml/format/GLMRegressor.proto diff --git a/src/interfaces/coreml/Identity.proto b/src/interfaces/coreml/format/Identity.proto similarity index 100% rename from src/interfaces/coreml/Identity.proto rename to src/interfaces/coreml/format/Identity.proto diff --git a/src/interfaces/coreml/Imputer.proto b/src/interfaces/coreml/format/Imputer.proto similarity index 100% rename from src/interfaces/coreml/Imputer.proto rename to src/interfaces/coreml/format/Imputer.proto diff --git a/src/interfaces/coreml/LICENSE.txt b/src/interfaces/coreml/format/LICENSE.txt similarity index 100% rename from src/interfaces/coreml/LICENSE.txt rename to src/interfaces/coreml/format/LICENSE.txt diff --git a/src/interfaces/coreml/Model.proto b/src/interfaces/coreml/format/Model.proto similarity index 100% rename from src/interfaces/coreml/Model.proto rename to src/interfaces/coreml/format/Model.proto diff --git a/src/interfaces/coreml/NeuralNetwork.proto b/src/interfaces/coreml/format/NeuralNetwork.proto similarity index 100% rename from src/interfaces/coreml/NeuralNetwork.proto rename to src/interfaces/coreml/format/NeuralNetwork.proto diff --git a/src/interfaces/coreml/NonMaximumSuppression.proto b/src/interfaces/coreml/format/NonMaximumSuppression.proto similarity index 100% rename from src/interfaces/coreml/NonMaximumSuppression.proto rename to src/interfaces/coreml/format/NonMaximumSuppression.proto diff --git a/src/interfaces/coreml/Normalizer.proto b/src/interfaces/coreml/format/Normalizer.proto similarity index 100% rename from src/interfaces/coreml/Normalizer.proto rename to src/interfaces/coreml/format/Normalizer.proto diff --git a/src/interfaces/coreml/OneHotEncoder.proto b/src/interfaces/coreml/format/OneHotEncoder.proto similarity index 100% rename from src/interfaces/coreml/OneHotEncoder.proto rename to src/interfaces/coreml/format/OneHotEncoder.proto diff --git a/src/interfaces/coreml/README.rst b/src/interfaces/coreml/format/README.rst similarity index 100% rename from src/interfaces/coreml/README.rst rename to src/interfaces/coreml/format/README.rst diff --git a/src/interfaces/coreml/SVM.proto b/src/interfaces/coreml/format/SVM.proto similarity index 100% rename from src/interfaces/coreml/SVM.proto rename to src/interfaces/coreml/format/SVM.proto diff --git a/src/interfaces/coreml/Scaler.proto b/src/interfaces/coreml/format/Scaler.proto similarity index 100% rename from src/interfaces/coreml/Scaler.proto rename to src/interfaces/coreml/format/Scaler.proto diff --git a/src/interfaces/coreml/TextClassifier.proto b/src/interfaces/coreml/format/TextClassifier.proto similarity index 100% rename from src/interfaces/coreml/TextClassifier.proto rename to src/interfaces/coreml/format/TextClassifier.proto diff --git a/src/interfaces/coreml/TreeEnsemble.proto b/src/interfaces/coreml/format/TreeEnsemble.proto similarity index 100% rename from src/interfaces/coreml/TreeEnsemble.proto rename to src/interfaces/coreml/format/TreeEnsemble.proto diff --git a/src/interfaces/coreml/VisionFeaturePrint.proto b/src/interfaces/coreml/format/VisionFeaturePrint.proto similarity index 100% rename from src/interfaces/coreml/VisionFeaturePrint.proto rename to src/interfaces/coreml/format/VisionFeaturePrint.proto diff --git a/src/interfaces/coreml/WordTagger.proto b/src/interfaces/coreml/format/WordTagger.proto similarity index 100% rename from src/interfaces/coreml/WordTagger.proto rename to src/interfaces/coreml/format/WordTagger.proto diff --git a/src/interfaces/coreml/internal/CMakeLists.txt b/src/interfaces/coreml/internal/CMakeLists.txt new file mode 100644 index 00000000000..4d6a17455fd --- /dev/null +++ b/src/interfaces/coreml/internal/CMakeLists.txt @@ -0,0 +1,21 @@ +IF (ENABLE_TESTING) + include(external/GoogleTestNMock) + ExternalProject_Get_Property(GoogleMock source_dir) + LINK_DIRECTORIES(${THIRD_PARTY_DIR}/libs/gmock) + + enable_testing() + add_library(coreml_unittest_main ${CMAKE_CURRENT_SOURCE_DIR}/unittest_main.cc) + target_link_libraries(coreml_unittest_main gmock gtest shogun::shogun) + add_dependencies(coreml_unittest_main GoogleMock shogun::shogun) + target_include_directories(coreml_unittest_main + PRIVATE + ${source_dir}/googlemock/include + ${source_dir}/googletest/include) + set(SHOGUN_UNITTEST_LINK_LIBS coreml_unittest_main shogun::shogun shogun-coreml gmock gtest) + + ADD_SHOGUN_UNITTEST(TARGET SVMRegressorConverter_test LABELS coreml-unit) + ADD_SHOGUN_UNITTEST(TARGET SVMClassifierConverter_test LABELS coreml-unit) + ADD_SHOGUN_UNITTEST(TARGET KernelConverter_test LABELS coreml-unit) + ADD_SHOGUN_UNITTEST(TARGET GLMRegressorConverter_test LABELS coreml-unit) + ADD_SHOGUN_UNITTEST(TARGET GLMClassifierConverter_test LABELS coreml-unit) +ENDIF() diff --git a/src/interfaces/coreml/internal/CoreMLConverter.cc b/src/interfaces/coreml/internal/CoreMLConverter.cc new file mode 100644 index 00000000000..130d76fe70c --- /dev/null +++ b/src/interfaces/coreml/internal/CoreMLConverter.cc @@ -0,0 +1,21 @@ +#include "internal/CoreMLConverter.h" +#include "../ShogunCoreML.h" + +#include +#include + +using namespace shogun; +using namespace shogun::coreml; + +std::shared_ptr shogun::coreml::convert(const CMachine* m) noexcept(false) +{ + REQUIRE(m != NULL, "No machine has been provided") + + auto converter_registry = ConverterFactory::instance(); + if (!m->is_trained()) + throw NotFittedException("The supplied machine is not trained!"); + + auto spec = converter_registry->operator()(m)->description(); + spec->mutable_description()->mutable_metadata()->set_versionstring(Version::get_version_main()); + return std::make_shared(spec); +} diff --git a/src/interfaces/coreml/internal/CoreMLConverter.h b/src/interfaces/coreml/internal/CoreMLConverter.h new file mode 100644 index 00000000000..0eaf75438df --- /dev/null +++ b/src/interfaces/coreml/internal/CoreMLConverter.h @@ -0,0 +1,161 @@ +#ifndef __COREML_CONVERTER_H__ +#define __COREML_CONVERTER_H__ + +#include +#include +#include +#include + +#include + +#include "../CoreMLModel.h" +#include "format/Model.pb.h" + +namespace shogun +{ + namespace coreml + { + static constexpr int32_t SPECIFICATION_VERSION = 1; + + class ICoreMLConverter + { + public: + virtual std::shared_ptr description() const = 0; + }; + + template + class CoreMLConverter: public ICoreMLConverter + { + private: + static constexpr const char* kInputName = "input"; + static constexpr const char* kPredictionName = "prediction"; + static constexpr const char* kPredictionProbabilitiesName = "prediction"; + + public: + typedef I input_type; + typedef O output_type; + static const std::unordered_set supported_types; + + CoreMLConverter(const CMachine* m): + m_spec(std::make_shared()), + m_machine(m) + { + REQUIRE(m_machine != nullptr, "No machine has been provided") + } + + std::shared_ptr description() const override + { + return m_spec; + } + + O* convert(const I* m) + { + REQUIRE(m != nullptr, "No machine has been provided") + REQUIRE(supported_types.find(m->get_name()) != supported_types.end(), + "Exporting %s to CoreML format is not supported!", m->get_name()) + + auto spec = new O(); + try + { + convert(m, spec); + } + catch(const std::runtime_error& e) + { + delete spec; + throw; + } + return spec; + } + protected: + void convert(const I*, O*); + + virtual ::CoreML::Specification::FeatureType* input_feature_type() const = 0; + + virtual void set_model_interface() + { + auto description = m_spec->mutable_description(); + + // set input + auto input = description->add_input(); + input->set_name(kInputName); + input->set_allocated_type(input_feature_type()); + + // set output + auto output = description->add_output(); + output->set_name(kPredictionName); + + switch(m_machine->get_machine_problem_type()) + { + case PT_REGRESSION: + output->mutable_type()->mutable_doubletype(); + break; + case PT_BINARY: + case PT_MULTICLASS: + output->mutable_type()->mutable_int64type(); + break; + default: + std::stringstream ss; + ss << "Unsupported problem type: " << m_machine->get_machine_problem_type() << "!" << std::endl; + throw std::runtime_error(ss.str()); + } + description->set_predictedfeaturename(kPredictionName); + + //FIXME + //description->set_predictedprobabilitiesname(kPredictionProbabilitiesName); + } + + std::shared_ptr m_spec; + const CMachine* m_machine; + }; + + class ConverterFactory + { + typedef std::function(const CMachine* m)> ConverterFactoryFunction; + public: + + auto size() const + { + return m_registry.size(); + } + + auto register_converter(const std::string& machine_name, ConverterFactoryFunction f) + { + return m_registry.emplace(std::make_pair(machine_name, f)).second; + } + + std::shared_ptr operator()(const CMachine* m) + { + std::string machine_name(m->get_name()); + auto f = m_registry.find(machine_name); + if (f == m_registry.end()) + throw std::runtime_error("The provided machine cannot be converted to CoreML format!"); + return f->second(m); + } + + static ConverterFactory* instance() + { + static ConverterFactory* f = new ConverterFactory(); + return f; + } + + private: + std::unordered_map m_registry; + }; + +#define REGISTER_COREML_CONVERTER4(factory, classname, machines, function) \ + static int register_converter##classname = []() { \ + for (auto m: machines) \ + factory->register_converter(m, function); \ + return factory->size(); \ + }(); + +#define REGISTER_COREML_CONVERTER3(factory, classname, machines) \ + REGISTER_COREML_CONVERTER4(factory, classname, machines, [](const CMachine* m) { return std::make_shared(m); }) + +#define REGISTER_CONVERTER(classname, __VA_ARGS__) \ + VARARG(REGISTER_COREML_CONVERTER, ConverterFactory::instance(), classname, __VA_ARGS__) + + } +} + +#endif diff --git a/src/interfaces/coreml/internal/GLMClassifierConverter.cc b/src/interfaces/coreml/internal/GLMClassifierConverter.cc new file mode 100644 index 00000000000..3557ae4bf63 --- /dev/null +++ b/src/interfaces/coreml/internal/GLMClassifierConverter.cc @@ -0,0 +1,111 @@ +#include "GLMClassifierConverter.h" + +#include "format/FeatureTypes.pb.h" +#include "format/GLMClassifier.pb.h" + +#include + +using namespace shogun; +using namespace shogun::coreml; + +using namespace CoreML; + +template<> +const std::unordered_set GLMClassifierConverterType::supported_types + = {"SVMOcas", "AveragedPerceptron", "LDA", "Perceptron", "NewtonSVM", "LibLinear", "SGDQN"}; + +GLMClassifierConverter::GLMClassifierConverter(const CMachine* m): GLMClassifierConverterType(m) +{ + m_spec->set_specificationversion(SPECIFICATION_VERSION); + m_spec->set_allocated_glmclassifier(CoreMLConverter::convert(static_cast(m))); + set_model_interface(); +} + +template<> +void CoreMLConverter::convert(const CLinearMachine* lm, Specification::GLMClassifier* spec) +{ + REQUIRE(lm != NULL, "No machine has been provided") + REQUIRE(spec != NULL, "No CoreML specification has been provided") + + // set weights + auto w = lm->get_w(); + auto w_spec = spec->add_weights(); + for (auto v: w) + w_spec->add_value(v); + + // set offset + spec->add_offset(lm->get_bias()); + + // FIXME: set post evalution transform + spec->set_postevaluationtransform(Specification::GLMClassifier::Logit); + + // set labels + spec->mutable_int64classlabels()->add_vector(-1); + spec->mutable_int64classlabels()->add_vector(1); + + // encoding + spec->set_classencoding(Specification::GLMClassifier_ClassEncoding::GLMClassifier_ClassEncoding_ReferenceClass); +} + +Specification::FeatureType* GLMClassifierConverter::input_feature_type() const +{ + auto input_feature_type = new Specification::FeatureType(); + input_feature_type->mutable_multiarraytype()->add_shape(static_cast(m_machine)->get_w().vlen); + input_feature_type->mutable_multiarraytype()->set_datatype(Specification::ArrayFeatureType_ArrayDataType::ArrayFeatureType_ArrayDataType_DOUBLE); + return input_feature_type; +} + +template<> +const std::unordered_set MulticlassGLMClassifierConverterType::supported_types + = {"MulticlassLibLinear", "MulticlassOCAS", "MulticlassLogisticRegression"}; + +MulticlassGLMClassifierConverter::MulticlassGLMClassifierConverter(const CMachine* m): MulticlassGLMClassifierConverterType(m) +{ + m_spec->set_specificationversion(SPECIFICATION_VERSION); + m_spec->set_allocated_glmclassifier(CoreMLConverter::convert(static_cast(m))); + set_model_interface(); +} + +template<> +void MulticlassGLMClassifierConverterType::convert(const CLinearMulticlassMachine* mc, Specification::GLMClassifier* spec) +{ + auto strategy = mc->get_multiclass_strategy(); + if (std::string(strategy->get_name()) != "MulticlassOneVsRestStrategy") + throw std::runtime_error("Unsupported multiclass strategy!"); + else + spec->set_classencoding(CoreML::Specification::GLMClassifier_ClassEncoding::GLMClassifier_ClassEncoding_OneVsRest); + + auto num_classes = strategy->get_num_classes(); + SG_UNREF(strategy); + for (auto i = 0; i < num_classes; ++i) + { + // set labels + spec->mutable_int64classlabels()->add_vector(i); + + // set weights + auto cur_machine = mc->get_machine(i)->as(); + auto w = cur_machine->get_w(); + auto w_spec = spec->add_weights(); + for (auto v: w) + w_spec->add_value(v); + + // set offset + spec->add_offset(cur_machine->get_bias()); + } + + // FIXME: set post evalution transform + spec->set_postevaluationtransform(Specification::GLMClassifier::Logit); +} + +Specification::FeatureType* MulticlassGLMClassifierConverter::input_feature_type() const +{ + auto input_feature_type = new Specification::FeatureType(); + auto f = static_cast(m_machine)->get_features(); + input_feature_type->mutable_multiarraytype()->add_shape(f->get_dim_feature_space()); + input_feature_type->mutable_multiarraytype()->set_datatype(Specification::ArrayFeatureType_ArrayDataType::ArrayFeatureType_ArrayDataType_DOUBLE); + SG_UNREF(f); + return input_feature_type; +} + +REGISTER_CONVERTER(GLMClassifierConverter, GLMClassifierConverterType::supported_types) +REGISTER_CONVERTER(MulticlassGLMClassifierConverter, MulticlassGLMClassifierConverterType::supported_types) diff --git a/src/interfaces/coreml/internal/GLMClassifierConverter.h b/src/interfaces/coreml/internal/GLMClassifierConverter.h new file mode 100644 index 00000000000..e65ed2424ab --- /dev/null +++ b/src/interfaces/coreml/internal/GLMClassifierConverter.h @@ -0,0 +1,35 @@ +#ifndef __GLM_CLASSIFIER_CONVERTER_H__ +#define __GLM_CLASSIFIER_CONVERTER_H__ + +#include "CoreMLConverter.h" + +#include +#include + +namespace shogun +{ + namespace coreml + { + using GLMClassifierConverterType = CoreMLConverter; + class GLMClassifierConverter: public GLMClassifierConverterType + { + public: + explicit GLMClassifierConverter(const CMachine* m); + virtual ~GLMClassifierConverter() = default; + protected: + ::CoreML::Specification::FeatureType* input_feature_type() const override; + }; + + using MulticlassGLMClassifierConverterType = CoreMLConverter; + class MulticlassGLMClassifierConverter: public MulticlassGLMClassifierConverterType + { + public: + explicit MulticlassGLMClassifierConverter(const CMachine* m); + virtual ~MulticlassGLMClassifierConverter() = default; + protected: + ::CoreML::Specification::FeatureType* input_feature_type() const override; + }; + } +} + +#endif diff --git a/src/interfaces/coreml/internal/GLMClassifierConverter_test.cc b/src/interfaces/coreml/internal/GLMClassifierConverter_test.cc new file mode 100644 index 00000000000..b97b10ea93a --- /dev/null +++ b/src/interfaces/coreml/internal/GLMClassifierConverter_test.cc @@ -0,0 +1,95 @@ +#include +#include "GLMClassifierConverter.h" +#include "ShogunCoreML.h" + +#include "format/Model.pb.h" +#include "format/GLMClassifier.pb.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../tests/unit/environments/LinearTestEnvironment.h" +#include "../../tests/unit/environments/MultiLabelTestEnvironment.h" + +using namespace shogun; +using namespace shogun::coreml; + +extern LinearTestEnvironment* linear_test_env; +extern MultiLabelTestEnvironment* multilabel_test_env; + +template +class GLMClassifier : public ::testing::Test {}; + +using GLMClassifierTypes = ::testing::Types; +TYPED_TEST_CASE(GLMClassifier, GLMClassifierTypes); + +TYPED_TEST(GLMClassifier, convert) +{ + auto mockData = linear_test_env->getBinaryLabelData(); + auto train_feats = mockData->get_features_train(); + auto train_labels = mockData->get_labels_train(); + + auto m = some(); + m->put("labels", train_labels); + m->train(train_feats); + + auto x = m->apply_binary(mockData->get_features_test()); + mockData->get_features_test()->get_feature_matrix().display_matrix(); + x->get_labels().display_vector(); + + auto converter = std::make_shared(m); + auto machine_spec = converter->description(); + auto spec = machine_spec->glmclassifier(); + + ASSERT_EQ(1, spec.weights_size()); + auto w = m->get_w(); + auto w_spec = spec.weights(0); + ASSERT_EQ(w.vlen, w_spec.value_size()); + int ctr = 0; + for (auto v: w) + ASSERT_EQ(v, w_spec.value(ctr++)); + + ASSERT_EQ(1, spec.offset_size()); + ASSERT_EQ(m->get_bias(), spec.offset(0)); + + auto out = convert(m); + out->save(m->get_name()); +} + +template +class GLMClassifierMulticlass : public ::testing::Test {}; + +using GLMClassifierMulticlassTypes = ::testing::Types ; +TYPED_TEST_CASE(GLMClassifierMulticlass, GLMClassifierMulticlassTypes); + +TYPED_TEST(GLMClassifierMulticlass, convert) +{ + auto mockData = multilabel_test_env->getMulticlassFixture(); + + auto train_feats = mockData->get_features_train(); + auto train_labels = mockData->get_labels_train(); + + auto m = some(); + m->put("labels", train_labels); + m->train(train_feats); + + auto x = m->apply_multiclass(mockData->get_features_test()); + mockData->get_features_test()->get_feature_matrix().display_matrix(); + x->get_labels().display_vector(); + + auto converter = std::make_shared(m); + auto machine_spec = converter->description(); + + auto out = convert(m); + out->save(m->get_name()); + +// ASSERT_EQ(m->, spec->weights_size()); +} diff --git a/src/interfaces/coreml/internal/GLMRegressorConverter.cc b/src/interfaces/coreml/internal/GLMRegressorConverter.cc new file mode 100644 index 00000000000..5e0ca18f785 --- /dev/null +++ b/src/interfaces/coreml/internal/GLMRegressorConverter.cc @@ -0,0 +1,51 @@ +#include "GLMRegressorConverter.h" + +#include "format/GLMRegressor.pb.h" + +using namespace shogun; +using namespace shogun::coreml; + +using namespace CoreML; + +template<> +const std::unordered_set GLMRegressorConverterType::supported_types = {"LibLinearRegression", "LeastAngleRegression", "LeastSquaresRegression", "LinearRidgeRegression"}; + +GLMRegressorConverter::GLMRegressorConverter(const CMachine* m): GLMRegressorConverterType(m) +{ + set_model_interface(); + + m_spec->set_specificationversion(SPECIFICATION_VERSION); + m_spec->set_allocated_glmregressor(CoreMLConverter::convert(static_cast(m))); +} + +template<> +void GLMRegressorConverterType::convert(const CLinearMachine* lr, Specification::GLMRegressor* spec) +{ + REQUIRE(lr != NULL, "No machine has been provided") + REQUIRE(spec != NULL, "No CoreML specification has been provided") + + // set weights + auto w = lr->get_w(); + auto w_spec = spec->add_weights(); + for (auto v: w) + w_spec->add_value(v); + + // set offset + spec->add_offset(lr->get_bias()); + + // set post evalution transform + spec->set_postevaluationtransform(Specification::GLMRegressor::NoTransform); + // other possible values: + // Specification::GLMRegressor::Logit + // Specification::GLMRegressor::Probit +} + +Specification::FeatureType* GLMRegressorConverter::input_feature_type() const +{ + auto input_feature_type = new Specification::FeatureType(); + input_feature_type->mutable_multiarraytype()->add_shape(static_cast(m_machine)->get_w().vlen); + input_feature_type->mutable_multiarraytype()->set_datatype(Specification::ArrayFeatureType_ArrayDataType::ArrayFeatureType_ArrayDataType_DOUBLE); + return input_feature_type; +} + +REGISTER_CONVERTER(GLMRegressorConverter, GLMRegressorConverterType::supported_types) diff --git a/src/interfaces/coreml/internal/GLMRegressorConverter.h b/src/interfaces/coreml/internal/GLMRegressorConverter.h new file mode 100644 index 00000000000..fcad850eb97 --- /dev/null +++ b/src/interfaces/coreml/internal/GLMRegressorConverter.h @@ -0,0 +1,25 @@ +#ifndef __GLM_REGRESSOR_CONVERTER_H__ +#define __GLM_REGRESSOR_CONVERTER_H__ + +#include "CoreMLConverter.h" + +#include + +namespace shogun +{ + namespace coreml + { + using GLMRegressorConverterType = CoreMLConverter; + class GLMRegressorConverter: public GLMRegressorConverterType + { + public: + explicit GLMRegressorConverter(const CMachine* m); + virtual ~GLMRegressorConverter() = default; + protected: + //::CoreML::Specification::FeatureType* input_feature_type() const override; + virtual ::CoreML::Specification::FeatureType* input_feature_type() const override; + }; + } +} + +#endif diff --git a/src/interfaces/coreml/internal/GLMRegressorConverter_test.cc b/src/interfaces/coreml/internal/GLMRegressorConverter_test.cc new file mode 100644 index 00000000000..177545b73e4 --- /dev/null +++ b/src/interfaces/coreml/internal/GLMRegressorConverter_test.cc @@ -0,0 +1,62 @@ +#include +#include "internal/GLMRegressorConverter.h" + +#include "ShogunCoreML.h" + +#include "format/GLMRegressor.pb.h" +#include "format/Model.pb.h" + +#include +#include +#include +#include + +#include "../../tests/unit/environments/LinearTestEnvironment.h" + +using namespace shogun; +using namespace shogun::coreml; + +extern LinearTestEnvironment* linear_test_env; + +template +class GLMRegressorTest : public ::testing::Test {}; + +using GLMRegressorTypes = ::testing::Types; +TYPED_TEST_CASE(GLMRegressorTest, GLMRegressorTypes); + +TYPED_TEST(GLMRegressorTest, convert) +{ + auto mock_data = linear_test_env->get_one_dimensional_regression_data(true); + auto labels_train = (CLabels*) mock_data->get_labels_train(); + auto features_train = mock_data->get_features_train(); + + auto m = some(); + m->put("labels", labels_train); + m->train(features_train); + + auto x = m->apply_regression(mock_data->get_features_test()); + mock_data->get_features_test()->get_feature_matrix().display_matrix(); + x->get_labels().display_vector(); + + auto converter = std::make_shared(m); + auto machine_spec = converter->description(); + auto spec = machine_spec->glmregressor(); + + ASSERT_EQ(1, spec.weights_size()); + auto w = m->get_w(); + auto w_spec = spec.weights(0); + ASSERT_EQ(w.vlen, w_spec.value_size()); + int ctr = 0; + for (auto v: w) + ASSERT_EQ(v, w_spec.value(ctr++)); + + ASSERT_EQ(1, spec.offset_size()); + ASSERT_EQ(m->get_bias(), spec.offset(0)); + + ASSERT_EQ( + CoreML::Specification::GLMRegressor_PostEvaluationTransform::GLMRegressor_PostEvaluationTransform_NoTransform, + spec.postevaluationtransform()); + + auto out = convert(m); + out->save(m->get_name()); +} diff --git a/src/interfaces/coreml/internal/KernelConverter.cc b/src/interfaces/coreml/internal/KernelConverter.cc new file mode 100644 index 00000000000..65aba8c4307 --- /dev/null +++ b/src/interfaces/coreml/internal/KernelConverter.cc @@ -0,0 +1,83 @@ +#include "internal/KernelConverter.h" + +#include "format/SVM.pb.h" + +#include +#include +#include +#include + +using namespace shogun; +using namespace shogun::coreml; +using namespace CoreML; + +template +struct converter +{ + static auto convert(const I* k); + static auto convert(const CKernel* k) + { + return convert(k->as()); + } +}; + +template<> +auto converter::convert(const CGaussianKernel *k) +{ + auto rbf = new Specification::RBFKernel(); + rbf->set_gamma(k->get_width()); + return rbf; +} + +template<> +auto converter::convert(const CSigmoidKernel *k) +{ + auto sigmoid_kernel = new Specification::SigmoidKernel(); + sigmoid_kernel->set_gamma(k->get("gamma")); + sigmoid_kernel->set_c(k->get("coef0")); + return sigmoid_kernel; +} + +template<> +auto converter::convert(const CPolyKernel *k) +{ + auto poly_kernel = new Specification::PolyKernel(); + poly_kernel->set_degree(k->get("degree")); + poly_kernel->set_c(k->get("c")); + poly_kernel->set_gamma(k->get("gamma")); + return poly_kernel; +} + +Specification::Kernel* KernelConverter::convert(const CKernel *k) +{ + auto kernel_normalizer = k->get_normalizer(); + REQUIRE(kernel_normalizer == NULL || std::string(kernel_normalizer->get_name()) == "IdentityKernelNormalizer", + "The kernel has a normaliser set that cannot be converted to CoreML format!") + + auto kernel_spec = new Specification::Kernel(); + auto kernel_type = std::string(k->get_name()); + + if (kernel_type == "LinearKernel") + { + kernel_spec->set_allocated_linearkernel(new Specification::LinearKernel()); + } + else if (kernel_type == "GaussianKernel") + { + kernel_spec->set_allocated_rbfkernel(converter::convert(k)); + } + else if (kernel_type == "SigmoidKernel") + { + kernel_spec->set_allocated_sigmoidkernel(converter::convert(k)); + } + else if (kernel_type == "PolyKernel") + { + kernel_spec->set_allocated_polykernel(converter::convert(k)); + } + else + { + delete kernel_spec; + throw std::runtime_error("Kernel type is not supported in CoreML"); + } + + return kernel_spec; +}; diff --git a/src/interfaces/coreml/internal/KernelConverter.h b/src/interfaces/coreml/internal/KernelConverter.h new file mode 100644 index 00000000000..71433013061 --- /dev/null +++ b/src/interfaces/coreml/internal/KernelConverter.h @@ -0,0 +1,27 @@ +#ifndef __KERNEL_CONVERTER_H__ +#define __KERNEL_CONVERTER_H__ + +#include "internal/CoreMLConverter.h" + +#include + +namespace CoreML +{ + namespace Specification + { + class Kernel; + } +} + +namespace shogun +{ + namespace coreml + { + struct KernelConverter + { + static CoreML::Specification::Kernel* convert(const CKernel* k); + }; + } +} + +#endif diff --git a/src/interfaces/coreml/internal/KernelConverter_test.cc b/src/interfaces/coreml/internal/KernelConverter_test.cc new file mode 100644 index 00000000000..be7ad15591b --- /dev/null +++ b/src/interfaces/coreml/internal/KernelConverter_test.cc @@ -0,0 +1,57 @@ +#include +#include "internal/KernelConverter.h" + +#include +#include +#include +#include +#include + +#include "format/SVM.pb.h" + +using namespace shogun; +using namespace shogun::coreml; + +TEST(LinearKernel, convert) +{ + auto lk = some(); + auto descr = KernelConverter::convert(lk.get()); +} + +TEST(GaussianKernel, convert) +{ + auto k = some(); + auto descr = KernelConverter::convert(k.get()); + + ASSERT_TRUE(descr->has_rbfkernel()); + + auto rbf = descr->rbfkernel(); + ASSERT_EQ(k->get_width(), rbf.gamma()); +} + +TEST(PolyKernel, convert) +{ + auto k = std::make_shared(10, 3, 2.0, 2.2); + k->set_normalizer(new CIdentityKernelNormalizer()); + auto descr = KernelConverter::convert(k.get()); + + ASSERT_TRUE(descr->has_polykernel()); + + auto pk = descr->polykernel(); + ASSERT_EQ(k->get("gamma"), pk.gamma()); + ASSERT_EQ(k->get("c"), pk.c()); + ASSERT_EQ(k->get("degree"), pk.degree()); +} + +TEST(SigmoidKernel, convert) +{ + auto k = std::make_shared(10, 55.5, 2.3); + auto descr = KernelConverter::convert(k.get()); + + ASSERT_TRUE(descr->has_sigmoidkernel()); + + auto sk = descr->sigmoidkernel(); + ASSERT_EQ(k->get("gamma"), sk.gamma()); + ASSERT_EQ(k->get("coef0"), sk.c()); + +} diff --git a/src/interfaces/coreml/internal/SVMClassifierConverter.cc b/src/interfaces/coreml/internal/SVMClassifierConverter.cc new file mode 100644 index 00000000000..6acf82c62dc --- /dev/null +++ b/src/interfaces/coreml/internal/SVMClassifierConverter.cc @@ -0,0 +1,106 @@ +#include "internal/SVMClassifierConverter.h" +#include "internal/KernelConverter.h" +#include "internal/SVMConverter.h" + +#include "format/SVM.pb.h" + +#include +#include +#include + +#include + +using namespace shogun; +using namespace shogun::coreml; +using namespace CoreML; + +template<> +const std::unordered_set SVMClassifierConverterType::supported_types + = {"SVMLightOneClass", "SVMLight", "MPDSVM", "LibSVMOneClass", "LibSVM", "GNPPSVM"}; + + +SVMClassifierConverter::SVMClassifierConverter(const CMachine* m): + SVMConverter(m) +{ + m_spec->set_specificationversion(SPECIFICATION_VERSION); + m_spec->set_allocated_supportvectorclassifier(CoreMLConverter::convert(static_cast(m))); +} + +static void convert_csvm_classifiers(std::vector& ms, CoreML::Specification::SupportVectorClassifier* spec) +{ + for (int i = 0; i < ms.size(); ++i) + { + // num of support vectors per class + spec->add_numberofsupportvectorsperclass(ms[i]->get_num_support_vectors()); + + // set coefficients + auto coeffs = ms[i]->get_alphas(); + auto coeffs_spec = spec->add_coefficients(); + for (auto c: coeffs) + coeffs_spec->add_alpha(c); + + // set bias + spec->add_rho(ms[i]->get_bias()); + + // set support vectors + set_support_vectors(ms[i], spec); + } + + // set labels + if (ms.size() == 1) + { + spec->mutable_int64classlabels()->add_vector(-1); + spec->mutable_int64classlabels()->add_vector(1); + } + else + { + for (int i = 0; i < ms.size(); ++i) + spec->mutable_int64classlabels()->add_vector(i); + } +} + +template<> +void SVMClassifierConverterType::convert(const CSVM* svm, CoreML::Specification::SupportVectorClassifier* spec) +{ + std::vector ms {svm}; + + // set kernel + auto kernel = svm->get("kernel"); + spec->set_allocated_kernel(KernelConverter::convert(kernel)); + + convert_csvm_classifiers(ms, spec); +} + +REGISTER_CONVERTER(SVMClassifierConverter, SVMClassifierConverterType::supported_types) + +template<> +const std::unordered_set MulticlassSVMClassifierConverterType::supported_types + = {"MulticlassLibSVM", "GMNPSVM"}; + +MulticlassSVMClassifierConverter::MulticlassSVMClassifierConverter(const CMachine* m): + SVMConverter(m) +{ + m_spec->set_specificationversion(SPECIFICATION_VERSION); + m_spec->set_allocated_supportvectorclassifier(CoreMLConverter::convert(static_cast(m))); +} + +template<> +void MulticlassSVMClassifierConverterType::convert(const CMulticlassSVM* svm, CoreML::Specification::SupportVectorClassifier* spec) +{ + std::vector ms; + auto strategy = svm->get_multiclass_strategy(); + auto num_classes = strategy->get_num_classes(); + SG_UNREF(strategy); + + //get_num_machines() + for (auto i = 0; i < num_classes; ++i) + ms.push_back(svm->get_svm(i)); + + // set kernel + auto kernel = svm->get("kernel"); + spec->set_allocated_kernel(KernelConverter::convert(kernel)); + + convert_csvm_classifiers(ms, spec); +} + +REGISTER_CONVERTER(MulticlassSVMClassifierConverter, MulticlassSVMClassifierConverterType::supported_types) diff --git a/src/interfaces/coreml/internal/SVMClassifierConverter.h b/src/interfaces/coreml/internal/SVMClassifierConverter.h new file mode 100644 index 00000000000..0e42bb5138f --- /dev/null +++ b/src/interfaces/coreml/internal/SVMClassifierConverter.h @@ -0,0 +1,32 @@ +#ifndef __SVM_CLASSIFIER_CONVERTER_H__ +#define __SVM_CLASSIFIER_CONVERTER_H__ + +#include "internal/SVMConverter.h" + +#include +#include + +namespace shogun +{ + namespace coreml + { + using SVMClassifierConverterType = CoreMLConverter; + class SVMClassifierConverter: public SVMConverter + { + public: + explicit SVMClassifierConverter(const CMachine* m); + virtual ~SVMClassifierConverter() = default; + }; + + using MulticlassSVMClassifierConverterType = CoreMLConverter; + class MulticlassSVMClassifierConverter: public SVMConverter + { + public: + explicit MulticlassSVMClassifierConverter(const CMachine* m); + virtual ~MulticlassSVMClassifierConverter() = default; + }; + + } +} + +#endif diff --git a/src/interfaces/coreml/internal/SVMClassifierConverter_test.cc b/src/interfaces/coreml/internal/SVMClassifierConverter_test.cc new file mode 100644 index 00000000000..53ee591d290 --- /dev/null +++ b/src/interfaces/coreml/internal/SVMClassifierConverter_test.cc @@ -0,0 +1,126 @@ +#include +#include "internal/SVMClassifierConverter.h" +#include "ShogunCoreML.h" + +#include "format/SVM.pb.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../../tests/unit/environments/LinearTestEnvironment.h" +#include "../../tests/unit/environments/MultiLabelTestEnvironment.h" + +using namespace shogun; +using namespace shogun::coreml; + +extern LinearTestEnvironment* linear_test_env; +extern MultiLabelTestEnvironment* multilabel_test_env; + +template +class SVMClassifier : public ::testing::Test {}; +using SVMClassifierTypes = ::testing::Types; +TYPED_TEST_CASE(SVMClassifier, SVMClassifierTypes); + +TYPED_TEST(SVMClassifier, convert) +{ + auto mockData = linear_test_env->getBinaryLabelData(); + auto train_feats = mockData->get_features_train(); + auto train_labels = mockData->get_labels_train(); + + // TODO: make kernel a parameter? + auto k = some(10); + k->init(train_feats, train_feats); + auto m = some(); + m->put("kernel", (CKernel*) k.get()); + m->put("labels", train_labels); + m->train(); + + auto x = m->apply_binary(mockData->get_features_test()); + mockData->get_features_test()->get_feature_matrix().display_matrix(); + x->get_labels().display_vector(); + + auto converter = std::make_shared(m); + auto machine_spec = converter->description(); + auto spec = machine_spec->supportvectorclassifier(); + + // check kernel + ASSERT_TRUE(spec.has_kernel()); + auto kernel_spec = spec.kernel(); + ASSERT_TRUE(kernel_spec.has_rbfkernel()); + + // check rho + auto rhos = spec.rho(); + ASSERT_EQ(1, rhos.size()); + ASSERT_EQ(m->get_bias(), rhos[0]); + + // check coeffs + auto coeffs = m->get_alphas(); + auto coeffs_spec = spec.coefficients(); + ASSERT_EQ(1, coeffs_spec.size()); + ASSERT_EQ(coeffs.size(), coeffs_spec[0].alpha_size()); + + int ctr = 0; + for (auto c: coeffs) + ASSERT_EQ(c, coeffs_spec[0].alpha(ctr++)); +/* + // check support vectors + ASSERT_TRUE(spec.has_densesupportvectors()); + auto svs_idx = m->get_support_vectors(); + auto svs_spec = spec.densesupportvectors(); + + ASSERT_EQ(svs_idx.vlen, svs_spec.vectors_size()); + ctr = 0; + for (auto idx: svs_idx) + { + auto sv_spec = svs_spec.vectors(ctr++); + auto sv = features_train->get_feature_vector(idx); + ASSERT_EQ(sv.vlen, sv_spec.values_size()); + + int j = 0; + for (auto v: sv) + ASSERT_EQ(v, sv_spec.values(j++)); + } + +*/ + auto out = convert(m); + out->save(m->get_name()); +} + +template +class SVMClassifierMulticlass : public ::testing::Test {}; +using SVMClassifierMulticlassTypes = ::testing::Types; +TYPED_TEST_CASE(SVMClassifierMulticlass, SVMClassifierMulticlassTypes); +TYPED_TEST(SVMClassifierMulticlass, convert) +{ + auto mockData = multilabel_test_env->getMulticlassFixture(); + + auto train_feats = mockData->get_features_train(); + auto train_labels = mockData->get_labels_train(); + + // TODO: make kernel a parameter? + auto k = some(10); + k->init(train_feats, train_feats); + auto m = some(); + m->put("kernel", (CKernel*) k.get()); + m->put("labels", train_labels); + m->put("C", 1.0); + m->train(); + + auto x = m->apply_multiclass(mockData->get_features_test()); + mockData->get_features_test()->get_feature_matrix().display_matrix(); + x->get_labels().display_vector(); + + //auto converter = std::make_shared(m); + //auto machine_spec = converter->description(); + auto out = convert(m); + out->save(m->get_name()); + +// ASSERT_EQ(m->, spec->weights_size()); +} \ No newline at end of file diff --git a/src/interfaces/coreml/internal/SVMConverter.h b/src/interfaces/coreml/internal/SVMConverter.h new file mode 100644 index 00000000000..7f1a7980498 --- /dev/null +++ b/src/interfaces/coreml/internal/SVMConverter.h @@ -0,0 +1,140 @@ +#ifndef __SVM_CONVERTER_H__ +#define __SVM_CONVERTER_H__ + +#include "CoreMLConverter.h" +#include "format/FeatureTypes.pb.h" + +#include +#include +#include + +namespace shogun +{ + namespace coreml + { + template + class SVMConverter: public CoreMLConverter + { + public: + explicit SVMConverter(const CMachine* m): CoreMLConverter(m) + { + this->set_model_interface(); + } + + protected: + ::CoreML::Specification::FeatureType* input_feature_type() const override + { + auto input_feature_type = new ::CoreML::Specification::FeatureType(); + auto m = static_cast(this->m_machine); + auto kernel = m->get_kernel(); + if (kernel == nullptr) + throw std::runtime_error("Machine has no kernel set!"); + + auto lhs = kernel->get_lhs(); + if (lhs == nullptr) + { + SG_UNREF(kernel); + throw std::runtime_error("Kernel has no left handside features set!"); + } + + input_feature_type->mutable_multiarraytype()->add_shape(input_dimension(lhs)); + input_feature_type->mutable_multiarraytype()->set_datatype(extract_array_type(lhs)); + + SG_UNREF(kernel); + SG_UNREF(lhs); + return input_feature_type; + } + private: + CoreML::Specification::ArrayFeatureType_ArrayDataType extract_array_type(const CFeatures* f) const + { + auto feature_type = f->get_feature_type(); + if (feature_type == F_SHORT || feature_type == F_LONG || feature_type == F_INT + || feature_type == F_UINT) + return CoreML::Specification::ArrayFeatureType_ArrayDataType::ArrayFeatureType_ArrayDataType_INT32; + else if (feature_type == F_SHORTREAL) + return CoreML::Specification::ArrayFeatureType_ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32; + else if (feature_type == F_DREAL) + return CoreML::Specification::ArrayFeatureType_ArrayDataType::ArrayFeatureType_ArrayDataType_DOUBLE; + + throw std::runtime_error("Not specifiable input type"); + } + + index_t input_dimension(const CFeatures* f) const + { + switch (f->get_feature_class()) + { + case C_DENSE: + return f->as()->get_dim_feature_space(); + case C_SPARSE: + return f->as()->get_dim_feature_space(); + default: + throw std::runtime_error("Unsupported input type!"); + } + } + }; + + template + bool set_support_vectors(const CSVM* svm, T* machine_spec) + { + auto svs = svm->get_support_vectors(); + auto k = svm->get_kernel(); + auto lhs = k->get_lhs(); + if (lhs == nullptr) + { + SG_UNREF(k); + throw std::runtime_error("Features are not set in kernel (required for support vectors), cannot export to CoreML!"); + } + + bool result = false; + switch (lhs->get_feature_class()) + { + case C_DENSE: + { + auto svs_spec = machine_spec->mutable_densesupportvectors(); + // FIXME: support all CDenseFeatures type! + auto dense_features = lhs->as>(); + for (auto sv_idx: svs) + { + auto sv = dense_features->get_feature_vector(sv_idx); + sv.display_vector(); + auto sv_spec = svs_spec->add_vectors(); + for (auto v: sv) + sv_spec->add_values(v); + } + result = true; + break; + } + case C_SPARSE: + { + // FIXME: support all CDenseFeatures type! + auto sparse_features = lhs->as>(); + auto svs_spec = machine_spec->mutable_sparsesupportvectors(); + for (auto sv_idx: svs) + { + auto sv = sparse_features->get_sparse_feature_vector(sv_idx); + auto sv_spec = svs_spec->add_vectors(); + for (index_t i = 0; i < sv.num_feat_entries; ++i) + { + auto node = sv_spec->add_nodes(); + node->set_value(sv.features[i].entry); + node->set_index(sv.features[i].feat_index); + } + } + result = true; + break; + } + default: + SG_UNREF(lhs); + SG_UNREF(k); + throw std::runtime_error("CoreML does not support the provided feature class!"); + + } + SG_UNREF(lhs); + SG_UNREF(k); + + return false; + } + } +} + +#endif diff --git a/src/interfaces/coreml/internal/SVMRegressorConverter.cc b/src/interfaces/coreml/internal/SVMRegressorConverter.cc new file mode 100644 index 00000000000..1360cd91ddc --- /dev/null +++ b/src/interfaces/coreml/internal/SVMRegressorConverter.cc @@ -0,0 +1,46 @@ +#include "internal/SVMRegressorConverter.h" +#include "internal/KernelConverter.h" + +#include "format/SVM.pb.h" + +#include + +#include + +using namespace shogun; +using namespace shogun::coreml; +using namespace CoreML; + +template<> +const std::unordered_set SVMRegressorConverterType::supported_types + = {"LibSVR", "SVRLight", "KernelRidgeRegression", "KRRNystrom"}; + +SVMRegressorConverter::SVMRegressorConverter(const CMachine* m): + SVMConverter(m) +{ + m_spec->set_specificationversion(SPECIFICATION_VERSION); + m_spec->set_allocated_supportvectorregressor(CoreMLConverter::convert(static_cast(m))); +} + +template<> +void SVMRegressorConverterType::convert(const CSVM* svm, CoreML::Specification::SupportVectorRegressor* spec) +{ + // set kernel + auto kernel = svm->get("kernel"); + spec->set_allocated_kernel(KernelConverter::convert(kernel)); + + // set coefficients + auto coeffs = svm->get_alphas(); + auto coeffs_spec = spec->mutable_coefficients(); + for (auto c: coeffs) + coeffs_spec->add_alpha(c); + + // set rho + spec->set_rho(-svm->get_bias()); + + // set support vectors + set_support_vectors(svm, spec); +} + +REGISTER_CONVERTER(SVMRegressorConverter, SVMRegressorConverterType::supported_types) + diff --git a/src/interfaces/coreml/internal/SVMRegressorConverter.h b/src/interfaces/coreml/internal/SVMRegressorConverter.h new file mode 100644 index 00000000000..85f99c55b72 --- /dev/null +++ b/src/interfaces/coreml/internal/SVMRegressorConverter.h @@ -0,0 +1,22 @@ +#ifndef __SVM_REGRESSOR_CONVERTER_H__ +#define __SVM_REGRESSOR_CONVERTER_H__ + +#include "internal/SVMConverter.h" + +#include + +namespace shogun +{ + namespace coreml + { + using SVMRegressorConverterType = CoreMLConverter; + class SVMRegressorConverter: public SVMConverter + { + public: + explicit SVMRegressorConverter(const CMachine* m); + virtual ~SVMRegressorConverter() = default; + }; + } +} + +#endif diff --git a/src/interfaces/coreml/internal/SVMRegressorConverter_test.cc b/src/interfaces/coreml/internal/SVMRegressorConverter_test.cc new file mode 100644 index 00000000000..f8437744c13 --- /dev/null +++ b/src/interfaces/coreml/internal/SVMRegressorConverter_test.cc @@ -0,0 +1,96 @@ +#include +#include "internal/SVMRegressorConverter.h" + +#include "ShogunCoreML.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "format/Model.pb.h" +#include "format/SVM.pb.h" + +#include "../../tests/unit/environments/LinearTestEnvironment.h" + +using namespace shogun; +using namespace shogun::coreml; + +extern LinearTestEnvironment* linear_test_env; + +template +class SVMRegressorTest : public ::testing::Test {}; + +using SVMRegressorTypes = ::testing::Types; +TYPED_TEST_CASE(SVMRegressorTest, SVMRegressorTypes); + +TYPED_TEST(SVMRegressorTest, convert) +{ + auto mock_data = linear_test_env->get_one_dimensional_regression_data(true); + auto labels_train = (CLabels*) mock_data->get_labels_train(); + auto features_train = mock_data->get_features_train(); + + // TODO: make kernel a parameter? + auto k = some(); + //auto k = some(); + //auto k = some(); + //auto k = some(); + k->init(features_train, features_train); + auto m = some(); + m->put("kernel", (CKernel*) k.get()); + m->put("labels", labels_train); + m->train(); + + auto x = m->apply_regression(mock_data->get_features_test()); + mock_data->get_features_test()->get_feature_matrix().display_matrix(); + x->get_labels().display_vector(); + + auto converter = std::make_shared(m); + auto machine_spec = converter->description(); + auto spec = machine_spec->supportvectorregressor(); + + // check rho + ASSERT_EQ(-m->get_bias(), spec.rho()); + + // check coeffs + auto coeffs = m->get_alphas(); + auto coeffs_spec = spec.coefficients(); + ASSERT_EQ(coeffs.size(), coeffs_spec.alpha_size()); + + int ctr = 0; + for (auto c: coeffs) + ASSERT_EQ(c, coeffs_spec.alpha(ctr++)); + + // check kernel + ASSERT_TRUE(spec.has_kernel()); + auto kernel_spec = spec.kernel(); +// ASSERT_TRUE(kernel_spec.has_rbfkernel()); + + // check support vectors + ASSERT_TRUE(spec.has_densesupportvectors()); + auto svs_idx = m->get_support_vectors(); + auto svs_spec = spec.densesupportvectors(); + + ASSERT_EQ(svs_idx.vlen, svs_spec.vectors_size()); + ctr = 0; + for (auto idx: svs_idx) + { + auto sv_spec = svs_spec.vectors(ctr++); + auto sv = features_train->get_feature_vector(idx); + ASSERT_EQ(sv.vlen, sv_spec.values_size()); + + int j = 0; + for (auto v: sv) + ASSERT_EQ(v, sv_spec.values(j++)); + } + + auto out = convert(m); + out->save(m->get_name()); +} diff --git a/src/interfaces/coreml/internal/unittest_main.cc b/src/interfaces/coreml/internal/unittest_main.cc new file mode 100644 index 00000000000..5274fd056e4 --- /dev/null +++ b/src/interfaces/coreml/internal/unittest_main.cc @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +// FIXME +#include "../../tests/unit/environments/LinearTestEnvironment.h" +#include "../../tests/unit/environments/MultiLabelTestEnvironment.h" +#include "../../tests/unit/environments/RegressionTestEnvironment.h" + +using namespace shogun; +using ::testing::Test; +using ::testing::UnitTest; +using ::testing::TestCase; +using ::testing::TestInfo; +using ::testing::TestPartResult; +using ::testing::TestEventListener; +using ::testing::Environment; + +class FailurePrinter : public TestEventListener { +public: + explicit FailurePrinter(TestEventListener* listener) : TestEventListener() {_listener = listener;} + + virtual ~FailurePrinter() {} + + virtual void OnTestProgramStart(const UnitTest& unit_test) {} + virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration) {} + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) {} + virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) {} + virtual void OnTestCaseStart(const TestCase& test_case) {} + virtual void OnTestStart(const TestInfo& test_info) {} + virtual void OnTestPartResult(const TestPartResult& result); + virtual void OnTestEnd(const TestInfo& test_info); + virtual void OnTestCaseEnd(const TestCase& test_case) {} + virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) { } + virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) { } + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration) { _listener->OnTestIterationEnd(unit_test, iteration); } + virtual void OnTestProgramEnd(const UnitTest& unit_test) { } + +protected: + TestEventListener* _listener; +}; + +void FailurePrinter::OnTestPartResult(const TestPartResult& test_part_result) +{ + if (test_part_result.failed()) + { + _listener->OnTestPartResult(test_part_result); + printf("\n"); + } +} + +void FailurePrinter::OnTestEnd(const TestInfo& test_info) +{ + if (test_info.result()->Failed()) + _listener->OnTestEnd(test_info); +} + +LinearTestEnvironment* linear_test_env; +MultiLabelTestEnvironment* multilabel_test_env; +RegressionTestEnvironment* regression_test_env; + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + ::testing::InitGoogleMock(&argc, argv); + + if (argc > 1 && !strcmp(argv[1], "--only-on-failure")) + { + testing::TestEventListeners& listeners = + testing::UnitTest::GetInstance()->listeners(); + + testing::TestEventListener* default_printer + = listeners.Release(listeners.default_result_printer()); + listeners.Append(new FailurePrinter(default_printer)); + } + + // setting up fixtures + linear_test_env = new LinearTestEnvironment(); + ::testing::AddGlobalTestEnvironment(linear_test_env); + multilabel_test_env = new MultiLabelTestEnvironment(); + ::testing::AddGlobalTestEnvironment(multilabel_test_env); + regression_test_env = new RegressionTestEnvironment(); + ::testing::AddGlobalTestEnvironment(regression_test_env); + + init_shogun_with_defaults(); + sg_io->set_loglevel(MSG_WARN); + int ret = RUN_ALL_TESTS(); + + exit_shogun(); + + return ret; +} +