Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[kenlm] New Port #13692

Merged
merged 25 commits into from
Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions ports/kenlm/fix-build-install.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ecd6032..78d1f8b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,7 +43,7 @@ set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
# Compile all libraries into lib/
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)

-if (NOT CMAKE_BUILD_TYPE)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+if (0)
set(CMAKE_BUILD_TYPE Release)
endif()

diff --git a/lm/CMakeLists.txt b/lm/CMakeLists.txt
index 069c6aa..395815b 100644
--- a/lm/CMakeLists.txt
+++ b/lm/CMakeLists.txt
@@ -32,11 +32,20 @@ add_subdirectory(common)

add_library(kenlm ${KENLM_LM_SOURCE} ${KENLM_LM_COMMON_SOURCE})
set_target_properties(kenlm PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(kenlm kenlm_util ${Boost_LIBRARIES} Threads::Threads)
+target_link_libraries(kenlm PUBLIC kenlm_util ${Boost_LIBRARIES} Threads::Threads)

set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
target_compile_definitions(kenlm PUBLIC -DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})

+install(TARGETS kenlm
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+ RUNTIME DESTINATION bin
+ LIBRARY DESTINATION lib
+ ARCHIVE DESTINATION lib
+)
+
+file(GLOB SRC_HEADERS ${CMAKE_CURRENT_LIST_DIR}/*.h ${CMAKE_CURRENT_LIST_DIR}/*.hh)
+install(FILES ${SRC_HEADERS} DESTINATION include/kenlm)
+
# This directory has children that need to be processed
add_subdirectory(builder)
add_subdirectory(filter)
@@ -44,12 +53,14 @@ add_subdirectory(interpolate)

# Explicitly list the executable files to be compiled
set(EXE_LIST
- query
fragment
- build_binary
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
kenlm_benchmark
)

+if (UNIX)
+ list(APPEND EXE_LIST query build_binary)
+endif()
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+
set(LM_LIBS kenlm kenlm_util ${Boost_LIBRARIES} Threads::Threads)

AddExes(EXES ${EXE_LIST}
diff --git a/lm/interpolate/CMakeLists.txt b/lm/interpolate/CMakeLists.txt
index f6ea116..26a7ad0 100644
--- a/lm/interpolate/CMakeLists.txt
+++ b/lm/interpolate/CMakeLists.txt
@@ -1,7 +1,8 @@
-find_package(Eigen3)
+if (UNIX)
+find_package(Eigen3 CONFIG REQUIRED)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved

-if(EIGEN3_FOUND)
- if (3.1.0 VERSION_LESS ${EIGEN3_VERSION})
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+if(Eigen3_FOUND)
+ if (0)
include_directories(${EIGEN3_INCLUDE_DIR})

set(KENLM_INTERPOLATE_SOURCE
@@ -26,7 +27,7 @@ if(EIGEN3_FOUND)
endif()

add_library(kenlm_interpolate ${KENLM_INTERPOLATE_SOURCE})
- target_link_libraries(kenlm_interpolate kenlm)
+ target_link_libraries(kenlm_interpolate PUBLIC kenlm Eigen3::Eigen)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved

set(KENLM_INTERPOLATE_EXES
interpolate
@@ -34,6 +35,15 @@ if(EIGEN3_FOUND)

set(KENLM_INTERPOLATE_LIBS
kenlm_interpolate)
+
+ install(TARGETS kenlm_interpolate
+ RUNTIME DESTINATION bin
+ LIBRARY DESTINATION lib
+ ARCHIVE DESTINATION lib
+ )
+
+ file(GLOB PY_HEADERS ${CMAKE_CURRENT_LIST_DIR}/*.h ${CMAKE_CURRENT_LIST_DIR}/*.hh)
+ install(FILES ${PY_HEADERS} DESTINATION include/kenlm/interpolate)

AddExes(EXES ${KENLM_INTERPOLATE_EXES}
LIBRARIES ${KENLM_INTERPOLATE_LIBS})
@@ -57,3 +67,6 @@ else()
"(cd $HOME; wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.bz2 |tar xj)\n"
"rm CMakeCache.txt\n")
endif()
+else()
+ message(STATUS "interpolation only support UNIX.")
+endif()
diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt
index 3f28c92..97d5448 100644
--- a/util/CMakeLists.txt
+++ b/util/CMakeLists.txt
@@ -64,10 +64,21 @@ endif()
# Group these objects together for later use.
add_library(kenlm_util ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL_STREAM_SOURCE} ${KENLM_UTIL_SOURCE})
set_target_properties(kenlm_util PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(kenlm_util ${Boost_LIBRARIES} ${READ_COMPRESSED_LIBS} Threads::Threads ${RT})
+target_link_libraries(kenlm_util PUBLIC ${Boost_LIBRARIES} ${READ_COMPRESSED_LIBS} Threads::Threads ${RT})
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved

+if (UNIX)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
AddExes(EXES probing_hash_table_benchmark
LIBRARIES kenlm_util ${Boost_LIBRARIES} Threads::Threads)
+endif()
+
+install(TARGETS kenlm_util
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+ RUNTIME DESTINATION bin
+ LIBRARY DESTINATION lib
+ ARCHIVE DESTINATION lib
+)
+
+file(GLOB UTILS_HEADER ${CMAKE_CURRENT_LIST_DIR}/*.h ${CMAKE_CURRENT_LIST_DIR}/*.hh)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+install(FILES ${UTILS_HEADER} DESTINATION include/kenlm/utils)

# Only compile and run unit tests if tests should be run
if(BUILD_TESTING)
46 changes: 46 additions & 0 deletions ports/kenlm/portfile.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This port is restricted to static library linkage.
vcpkg_check_linkage(ONLY_STATIC_LIBRARY)

# Reject the unsupported feature/platform combination up front, before any
# sources are downloaded or patched, so the failure is immediate.
if("test" IN_LIST FEATURES AND VCPKG_TARGET_IS_WINDOWS)
    message(FATAL_ERROR "Feature test only support unix.")
endif()

# Fetch the pinned upstream commit and apply the port's build/install patch.
# SOURCE_PATH receives the location of the extracted, patched source tree.
vcpkg_from_github(
    OUT_SOURCE_PATH SOURCE_PATH
    REPO kpu/kenlm
    REF 689a25aae9171b3ea46bd80d4189f540f35f1a02
    SHA512 a1d3521b3458c791eb1242451b4eaafda870f68b5baeb359549eba10ed69ca417eeaaac95fd0d48350852661af7688c6b640361e9f70af57ae24d261c4ac0b85
    HEAD_REF master
    PATCHES fix-build-install.patch
)

# Delete the find module shipped with the sources: the patched build looks up
# Eigen3 with find_package(Eigen3 CONFIG REQUIRED) instead.
# The path is quoted so it stays a single argument whatever SOURCE_PATH contains.
file(REMOVE "${SOURCE_PATH}/cmake/modules/FindEigen3.cmake")

# Translate the "test" feature into a value for the COMPILE_TESTS option;
# the result is collected in FEATURE_OPTIONS.
vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
    test COMPILE_TESTS
)

# Configure the patched source tree and install the result.
# SOURCE_PATH is quoted so the path is always passed as exactly one argument;
# FEATURE_OPTIONS is deliberately left unquoted so each collected option is
# forwarded as its own argument.
vcpkg_configure_cmake(
    SOURCE_PATH "${SOURCE_PATH}"
    PREFER_NINJA
    OPTIONS
        ${FEATURE_OPTIONS}
        -DFORCE_STATIC=OFF #already handled by vcpkg
        # NOTE(review): the trailing note below records a linker failure,
        # presumably the reason this option is switched off - confirm.
        -DENABLE_PYTHON=OFF # kenlm.lib(bhiksha.cc.obj) : fatal error LNK1000: Internal error during IMAGE::Pass2
)
vcpkg_install_cmake()

# Names of the executables handed to vcpkg_copy_tools on every platform.
set(kenlm_tool_names
    count_ngrams
    filter
    fragment
    kenlm_benchmark
    lmplz
    phrase_table_vocab
)

# The port's patch only adds these three executables to the build under
# if (UNIX), so they are requested only for non-Windows targets.
if(NOT VCPKG_TARGET_IS_WINDOWS)
    list(APPEND kenlm_tool_names
        probing_hash_table_benchmark
        query
        build_binary
    )
endif()

vcpkg_copy_tools(
    TOOL_NAMES ${kenlm_tool_names}
    AUTO_CLEAN
)

vcpkg_copy_pdbs()

# Drop the include directory from the debug half of the package.
file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include")

# Copyright and License
# Install upstream's COPYING and LICENSE files into share/<port> under the
# names "copyright" and "license". Paths are quoted so each one is passed as a
# single argument, matching the quoted path used for the debug/include cleanup.
file(INSTALL "${SOURCE_PATH}/COPYING" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright)
file(INSTALL "${SOURCE_PATH}/LICENSE" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME license)
26 changes: 26 additions & 0 deletions ports/kenlm/vcpkg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"name": "kenlm",
"version-string": "20200924",
"description": "KenLM: Faster and Smaller Language Model Queries",
"supports": "!(arm64 & windows)",
"dependencies": [
"boost-interprocess",
"boost-program-options",
"boost-ptr-container",
"boost-system",
"boost-test",
"boost-thread",
"bzip2",
{
"name": "eigen3",
"platform": "!windows"
},
"liblzma",
"zlib"
],
"features": {
"test": {
"description": "Compile tests"
}
}
}