Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[kenlm] New Port #13692

Merged
merged 25 commits into from
Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions ports/kenlm/fix-build-install.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9d9ed21..2c7aeac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.1)

-IF( WIN32 )
+IF( WIN32 AND BUILD_SHARED_LIBS)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
SET(Boost_USE_STATIC_LIBS OFF)
# The auto-linking feature has problems with USE_STATIC_LIBS off, so we use
# BOOST_ALL_NO_LIB to turn it off.
@@ -14,6 +14,7 @@ ENDIF( )
project(kenlm)

option(FORCE_STATIC "Build static executables" OFF)
+option(ENABLE_INTERPOLATE "Build interpolation program" OFF)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be needed. Compile if we have what we need for it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's needed. Using dynamic judgment instead of specifying options is not a good thing.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added this option in master but set the default based on whether the package is found. This is backwards compatible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kpu Since the default of this option relies on the lookup of eigen3, the build of this component will be activated if eigen3 has been installed with vcpkg on Windows. But this component only supports non-Windows platforms.

Copy link

@kpu kpu Oct 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've set the default to off for windows in master just now. But I figure you asked for control so you would always specify it anyway.

if (FORCE_STATIC)
#presumably overkill, is there a better way?
#http://cmake.3232098.n2.nabble.com/Howto-compile-static-executable-td5580269.html
diff --git a/lm/CMakeLists.txt b/lm/CMakeLists.txt
index 25d22c3..395815b 100644
--- a/lm/CMakeLists.txt
+++ b/lm/CMakeLists.txt
@@ -43,6 +43,9 @@ install(TARGETS kenlm
ARCHIVE DESTINATION lib
)

+file(GLOB SRC_HEADERS ${CMAKE_CURRENT_LIST_DIR}/*.h ${CMAKE_CURRENT_LIST_DIR}/*.hh)
+install(FILES ${SRC_HEADERS} DESTINATION include/kenlm)
+
# This directory has children that need to be processed
add_subdirectory(builder)
add_subdirectory(filter)
@@ -50,12 +53,14 @@ add_subdirectory(interpolate)

# Explicitly list the executable files to be compiled
set(EXE_LIST
- query
fragment
- build_binary
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
kenlm_benchmark
)

+if (UNIX)
+ list(APPEND EXE_LIST query build_binary)
+endif()
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+
set(LM_LIBS kenlm kenlm_util ${Boost_LIBRARIES} Threads::Threads)

AddExes(EXES ${EXE_LIST}
diff --git a/lm/interpolate/CMakeLists.txt b/lm/interpolate/CMakeLists.txt
index 04776be..7375ffd 100644
--- a/lm/interpolate/CMakeLists.txt
+++ b/lm/interpolate/CMakeLists.txt
@@ -1,5 +1,6 @@
+if (ENABLE_INTERPOLATE)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
# Eigen3 less than 3.1.0 has a race condition: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=466
-find_package(Eigen3 3.1.0 CONFIG)
+find_package(Eigen3 3.1.0 CONFIG REQUIRED)

if(EIGEN3_FOUND)
include_directories(${EIGEN3_INCLUDE_DIR})
@@ -54,3 +55,4 @@ else()
"(cd $HOME; wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.bz2 |tar xj)\n"
"rm CMakeCache.txt\n")
endif()
+endif()
\ No newline at end of file
diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt
index b0a06b1..711aa32 100644
--- a/util/CMakeLists.txt
+++ b/util/CMakeLists.txt
@@ -84,6 +84,15 @@ AddExes(EXES probing_hash_table_benchmark
LIBRARIES kenlm_util ${Boost_LIBRARIES} Threads::Threads)
endif()

+install(TARGETS kenlm_util
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+ RUNTIME DESTINATION bin
+ LIBRARY DESTINATION lib
+ ARCHIVE DESTINATION lib
+)
+
+file(GLOB UTILS_HEADER ${CMAKE_CURRENT_LIST_DIR}/*.h ${CMAKE_CURRENT_LIST_DIR}/*.hh)
JackBoosY marked this conversation as resolved.
Show resolved Hide resolved
+install(FILES ${UTILS_HEADER} DESTINATION include/kenlm/utils)
+
# Only compile and run unit tests if tests should be run
if(BUILD_TESTING)
set(KENLM_BOOST_TESTS_LIST
48 changes: 48 additions & 0 deletions ports/kenlm/portfile.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# This port is restricted to static library linkage.
vcpkg_check_linkage(ONLY_STATIC_LIBRARY)

# Fetch the pinned kpu/kenlm commit and apply the port's patch on top of it.
# The extracted source directory is returned in SOURCE_PATH.
vcpkg_from_github(
    OUT_SOURCE_PATH SOURCE_PATH
    REPO kpu/kenlm
    REF ac454207c69f293315ae9be3aff2238fc8c999a0
    SHA512 96f35c46237870ce71c04e20783b4d410e7dfec6eb10673e29aa9ba27f95c5ad77aafd443a1583cd7b757a8c360fc16f236ef8ecf965f317c24c8f3b45547722
    HEAD_REF master
    PATCHES
        fix-build-install.patch
)

# Remove the FindEigen3.cmake module that ships with the upstream sources.
# NOTE(review): presumably so that only vcpkg's Eigen3 package is picked up - confirm.
file(REMOVE "${SOURCE_PATH}/cmake/modules/FindEigen3.cmake")

# Translate the "interpolate" port feature into the ENABLE_INTERPOLATE CMake
# option; the resulting -D flag(s) are collected in FEATURE_OPTIONS.
vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
    FEATURES
        interpolate ENABLE_INTERPOLATE
)

# Abort early when the interpolate feature is requested for a Windows target.
if("interpolate" IN_LIST FEATURES AND VCPKG_TARGET_IS_WINDOWS)
    message(FATAL_ERROR "Feature 'interpolate' is only supported on non-Windows (Unix) targets.")
endif()

# Configure the patched sources (Ninja preferred), then build and install.
vcpkg_configure_cmake(
    SOURCE_PATH ${SOURCE_PATH}
    PREFER_NINJA
    OPTIONS
        # already handled by vcpkg
        -DFORCE_STATIC=OFF
        # Python support is switched off; note recorded for this option:
        # kenlm.lib(bhiksha.cc.obj) : fatal error LNK1000: Internal error during IMAGE::Pass2
        -DENABLE_PYTHON=OFF
        -DCOMPILE_TESTS=OFF
        # Feature-derived flags produced by vcpkg_check_features.
        ${FEATURE_OPTIONS}
)
vcpkg_install_cmake()

# Tools copied for every target platform.
set(KENLM_TOOLS
    count_ngrams
    filter
    fragment
    kenlm_benchmark
    lmplz
    phrase_table_vocab
)

# Additional tools that are only copied for non-Windows targets.
if(NOT VCPKG_TARGET_IS_WINDOWS)
    list(APPEND KENLM_TOOLS
        probing_hash_table_benchmark
        query
        build_binary
        streaming_example
    )
endif()

# The interpolate tool is only copied when its feature is selected on a
# non-Windows target.
if(NOT VCPKG_TARGET_IS_WINDOWS AND "interpolate" IN_LIST FEATURES)
    list(APPEND KENLM_TOOLS interpolate)
endif()

vcpkg_copy_tools(TOOL_NAMES ${KENLM_TOOLS} AUTO_CLEAN)

vcpkg_copy_pdbs()

# Remove the include directory from the debug half of the package tree.
file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include")

# Copyright and License
# COPYING is installed as share/<port>/copyright and LICENSE as share/<port>/license.
file(INSTALL "${SOURCE_PATH}/COPYING" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright)
file(INSTALL "${SOURCE_PATH}/LICENSE" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME license)
32 changes: 32 additions & 0 deletions ports/kenlm/vcpkg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
  "name": "kenlm",
  "version-string": "20200924",
  "description": "KenLM: Faster and Smaller Language Model Queries",
  "supports": "!(arm64 & windows)",
  "dependencies": [
    "boost-interprocess",
    "boost-program-options",
    "boost-ptr-container",
    "boost-system",
    "boost-test",
    "boost-thread",
    "bzip2",
    "liblzma",
    "zlib"
  ],
  "features": {
    "interpolate": {
      "description": "Build interpolation program",
      "dependencies": [
        {
          "name": "eigen3",
          "platform": "!windows"
        }
      ]
    }
  }
}