Skip to content

Support hotword boosting feature and lexicon based decoding #222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
---
# clang-format rules for the C++ sources; starts from the WebKit preset and
# overrides the options listed below.
Language: Cpp
BasedOnStyle: WebKit
AlignAfterOpenBracket: Align
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false
BinPackParameters: false
# The BraceWrapping map is only honoured when BreakBeforeBraces is Custom;
# with the preset's own value the entries below would be silently ignored.
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: true
  AfterControlStatement: false
  AfterEnum: true
  AfterFunction: true
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: true
  AfterUnion: true
  AfterExternBlock: true
  BeforeCatch: false
  BeforeElse: true
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: false
  SplitEmptyNamespace: true
ColumnLimit: 100
IndentCaseLabels: true
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
Standard: Cpp11
TabWidth: 4
UseTab: Never
...
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[submodule "third_party/kenlm"]
path = third_party/kenlm
url = https://github.com/kpu/kenlm.git
[submodule "third_party/ThreadPool"]
path = third_party/ThreadPool
url = https://github.com/progschj/ThreadPool.git
[submodule "third_party/kenlm"]
path = third_party/kenlm
url = https://github.com/kpu/kenlm
44 changes: 44 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

# Compile every target as C++17 and fail at configure time (instead of
# silently falling back to an older standard) if the compiler lacks it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# project name
project(CTCBeamDecoder CXX)

# Make the libtorch folder under third_party/ discoverable by find_package().
# The path is appended so that prefixes supplied by the user
# (e.g. -DCMAKE_PREFIX_PATH=...) are kept rather than overwritten.
list(APPEND CMAKE_PREFIX_PATH "${CMAKE_SOURCE_DIR}/third_party/libtorch")

# find torch library and all necessary files
find_package(Torch REQUIRED)

# Torch publishes extra compiler flags through TORCH_CXX_FLAGS; add them to
# the global C++ flags so every target in the build is compiled with them.
string(APPEND CMAKE_CXX_FLAGS " ${TORCH_CXX_FLAGS}")

# add cxxopts library for command line parsing
include(FetchContent)

FetchContent_Declare(
  cxxopts
  GIT_REPOSITORY https://github.com/jarro2783/cxxopts.git
  GIT_TAG v3.1.1
)

# Populate cxxopts (once) and add its source tree to the build. This replaces
# the manual GetProperties / Populate / add_subdirectory sequence, which newer
# CMake releases deprecate, with the single supported call.
FetchContent_MakeAvailable(cxxopts)

# add subdirectories
add_subdirectory(ctcdecode)
add_subdirectory("${CMAKE_SOURCE_DIR}/third_party")
add_subdirectory("${CMAKE_SOURCE_DIR}/tests/cpp")

# Resolve the platform's threading library as an imported target instead of
# hard-coding the linker names "pthread" and "dl".
find_package(Threads REQUIRED)

# build_fst library: the tool's logic, kept separate from main() so other
# targets can link against it.
add_library(build_fst_lib "${CMAKE_SOURCE_DIR}/tools/build_fst.cpp")
target_include_directories(build_fst_lib PUBLIC "${CMAKE_SOURCE_DIR}/tools")
target_link_libraries(build_fst_lib
  PUBLIC
    ctcdecode
    "${TORCH_LIBRARIES}"
    cxxopts
    Threads::Threads
    ${CMAKE_DL_LIBS}  # expands to the dynamic-loading library where one is needed
)

# executable to add that we want to compile and run
add_executable(build_fst "${CMAKE_SOURCE_DIR}/tools/build_fst_main.cpp")

# link libraries to our executable; PRIVATE because nothing links against an
# executable, and the keyword form avoids the legacy plain signature.
target_link_libraries(build_fst PRIVATE build_fst_lib)
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ git clone --recursive https://github.com/parlance/ctcdecode.git
cd ctcdecode && pip install .
```

To build the ctcdecode library, run:
```bash
bash build.sh
```

## How to Use

```python
Expand Down
52 changes: 52 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@

#!/bin/bash
# Downloads the third-party archives into third_party/ and then configures and
# builds the project with CMake.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a failed download never reaches the build.
set -euo pipefail

# All paths below are relative to the repository root; switch to the directory
# that contains this script so it can be launched from anywhere.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Download libtorch built CPU libraries
URL="https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.0.1%2Bcpu.zip" # stable version 2.0.1
LIBTORCH_FILE_NAME="libtorch-shared-with-deps-2.0.1+cpu.zip"
BUILD_DIR="build"

# Check if the file exists
if [ ! -f "third_party/$LIBTORCH_FILE_NAME" ]; then
    # Work in a subshell so the caller's working directory is untouched even
    # when one of the steps fails.
    (
        cd third_party
        # -O pins the local file name (the URL spells '+' as %2B); a failed
        # download is removed so the next run retries instead of skipping it.
        wget -O "$LIBTORCH_FILE_NAME" "$URL" || { rm -f "$LIBTORCH_FILE_NAME"; exit 1; }
        # Unzip the file
        unzip "$LIBTORCH_FILE_NAME"
    )
fi

# download_and_extract URL FILE_NAME
#
# Downloads URL into third_party/FILE_NAME and unpacks it there as a gzipped
# tarball. Does nothing when third_party/FILE_NAME already exists.
#
# Arguments:
#   $1  URL of the .tar.gz archive
#   $2  file name to store the archive under inside third_party/
# Returns:
#   0 on success or when the archive is already present, 1 when changing
#   directory, downloading or extracting fails.
download_and_extract(){
    # Locals keep the function from overwriting the caller's URL variable.
    local url="${1:?download_and_extract: missing URL argument}"
    local file_name="${2:?download_and_extract: missing file name argument}"

    if [ -f "third_party/$file_name" ]; then
        return 0
    fi

    # Subshell: the caller's working directory is restored automatically,
    # including on failure.
    (
        cd third_party || exit 1
        # Remove a failed download so the existence check above does not
        # mistake it for a complete archive on the next run.
        wget -O "$file_name" "$url" || { rm -f "$file_name"; exit 1; }
        tar -xvzf "$file_name" || exit 1
    ) || return 1
}

# Download OpenFST
URL="https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.8.2.tar.gz"
OPENFST_FILE_NAME="openfst-1.8.2.tar.gz"
download_and_extract "$URL" "$OPENFST_FILE_NAME" || exit 1

# Download boost
URL="https://github.com/parlance/ctcdecode/releases/download/v1.0/boost_1_67_0.tar.gz"
BOOST_FILE_NAME="boost_1_67_0.tar.gz"
download_and_extract "$URL" "$BOOST_FILE_NAME" || exit 1

# Configure and build out of source. -p makes mkdir a no-op when the
# directory already exists, and BUILD_DIR is used for the cd as well so the
# two can never disagree.
mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR" || exit 1

# Only run make when the configure step succeeded.
cmake .. && make
45 changes: 45 additions & 0 deletions ctcdecode/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

set(CMAKE_CXX_STANDARD 17)

# Look for a Python interpreter together with its development files.
# The lookup is not REQUIRED, so configuration continues either way and the
# outcome is only reported.
find_package(Python COMPONENTS Interpreter Development)
if(Python_FOUND)
  # STATUS keeps the success report on the normal configure log.
  message(STATUS "Python found: ${Python_EXECUTABLE}")

  # Include directories provided by Python (directory scope: applies to the
  # targets defined after this point in this directory and below).
  include_directories(${Python_INCLUDE_DIRS})
else()
  # WARNING makes the missing dependency stand out in the configure output.
  message(WARNING "Python not found.")
endif()

# Fetch pybind11 at configure time and make it part of this build.
include(FetchContent)
FetchContent_Declare(pybind11
  GIT_REPOSITORY https://github.com/pybind/pybind11.git
  GIT_TAG        v2.10.4)
FetchContent_MakeAvailable(pybind11)

# Locate libtorch only when no enclosing directory has done so already.
# Repeating the lookup would append TORCH_CXX_FLAGS to CMAKE_CXX_FLAGS a
# second time, because the flags variable is inherited from the parent scope.
if(NOT Torch_FOUND)
  # define path to the libtorch extracted folder
  set(CMAKE_PREFIX_PATH ${CMAKE_SOURCE_DIR}/third_party/libtorch)

  # find torch library and all necessary files
  find_package(Torch REQUIRED)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
endif()

# ctc decode
# CONFIGURE_DEPENDS re-checks the glob on every build, so adding or removing
# a .cpp file triggers a reconfigure instead of being silently missed.
file(GLOB CTC_SOURCES CONFIGURE_DEPENDS ${CMAKE_SOURCE_DIR}/ctcdecode/src/*.cpp)
add_library(ctcdecode STATIC ${CTC_SOURCES})

# Build settings are attached to the target rather than to the directory, so
# they cannot leak into other targets defined here later.
set_target_properties(ctcdecode PROPERTIES
  CXX_STANDARD 17                # together with CXX_EXTENSIONS OFF: -std=c++17
  CXX_STANDARD_REQUIRED ON
  CXX_EXTENSIONS OFF
  POSITION_INDEPENDENT_CODE ON   # portable spelling of -fPIC
)
target_compile_options(ctcdecode PRIVATE -O3)
target_compile_definitions(ctcdecode
  PRIVATE
    KENLM_MAX_ORDER=6
    INCLUDE_KENLM
)

# Header search paths; PUBLIC so targets linking ctcdecode receive them too.
target_include_directories(ctcdecode
  PUBLIC
    ${CMAKE_SOURCE_DIR}/third_party/kenlm
    ${CMAKE_SOURCE_DIR}/third_party/openfst-1.8.2/src/include
    ${CMAKE_SOURCE_DIR}/third_party/utf8
    ${CMAKE_SOURCE_DIR}/third_party/ThreadPool
    ${CMAKE_SOURCE_DIR}/third_party/boost_1_67_0
    ${CMAKE_SOURCE_DIR}/ctcdecode/src
)
target_link_libraries(ctcdecode PUBLIC "${TORCH_LIBRARIES}" kenlm fst pybind11::module)
Loading