Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add voicevox (AI speech synthesis) package #13

Merged
merged 17 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions 3rdparty/voicevox/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Download/unpack scratch directory used by the Makefile.* helpers
# (their TARBALL and SOURCE_DIR both live under build/).
build
# Open JTalk dictionary copied in by Makefile.open_jtalk_dic.
dict
# Libraries and model files copied in by Makefile.core and Makefile.model.
lib
# VOICEVOX engine sources copied in by Makefile.engine.
node_scripts/voicevox_engine
# NOTE(review): presumably generated from requirements.in by catkin_virtualenv -- confirm.
requirements.txt
# Never ignore .gitignore files themselves.
!.gitignore
71 changes: 71 additions & 0 deletions 3rdparty/voicevox/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Catkin package wrapping the VOICEVOX speech-synthesis stack for ROS.
cmake_minimum_required(VERSION 2.8.3)
project(voicevox)

# catkin_virtualenv provides catkin_generate_virtualenv(), called below.
find_package(catkin REQUIRED COMPONENTS catkin_virtualenv)

# Directory handed to the Makefile.* helpers as INSTALL_DIR and later used as
# the source of the lib/ install rule. It is the package source directory, so
# the downloaded payload is placed inside the source tree.
set(INSTALL_DIR ${PROJECT_SOURCE_DIR})

catkin_package()

# Python 3 virtualenv for the node scripts, resolved from requirements.in and
# configured with USE_SYSTEM_PACKAGES disabled.
catkin_generate_virtualenv(
  PYTHON_INTERPRETER python3
  USE_SYSTEM_PACKAGES FALSE
  INPUT_REQUIREMENTS requirements.in
)

# Third-party payload installation.
#
# Each rule below runs one of the Makefile.* helpers shipped with this package.
# MD5SUM_DIR and INSTALL_DIR are passed on the make command line, so they
# override the defaults assigned inside those Makefiles.
#
# None of the OUTPUT names is ever created as a file by the helpers, so the
# rules are out of date on every build and the helpers are re-run each time;
# the helper Makefiles themselves decide what (if anything) needs redoing.
#
# VERBATIM plus quoted arguments keep the command line intact regardless of
# generator/platform escaping rules (e.g. a source path containing spaces).

# ONNX Runtime libraries (Makefile.model).
add_custom_command(
  OUTPUT voicevox_model_installed
  COMMAND make -f "${PROJECT_SOURCE_DIR}/Makefile.model"
          "MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum"
          "INSTALL_DIR=${INSTALL_DIR}"
  VERBATIM
)

# VOICEVOX core libraries and model data (Makefile.core).
add_custom_command(
  OUTPUT voicevox_core_installed
  COMMAND make -f "${PROJECT_SOURCE_DIR}/Makefile.core"
          "MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum"
          "INSTALL_DIR=${INSTALL_DIR}"
  VERBATIM
)

# VOICEVOX engine sources (Makefile.engine).
add_custom_command(
  OUTPUT voicevox_engine_installed
  COMMAND make -f "${PROJECT_SOURCE_DIR}/Makefile.engine"
          "MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum"
          "INSTALL_DIR=${INSTALL_DIR}"
  VERBATIM
)

# Open JTalk dictionary (Makefile.open_jtalk_dic).
add_custom_command(
  OUTPUT open_jtalk_dic_installed
  COMMAND make -f "${PROJECT_SOURCE_DIR}/Makefile.open_jtalk_dic"
          "MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum"
          "INSTALL_DIR=${INSTALL_DIR}"
  VERBATIM
)

# Hook all four payload rules into the default build.
add_custom_target(all_installed ALL
  DEPENDS
    voicevox_model_installed
    voicevox_core_installed
    voicevox_engine_installed
    open_jtalk_dic_installed
)

# ---- Installation rules ----

# Every Python file directly under node_scripts/ is installed as a program.
file(GLOB NODE_SCRIPTS_FILES node_scripts/*.py)
catkin_install_python(
  PROGRAMS ${NODE_SCRIPTS_FILES}
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}/node_scripts/
)

# VOICEVOX engine tree (placed under node_scripts/ by Makefile.engine).
install(
  DIRECTORY node_scripts/voicevox_engine
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/catkin_virtualenv_scripts/
  USE_SOURCE_PERMISSIONS
)

# Launch files and the Open JTalk dictionary directory.
install(
  DIRECTORY launch dict
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
  USE_SOURCE_PERMISSIONS
)

# text2wave helper executable.
install(
  PROGRAMS bin/text2wave
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin
)

# Libraries and model files gathered in ${INSTALL_DIR}/lib by the
# Makefile.core / Makefile.model helpers.
install(
  DIRECTORY ${INSTALL_DIR}/lib
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
  USE_SOURCE_PERMISSIONS
)
11 changes: 11 additions & 0 deletions 3rdparty/voicevox/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Convenience wrapper: runs every Makefile.* helper from this directory.
#
# $(MAKE) is used instead of a literal `make` so that sub-makes inherit the
# parent's flags (-j, -n, -k, ...) and the jobserver.
# `all` and `clean` never produce files of those names, so they are phony.
.PHONY: all clean

# Download and install core, ONNX Runtime, engine and dictionary in turn.
all:
	$(MAKE) -f Makefile.core
	$(MAKE) -f Makefile.model
	$(MAKE) -f Makefile.engine
	$(MAKE) -f Makefile.open_jtalk_dic

# Undo everything `all` produced, then drop the shared build/ scratch directory.
clean:
	$(MAKE) -f Makefile.core clean
	$(MAKE) -f Makefile.model clean
	$(MAKE) -f Makefile.engine clean
	$(MAKE) -f Makefile.open_jtalk_dic clean
	rm -rf build
28 changes: 28 additions & 0 deletions 3rdparty/voicevox/Makefile.core
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# -*- makefile -*-
#
# Downloads the VOICEVOX core release archive and copies its shared libraries,
# *.bin files and metas.json into $(INSTALL_DIR)/lib.

# Neither `all` nor `clean` produces a file of that name.
.PHONY: all clean

all: installed.voicevox_core

# Release to fetch and where to unpack it. These variables are presumably
# consumed by the included download_unpack_build.mk -- confirm against it.
VERSION = 0.11.4
FILENAME = core.zip
TARBALL = build/$(FILENAME)
TARBALL_URL = "https://github.com/VOICEVOX/voicevox_core/releases/download/$(VERSION)/core.zip"
SOURCE_DIR = build/core
UNPACK_CMD = unzip
MD5SUM_DIR = $(CURDIR)/md5sum
MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum
# NOTE(review): the value below is a bash idiom, not make syntax; make parses
# the parenthesised text as a variable reference, so it appears to expand to an
# empty string. Nothing in this Makefile reads SCRIPT_DIR -- candidate for
# removal once download_unpack_build.mk is confirmed not to use it.
SCRIPT_DIR = $( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
include $(shell rospack find mk)/download_unpack_build.mk
# Default install root; overridden when INSTALL_DIR is given on the command line.
INSTALL_DIR = './'


# Copy the unpacked payload into place. No file named after this target is
# created, so the recipe runs on every invocation.
installed.voicevox_core: $(SOURCE_DIR)/unpacked
	mkdir -p $(INSTALL_DIR)/lib
	cp build/core/lib*.so $(INSTALL_DIR)/lib/
	cp build/core/*.bin $(INSTALL_DIR)/lib/
	cp build/core/metas.json $(INSTALL_DIR)/lib/metas.json

# Remove the download, the unpacked tree, the installed lib/ and build/.
clean:
	rm -rf $(TARBALL)
	rm -rf $(SOURCE_DIR)
	rm -rf $(INSTALL_DIR)/lib
	rm -rf build
24 changes: 24 additions & 0 deletions 3rdparty/voicevox/Makefile.engine
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -*- makefile -*-
#
# Downloads the VOICEVOX engine source archive and copies the unpacked tree to
# $(INSTALL_DIR)/node_scripts/voicevox_engine.

# Neither `all` nor `clean` produces a file of that name.
.PHONY: all clean

all: installed.voicevox_engine

# Release to fetch and where to unpack it. These variables are presumably
# consumed by the included download_unpack_build.mk -- confirm against it.
VERSION = 0.11.4
FILENAME = $(VERSION).tar.gz
TARBALL = build/$(FILENAME)
TARBALL_URL = "https://github.com/VOICEVOX/voicevox_engine/archive/refs/tags/$(FILENAME)"
SOURCE_DIR = build/voicevox_engine-$(VERSION)
UNPACK_CMD = tar xvzf
MD5SUM_DIR = $(CURDIR)/md5sum
MD5SUM_FILE = $(MD5SUM_DIR)/voicevox_engine.tar.gz.md5sum
include $(shell rospack find mk)/download_unpack_build.mk
# Default install root; overridden when INSTALL_DIR is given on the command line.
INSTALL_DIR = './'


# No file named after this target is created, so the recipe runs on every
# invocation. The destination is therefore removed first: `cp -r SRC DST` with
# an already existing DST directory would otherwise nest the tree as
# voicevox_engine/voicevox_engine-$(VERSION) on the second and later runs.
installed.voicevox_engine: $(SOURCE_DIR)/unpacked
	mkdir -p $(INSTALL_DIR)/node_scripts
	rm -rf $(INSTALL_DIR)/node_scripts/voicevox_engine
	cp -r build/voicevox_engine-$(VERSION) $(INSTALL_DIR)/node_scripts/voicevox_engine

# Remove the download, the unpacked tree, the installed copy and build/.
clean:
	rm -rf $(TARBALL)
	rm -rf $(SOURCE_DIR)
	rm -rf $(INSTALL_DIR)/node_scripts/voicevox_engine
	rm -rf build
26 changes: 26 additions & 0 deletions 3rdparty/voicevox/Makefile.model
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- makefile -*-
#
# Fetches the prebuilt ONNX Runtime linux-x64 release and copies the contents
# of its lib/ directory into $(INSTALL_DIR)/lib.

all: installed.voicevox_model

# Archive to download and where it is unpacked. These variables are presumably
# consumed by the included download_unpack_build.mk -- confirm against it.
VERSION = 1.10.0
FILENAME = onnxruntime-linux-x64-$(VERSION).tgz
TARBALL_URL = "https://github.com/microsoft/onnxruntime/releases/download/v$(VERSION)/$(FILENAME)"
TARBALL = build/$(FILENAME)
UNPACK_CMD = tar xvzf
SOURCE_DIR = build/onnxruntime-linux-x64-$(VERSION)
MD5SUM_DIR = $(CURDIR)/md5sum
MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum
# NOTE(review): bash idiom rather than make syntax; not read anywhere in this file.
SCRIPT_DIR = $( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
include $(shell rospack find mk)/download_unpack_build.mk
# Default install root; a command-line INSTALL_DIR=... takes precedence.
INSTALL_DIR = './'


# Copy the runtime libraries out of the unpacked archive.
installed.voicevox_model: $(SOURCE_DIR)/unpacked
	mkdir -p $(INSTALL_DIR)/lib
	cp $(SOURCE_DIR)/lib/* $(INSTALL_DIR)/lib

# Delete the download, the unpacked tree, the installed lib/ and build/.
clean:
	rm -rf $(TARBALL) $(SOURCE_DIR)
	rm -rf $(INSTALL_DIR)/lib
	rm -rf build
25 changes: 25 additions & 0 deletions 3rdparty/voicevox/Makefile.open_jtalk_dic
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- makefile -*-
#
# Fetches the Open JTalk UTF-8 dictionary archive and copies the unpacked
# directory into $(INSTALL_DIR)/dict.

all: installed.open_jtalk_dic

# Archive to download and where it is unpacked. These variables are presumably
# consumed by the included download_unpack_build.mk -- confirm against it.
# VERSION only selects the release tag; the archive name is fixed at 1.11.
VERSION = 1.11.1
FILENAME = open_jtalk_dic_utf_8-1.11.tar.gz
TARBALL_URL = "https://github.com/r9y9/open_jtalk/releases/download/v$(VERSION)/$(FILENAME)"
TARBALL = build/$(FILENAME)
UNPACK_CMD = tar xvzf
SOURCE_DIR = build/open_jtalk_dic_utf_8-1.11
MD5SUM_DIR = $(CURDIR)/md5sum
MD5SUM_FILE = $(MD5SUM_DIR)/open_jtalk_dic.tar.gz.md5sum
include $(shell rospack find mk)/download_unpack_build.mk
# Default install root; a command-line INSTALL_DIR=... takes precedence.
INSTALL_DIR = './'


# Copy the unpacked dictionary directory under dict/.
installed.open_jtalk_dic: $(SOURCE_DIR)/unpacked
	mkdir -p $(INSTALL_DIR)/dict
	cp -r $(SOURCE_DIR) $(INSTALL_DIR)/dict

# Delete the download, the unpacked tree, the installed dictionary and build/.
clean:
	rm -rf $(TARBALL) $(SOURCE_DIR)
	rm -rf $(INSTALL_DIR)/dict/open_jtalk_dic_utf_8-1.11
	rm -rf build
103 changes: 103 additions & 0 deletions 3rdparty/voicevox/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# voicevox

ROS Interface for [VOICEVOX](https://voicevox.hiroshiba.jp/) (AI speech synthesis)

## Terms of use

[VOICEVOX](https://voicevox.hiroshiba.jp/) is basically free to use, but please check the terms of use below.

[VOICEVOX terms of use](https://voicevox.hiroshiba.jp/term)

Each voice synthesis character also has its own terms of use. Please follow the terms linked below for every character you use with this package.

| Character name | term link |
| ---- | ---- |
| 四国めたん | https://zunko.jp/con_ongen_kiyaku.html |
| ずんだもん | https://zunko.jp/con_ongen_kiyaku.html |
| 春日部つむぎ | https://tsukushinyoki10.wixsite.com/ktsumugiofficial/利用規約 |
| 波音リツ | http://canon-voice.com/kiyaku.html |
| 雨晴はう | https://amehau.com/?page_id=225 |
| 玄野武宏 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
| 白上虎太郎 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
| 青山龍星 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
| 冥鳴ひまり | https://kotoran8zunzun.wixsite.com/my-site/利用規約 |
| 九州そら | https://zunko.jp/con_ongen_kiyaku.html |

## Installation

Build this package.

```bash
cd /path/to/catkin_workspace
catkin build voicevox
```

## Usage

### Launch sound_play with VOICEVOX Text-to-Speech

```bash
roslaunch voicevox voicevox_texttospeech.launch
```

<a id="saysomething"></a>
### Say something

#### For python users

```python
import rospy
from sound_play.libsoundplay import SoundClient

rospy.init_node('say_node')

client = SoundClient(sound_action='robotsound_jp', sound_topic='robotsound_jp')

client.say('こんにちは', voice='四国めたん-あまあま')
```

You can change the voice by passing a different voice_name as the `voice` argument.
You can also specify the speaker id.
See the following table for the available speaker ids and voice names.

| speaker_id | voice_name |
| ---- | ---- |
| 0 | 四国めたん-あまあま |
| 1 | ずんだもん-あまあま |
| 2 | 四国めたん-ノーマル |
| 3 | ずんだもん-ノーマル |
| 4 | 四国めたん-セクシー |
| 5 | ずんだもん-セクシー |
| 6 | 四国めたん-ツンツン |
| 7 | ずんだもん-ツンツン |
| 8 | 春日部つむぎ-ノーマル |
| 9 | 波音リツ-ノーマル |
| 10 | 雨晴はう-ノーマル |
| 11 | 玄野武宏-ノーマル |
| 12 | 白上虎太郎-ノーマル |
| 13 | 青山龍星-ノーマル |
| 14 | 冥鳴ひまり-ノーマル |
| 15 | 九州そら-あまあま |
| 16 | 九州そら-ノーマル |
| 17 | 九州そら-セクシー |
| 18 | 九州そら-ツンツン |
| 19 | 九州そら-ささやき |

#### For roseus users

```
$ roseus
(load "package://pr2eus/speak.l")

(ros::roseus "say_node")

(speak "JSKへようこそ。" :lang "波音リツ" :wait t :topic-name "robotsound_jp")
```

### Tips

By default, the speech synthesis server starts at `http://localhost:50021`.
You can change the URL and port by setting `VOICEVOX_TEXTTOSPEECH_URL` and `VOICEVOX_TEXTTOSPEECH_PORT`.

You can also set the default character by setting `VOICEVOX_DEFAULT_SPEAKER_ID`.
See the [speaker table](#saysomething) above for the available speaker ids.
Loading