Skip to content

Commit

Permalink
Add support for CSS10 datasets and improved docker image to reuse pip…
Browse files Browse the repository at this point in the history
… install
  • Loading branch information
schrieveslaach committed Apr 28, 2019
1 parent 00b92b5 commit 531ef65
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 6 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -0,0 +1 @@
.git/
13 changes: 12 additions & 1 deletion README.md
Expand Up @@ -63,6 +63,7 @@ Contributions are accepted! We'd love the communities help in building a better
* [LJ Speech](https://keithito.com/LJ-Speech-Dataset/) (Public Domain) * [LJ Speech](https://keithito.com/LJ-Speech-Dataset/) (Public Domain)
* [Blizzard 2012](http://www.cstr.ed.ac.uk/projects/blizzard/2012/phase_one) (Creative Commons Attribution Share-Alike) * [Blizzard 2012](http://www.cstr.ed.ac.uk/projects/blizzard/2012/phase_one) (Creative Commons Attribution Share-Alike)
* [M-ailabs](http://www.m-ailabs.bayern/en/the-mailabs-speech-dataset/) * [M-ailabs](http://www.m-ailabs.bayern/en/the-mailabs-speech-dataset/)
* [CSS10: A Collection of Single Speaker Speech Datasets for 10 Languages](https://github.com/Kyubyong/css10)


You can use other datasets if you convert them to the right format. See [TRAINING_DATA.md](TRAINING_DATA.md) for more info. You can use other datasets if you convert them to the right format. See [TRAINING_DATA.md](TRAINING_DATA.md) for more info.


Expand Down Expand Up @@ -90,7 +91,17 @@ Contributions are accepted! We'd love the communities help in building a better
|- lab |- lab
|- wav |- wav
``` ```


Alternatively, for the German CSS10 dataset, use the following layout (make sure to adjust `text/symbols.py` so that it covers the dataset's character set):
```
tacotron
|- css10
|- achtgesichterambiwasse
|- meisterfloh
|- serapionsbruederauswahl
|- transcript.txt
```

For M-AILABS follow the directory structure from [here](http://www.m-ailabs.bayern/en/the-mailabs-speech-dataset/) For M-AILABS follow the directory structure from [here](http://www.m-ailabs.bayern/en/the-mailabs-speech-dataset/)


3. **Preprocess the data** 3. **Preprocess the data**
Expand Down
10 changes: 7 additions & 3 deletions cpu.Dockerfile
@@ -1,8 +1,12 @@
FROM tensorflow/tensorflow:1.8.0-py3 FROM tensorflow/tensorflow:1.8.0-py3


RUN mkdir /root/mimic2 RUN mkdir /root/mimic2
COPY . /root/mimic2
WORKDIR /root/mimic2 WORKDIR /root/mimic2
RUN pip install --no-cache-dir -r requirements.txt


ENTRYPOINT [ "/bin/bash" ] COPY requirements.txt /root/mimic2/requirements.txt
RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
# Install ffmpeg. apt-get is used instead of apt because apt's CLI is not
# intended for scripted use; the package lists are removed in the same layer
# so they do not end up in the image.
RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*

COPY . /root/mimic2

ENTRYPOINT [ "/bin/bash" ]
43 changes: 43 additions & 0 deletions datasets/css10.py
@@ -0,0 +1,43 @@
from concurrent.futures import ProcessPoolExecutor
from functools import partial
import numpy as np
import os

from util import audio


def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    '''Preprocesses the css10 dataset from a given input path into a given output directory.

    Each non-empty line of `<in_dir>/transcript.txt` is split on '|'. The first
    field is the audio path relative to `in_dir` (expected to look like
    `<folder>/<file>`), the second field is the text of the utterance.

    Args:
      in_dir: Directory containing `transcript.txt` and the referenced audio files.
      out_dir: Directory passed on to `_process_utterance` for its output files.
      num_workers: Maximum number of worker processes to use.
      tqdm: Optional wrapper applied to the list of futures, e.g. to show progress.

    Returns:
      A list holding the result of `_process_utterance` for every utterance,
      in the order the utterances appear in the transcript.
    '''
    futures = []

    # The context manager guarantees the worker pool is shut down, even on errors.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # Read the transcript file
        with open(os.path.join(in_dir, 'transcript.txt'), encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no utterance.
                    continue
                parts = line.split('|')
                path = os.path.join(in_dir, parts[0])
                text = parts[1]
                # The file name (second path component) identifies the utterance.
                prompt_id = parts[0].split('/')[1]
                futures.append(executor.submit(partial(_process_utterance, out_dir, prompt_id, path, text)))

        # Collect the results before the pool is closed.
        return [future.result() for future in tqdm(futures)]


def _process_utterance(out_dir, prompt_id, wav_path, text):
    '''Preprocesses a single utterance and writes its spectrograms to disk.

    Args:
      out_dir: Directory the two .npy files are written to.
      prompt_id: Identifier of the utterance, used in the output file names.
      wav_path: Path of the audio file to load.
      text: Text of the utterance; returned unchanged.

    Returns:
      A (spectrogram_filename, mel_filename, n_frames, text) tuple.
    '''
    # Load the audio to a numpy array:
    wav = audio.load_wav(wav_path)

    # Compute the linear-scale spectrogram from the wav:
    spectrogram = audio.spectrogram(wav).astype(np.float32)

    # Compute a mel-scale spectrogram from the wav:
    mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)

    # Write the spectrograms to disk (transposed). Both files share the
    # 'css10-' prefix; the mel file name used to carry a doubled prefix.
    spectrogram_filename = 'css10-spec-%s.npy' % prompt_id
    mel_filename = 'css10-mel-%s.npy' % prompt_id
    np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
    np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)

    # Return a tuple describing this training example:
    n_frames = spectrogram.shape[1]
    return (spectrogram_filename, mel_filename, n_frames, text)
14 changes: 12 additions & 2 deletions preprocess.py
Expand Up @@ -2,7 +2,7 @@
import os import os
from multiprocessing import cpu_count from multiprocessing import cpu_count
from tqdm import tqdm from tqdm import tqdm
from datasets import amy, blizzard, ljspeech, kusal, mailabs from datasets import amy, blizzard, css10, ljspeech, kusal, mailabs
from hparams import hparams, hparams_debug_string from hparams import hparams, hparams_debug_string




Expand Down Expand Up @@ -32,6 +32,14 @@ def preprocess_amy(args):
write_metadata(metadata, out_dir) write_metadata(metadata, out_dir)




def preprocess_css10_de(args):
    '''Preprocesses the `css10` folder below `args.base_dir` and writes the metadata.

    Reads `args.base_dir`, `args.output` and `args.num_workers`.
    '''
    source_dir = os.path.join(args.base_dir, 'css10')
    target_dir = os.path.join(args.base_dir, args.output)

    # The output directory may already exist from an earlier run.
    os.makedirs(target_dir, exist_ok=True)

    write_metadata(
        css10.build_from_path(source_dir, target_dir, args.num_workers, tqdm=tqdm),
        target_dir,
    )


def preprocess_kusal(args): def preprocess_kusal(args):
in_dir = os.path.join(args.base_dir, 'kusal') in_dir = os.path.join(args.base_dir, 'kusal')
out_dir = os.path.join(args.base_dir, args.output) out_dir = os.path.join(args.base_dir, args.output)
Expand Down Expand Up @@ -79,7 +87,7 @@ def main():
parser.add_argument('--base_dir', default=os.path.expanduser('~/tacotron')) parser.add_argument('--base_dir', default=os.path.expanduser('~/tacotron'))
parser.add_argument('--output', default='training') parser.add_argument('--output', default='training')
parser.add_argument( parser.add_argument(
'--dataset', required=True, choices=['amy', 'blizzard', 'ljspeech', 'kusal', 'mailabs'] '--dataset', required=True, choices=['amy', 'blizzard', 'css10', 'ljspeech', 'kusal', 'mailabs']
) )
parser.add_argument('--mailabs_books_dir', parser.add_argument('--mailabs_books_dir',
help='absolute directory to the books for the mlailabs') help='absolute directory to the books for the mlailabs')
Expand All @@ -103,6 +111,8 @@ def main():
preprocess_amy(args) preprocess_amy(args)
elif args.dataset == 'blizzard': elif args.dataset == 'blizzard':
preprocess_blizzard(args) preprocess_blizzard(args)
elif args.dataset == 'css10':
preprocess_css10_de(args)
elif args.dataset == 'ljspeech': elif args.dataset == 'ljspeech':
preprocess_ljspeech(args) preprocess_ljspeech(args)
elif args.dataset == 'kusal': elif args.dataset == 'kusal':
Expand Down

0 comments on commit 531ef65

Please sign in to comment.