/
librispeech_to_tfrecords.py
67 lines (45 loc) · 2.13 KB
/
librispeech_to_tfrecords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
from glob import glob

import librosa
import numpy as np
import scipy
import tensorflow as tf
# Create a .tfrecords file with signals and annotation info.
def _bytes_features(value):
    """Wrap ``value`` in a ``tf.train.Feature`` holding a one-element BytesList.

    The caller in this file passes the raw bytes of a waveform.
    """
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)
def _int64_features(value):
    """Wrap ``value`` in a ``tf.train.Feature`` holding a one-element Int64List.

    The caller in this file passes the sample rate, speaker id and label.
    """
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)
def convert_speakers_to_labels(labels, convertion_dict=None):
    """Map speaker ids to integer class labels.

    Args:
        labels: Iterable of hashable speaker ids.
        convertion_dict: Mapping from speaker id to integer label. When
            omitted, a mapping is derived by numbering the sorted unique
            ids in ``labels`` from 0.

    Returns:
        A 1-D ``np.int32`` array with one label per entry of ``labels``.

    Raises:
        KeyError: If an id in ``labels`` is missing from ``convertion_dict``.
    """
    # Materialize once: the ids may be iterated twice below, which would
    # exhaust a generator or other one-shot iterable.
    speaker_ids = list(labels)
    if convertion_dict is None:
        # The previous default (None) failed on the first lookup; fall back
        # to a deterministic mapping instead.
        convertion_dict = {
            speaker: index
            for index, speaker in enumerate(sorted(set(speaker_ids)))
        }
    return np.array(
        [convertion_dict[speaker] for speaker in speaker_ids],
        dtype=np.int32,
    )
def write_tfrecords(wav_path, tfrecord_path, signal_length=40000, sr=16000):
    """Serialize fixed-length clips found under ``wav_path`` into one TFRecord.

    Every ``*.wav`` file below ``wav_path`` (searched recursively) is loaded
    with librosa, cut to its first ``signal_length`` samples and written as a
    ``tf.train.Example`` with the features ``signal_raw`` (float32 samples as
    raw bytes), ``sr``, ``speaker`` and ``label``. Files with fewer than
    ``signal_length`` samples are skipped.

    The speaker id is the integer before the first ``-`` in the file name.
    Labels number the sorted unique speaker ids from 0. The mapping is built
    from every discovered file, including files that are later skipped for
    being too short.

    Args:
        wav_path: Root directory searched for ``*.wav`` files.
        tfrecord_path: Path of the TFRecord file to create.
        signal_length: Number of samples kept from the start of each file.
        sr: Sample rate handed to ``librosa.load``. Per the librosa
            documentation, ``None`` keeps each file's native rate.

    Raises:
        ValueError: If a file name does not start with an integer speaker id.
    """
    # Sorted so the record order does not depend on directory listing order.
    wav_files = sorted(glob('{}/**/*.wav'.format(wav_path), recursive=True))
    # basename() instead of split('/') so other path separators work too.
    speakers = [
        int(os.path.basename(wav_file).split('-')[0]) for wav_file in wav_files
    ]
    # Sorted so the same set of speakers always yields the same labels.
    speaker_to_label = {
        speaker: label for label, speaker in enumerate(sorted(set(speakers)))
    }
    labels = convert_speakers_to_labels(speakers, convertion_dict=speaker_to_label)

    # tf.python_io exists only in TensorFlow 1.x; prefer tf.io when present.
    tf_io = getattr(tf, 'io', None)
    writer_cls = getattr(tf_io, 'TFRecordWriter', None)
    if writer_cls is None:
        writer_cls = tf.python_io.TFRecordWriter

    with writer_cls(tfrecord_path) as writer:
        for wav_file, speaker, label in zip(wav_files, speakers, labels):
            print(wav_file)
            # Keep the returned rate under its own name: rebinding ``sr``
            # would force the first file's rate onto every later file when
            # the caller passes sr=None.
            wav, file_sr = librosa.load(wav_file, sr=sr, dtype=np.float32)
            if len(wav) < signal_length:
                continue
            wav = wav[:signal_length]
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    # tobytes() replaces the deprecated ndarray.tostring().
                    'signal_raw': _bytes_features(wav.tobytes()),
                    # Plain ints keep the protobuf layer independent of
                    # NumPy scalar types.
                    'sr': _int64_features(int(file_sr)),
                    'speaker': _int64_features(speaker),
                    'label': _int64_features(int(label)),
                }))
            writer.write(example.SerializeToString())
if __name__ == "__main__":
    # Convert each listed dataset split; the TFRecord file is written into
    # the same directory that holds that split's wav files.
    splits = ('train',)
    for split in splits:
        source_dir = '/workspace/data/LibriSpeech_to_classify/{}'.format(split)
        write_tfrecords(source_dir, '{}/wavs.tfrecord'.format(source_dir))