Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
156 lines (117 sloc) 4.12 KB
import os
import glob
import shutil
import tempfile
import numpy as np
import common
import features
import folds
from audio_toolbox import ffmpeg, sox
from constants import *
def normalize(input_file):
    """Transcode, optionally trim silence from, and normalize an audio file.

    All intermediate files are written into a fresh temporary directory.
    The function reads its settings from the module-level ``args`` namespace
    (``keep_silence``, ``silence_min_duration_sec``, ``silence_threshold``),
    which the script section of this file creates via argparse.

    Args:
        input_file: path to the audio file to process.

    Returns:
        A ``(normalized_file, temp_dir)`` tuple: the path of the normalized
        FLAC file and the temporary directory that contains it. The caller
        owns ``temp_dir`` and is responsible for removing it.

    Raises:
        Whatever the underlying ``ffmpeg``/``sox`` helpers raise. In that case
        the temporary directory is removed before the exception propagates
        (unless ``args.keep_temp_files`` is set), because the caller never
        receives its path and could not clean it up.
    """
    # Read the settings before touching the filesystem, so that a missing or
    # incomplete `args` fails before any temporary directory exists.
    keep_silence = args.keep_silence
    silence_min_duration_sec = args.silence_min_duration_sec
    silence_threshold = args.silence_threshold

    temp_dir = tempfile.mkdtemp()
    try:
        transcoded_file = os.path.join(temp_dir, 'transcoded.flac')
        ffmpeg.transcode(input_file, transcoded_file)

        if keep_silence:
            trimmed_file = transcoded_file
        else:
            trimmed_file = os.path.join(temp_dir, 'trimmed.flac')
            sox.remove_silence(
                transcoded_file,
                trimmed_file,
                min_duration_sec=silence_min_duration_sec,
                threshold=silence_threshold)

        # Truncate the duration down to a whole multiple of FRAGMENT_DURATION.
        duration = sox.get_duration(trimmed_file)
        duration = int((duration // FRAGMENT_DURATION) * FRAGMENT_DURATION)

        normalized_file = os.path.join(temp_dir, 'normalized.flac')
        sox.normalize(trimmed_file, normalized_file, duration_in_sec=duration)
    except BaseException:
        # The directory path would be lost with the exception; remove it here
        # unless the user asked to keep temporary files for inspection.
        if not getattr(args, 'keep_temp_files', False):
            shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    return normalized_file, temp_dir
def load_samples(normalized_file):
    """Split a normalized audio file into fragments and load their features.

    The file is split with ``sox.split`` into a fresh temporary directory,
    ``features.process_audio`` is run on that directory, and every ``*.npz``
    file found there afterwards is loaded, passed through
    ``folds.normalize_fb`` and collected.

    Args:
        normalized_file: path to the audio file to split.

    Returns:
        A ``(samples, temp_dir)`` tuple: a NumPy array stacking all loaded
        samples (empty if no ``*.npz`` file was produced) and the temporary
        directory holding the fragments. The caller owns ``temp_dir`` and is
        responsible for removing it.
    """
    temp_dir = tempfile.mkdtemp()

    fragmented_file = os.path.join(temp_dir, 'fragment@n.flac')
    sox.split(normalized_file, fragmented_file, FRAGMENT_DURATION)
    features.process_audio(temp_dir)

    samples = []
    # Sort the paths so the order of samples is deterministic; glob makes no
    # ordering guarantee.
    for npz_path in sorted(glob.glob(os.path.join(temp_dir, '*.npz'))):
        # Close the archive as soon as the array is read so no file handle
        # is left open on a directory that is about to be deleted.
        with np.load(npz_path) as archive:
            sample = archive[DATA_KEY]
        sample = folds.normalize_fb(sample)

        # Sanity checks on the feature format (skipped under `python -O`).
        assert sample.shape == INPUT_SHAPE
        assert sample.dtype == DATA_TYPE

        samples.append(sample)

    return np.array(samples), temp_dir
def predict(model_file):
    """Classify the loaded samples and count the votes per language.

    NOTE: this function reads the module-level ``samples`` array, which the
    script section of this file assigns before calling it.

    Args:
        model_file: path to the saved Keras model to load.

    Returns:
        A ``(scores, languages)`` tuple. ``languages`` is the second value
        returned by ``common.build_label_binarizer()``; ``scores`` is a NumPy
        array of the same length where ``scores[i]`` is the number of samples
        whose highest-scoring output was index ``i``.
    """
    # Imported lazily; presumably so the TF_CPP_MIN_LOG_LEVEL value set by the
    # script section takes effect before TensorFlow is loaded -- TODO confirm.
    import keras.models

    _, languages = common.build_label_binarizer()
    votes = np.zeros(len(languages))

    classifier = keras.models.load_model(model_file)
    # Each prediction casts one vote for its arg-max class.
    for winner in map(np.argmax, classifier.predict(samples)):
        votes[winner] += 1

    return votes, languages
def clean(paths):
    """Recursively delete every directory in ``paths``.

    Args:
        paths: iterable of directory paths to remove.

    Raises:
        Whatever ``shutil.rmtree`` raises for a path that cannot be removed;
        directories after the failing one are left untouched.
    """
    for directory in paths:
        shutil.rmtree(directory)
if __name__ == "__main__":
    # Command-line entry point: normalize the input audio, extract feature
    # samples from it, classify them with the model and print, for every
    # language, the share and the number of samples attributed to it.
    import argparse

    parser = argparse.ArgumentParser(description='Test the model.')
    parser.add_argument(
        'input',
        help='a path to an audio file')
    parser.add_argument(
        '--model',
        dest='model',
        help='a path to the H5 model file; the default is `model.h5`')
    parser.add_argument(
        '--silence-threshold',
        dest='silence_threshold',
        type=float,
        help=("indicates what sample value you should treat as silence; "
              "the default is `0.5`"))
    parser.add_argument(
        '--silence-min-duration',
        dest='silence_min_duration_sec',
        type=float,
        help=("specifies a period of silence that must exist before audio is "
              "not copied any more; the default is `0.1`"))
    parser.add_argument(
        '--keep-silence',
        dest='keep_silence',
        action='store_true',
        help='don\'t remove silence from samples')
    parser.add_argument(
        '--keep-temp-files',
        dest='keep_temp_files',
        action='store_true',
        help='don\'t remove temporary files when done')
    parser.add_argument(
        '--verbose',
        dest='verbose',
        action='store_true',
        help='print more logs')

    parser.set_defaults(
        model='model.h5',
        keep_silence=False,
        silence_min_duration_sec=0.1,
        silence_threshold=0.5,
        keep_temp_files=False,
        verbose=False)

    # NOTE: `args` must stay a module-level name; normalize() reads it as a
    # global.
    args = parser.parse_args()

    if not args.verbose:
        # suppress all warnings
        import warnings
        warnings.filterwarnings("ignore")

        # suppress tensorflow warnings
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Track every temporary directory handed back to us so that it is removed
    # even when a later step fails.
    temp_dirs = []
    try:
        normalized_file, normalized_dir = normalize(args.input)
        temp_dirs.append(normalized_dir)

        # NOTE: `samples` must stay a module-level name; predict() reads it
        # as a global.
        samples, samples_dir = load_samples(normalized_file)
        temp_dirs.append(samples_dir)
    finally:
        if not args.keep_temp_files:
            clean(temp_dirs)

    if len(samples) == 0:
        # Nothing to classify: stop with a clear message instead of feeding
        # an empty batch to the model and dividing by a zero total below.
        raise SystemExit(
            "no samples could be extracted from '{}'".format(args.input))

    scores, languages = predict(args.model)

    total = np.sum(scores)
    for language, score in zip(languages, scores):
        print("{language}: {percent:.2f}% ({amount:.0f})".format(
            language=language,
            percent=(score / total) * 100 if total else 0.0,
            amount=score))
You can’t perform that action at this time.