Skip to content

Commit

Permalink
[egs] updating local/make_voxceleb1.pl so that it works with newer versions of VoxCeleb1 (#2684)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-ryan-snyder authored and danpovey committed Sep 6, 2018
1 parent c40205f commit 1cd9d41
Showing 1 changed file with 29 additions and 12 deletions.
41 changes: 29 additions & 12 deletions egs/voxceleb/v1/local/make_voxceleb1.pl
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@
# Output directories for the test and training subsets; the utt2spk, wav.scp
# and trials files are written underneath them.
my $out_test_dir = "$out_dir/voxceleb1_test";
my $out_train_dir = "$out_dir/voxceleb1_train";

# The verification trial list (voxceleb1_test.txt) is deliberately not fetched
# here: the identical existence check and wget call sit further down, right
# before the file is opened, so repeating them at this point was redundant.

# List-form system() bypasses the shell, so an output path containing spaces
# or shell metacharacters reaches mkdir as a single argument.
if (system("mkdir", "-p", $out_test_dir) != 0) {
  die "Error making directory $out_test_dir";
}
Expand All @@ -31,58 +27,78 @@
# Collect the speaker directories: every entry of the already-open directory
# handle that is a sub-directory of voxceleb1_wav, excluding "." and "..".
my @spkr_dirs = ();
foreach my $entry (readdir($dh)) {
  next if $entry =~ /^\.{1,2}$/;
  push @spkr_dirs, $entry if -d "$data_base/voxceleb1_wav/$entry";
}
closedir($dh);

# Fetch the verification trial list and the speaker metadata table from
# OpenSLR unless a copy already exists under $data_base.
foreach my $resource ("voxceleb1_test.txt", "vox1_meta.csv") {
  my $local_copy = "$data_base/$resource";
  next if -e $local_copy;

  # List-form system() runs wget without a shell, so a $data_base containing
  # spaces or shell metacharacters is still passed as one argument.
  if (system("wget", "-O", $local_copy, "http://www.openslr.org/resources/49/$resource") != 0) {
    # "wget -O" creates the output file before the transfer starts, so a
    # failed download can leave an empty or truncated file behind. Remove it;
    # otherwise the existence check above would accept the broken copy on the
    # next run and the script would silently work from incomplete data.
    unlink $local_copy;
    die "Could not download $resource to $local_copy";
  }
}

# Open every input and output file used by the rest of the script. The
# bareword handle names are kept because the code below reads from, prints
# to and closes them by these names. Each failure message includes $! so the
# underlying reason (missing file, permissions, ...) is reported.

# Inputs: verification trial list and speaker metadata table.
open(TRIAL_IN, "<", "$data_base/voxceleb1_test.txt") or die "Could not open the verification trials file $data_base/voxceleb1_test.txt: $!";
open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv: $!";

# Outputs: utt2spk and wav.scp for each subset, plus the trials file.
open(SPKR_TEST, ">", "$out_test_dir/utt2spk") or die "Could not open the output file $out_test_dir/utt2spk: $!";
open(WAV_TEST, ">", "$out_test_dir/wav.scp") or die "Could not open the output file $out_test_dir/wav.scp: $!";
open(SPKR_TRAIN, ">", "$out_train_dir/utt2spk") or die "Could not open the output file $out_train_dir/utt2spk: $!";
open(WAV_TRAIN, ">", "$out_train_dir/wav.scp") or die "Could not open the output file $out_train_dir/wav.scp: $!";
open(TRIAL_OUT, ">", "$out_test_dir/trials") or die "Could not open the output file $out_test_dir/trials: $!";

# Build a lookup from the first field of each metadata line to its second
# field. Further down, a speaker directory name found in this table is
# replaced by the mapped value when utterance and speaker IDs are built.
my %id2spkr = ();
while (my $line = <META_IN>) {
  chomp $line;
  # Fields are split on whitespace; only the first two are needed, so the
  # remaining columns are not unpacked.
  my ($vox_id, $spkr_id) = split ' ', $line;
  # Skip blank or truncated lines instead of storing an undefined key/value.
  next unless defined $spkr_id;
  $id2spkr{$vox_id} = $spkr_id;
}

# Read the verification trial list. Each line is expected to hold a label
# ("1" for a target trial) followed by two "<speaker>/<file>" paths. For every
# trial, write "<utt1> <utt2> target|nontarget" to the trials file and record
# both speakers in %test_spkrs.
#
# %test_spkrs must be a hash: it is filled by key here and queried with
# exists() when the wav files are later split into test and train sets.
my %test_spkrs = ();
while (my $trial = <TRIAL_IN>) {
  chomp $trial;
  my ($tar_or_non, $path1, $path2) = split ' ', $trial;
  # Ignore blank or incomplete lines rather than emitting a bogus trial.
  next unless defined $path2;

  # Build the utterance ID for the left-hand and right-hand side of the trial.
  my @utt_ids = ();
  foreach my $path ($path1, $path2) {
    my ($spkr_id, $filename) = split('/', $path);
    # The file name is sliced at fixed offsets: 11 characters of recording
    # ID, one separator character, then 7 characters of segment number.
    my $rec_id = substr($filename, 0, 11);
    my $segment = substr($filename, 12, 7);
    push @utt_ids, "$spkr_id-$rec_id-$segment";
    # Only the presence of the key matters; the value is never read.
    $test_spkrs{$spkr_id} = undef;
  }

  my $target = ($tar_or_non eq "1") ? "target" : "nontarget";
  print TRIAL_OUT "$utt_ids[0] $utt_ids[1] $target\n";
}

# Walk every speaker directory and write one wav.scp line and one utt2spk
# line per wav file. Speakers that occur in the trial list go to the test
# set; all others go to the training set.
foreach my $spkr_id (@spkr_dirs) {
  # If we're using a newer version of VoxCeleb1, we need to "deanonymize"
  # the speaker labels. Directories without a metadata entry keep their name.
  my $new_spkr_id = exists $id2spkr{$spkr_id} ? $id2spkr{$spkr_id} : $spkr_id;

  my $spkr_dir = "$data_base/voxceleb1_wav/$spkr_id";
  opendir(my $dh, "$spkr_dir/") or die "Cannot open directory $spkr_dir: $!";
  my @wav_names = grep { /\.wav$/ } readdir($dh);
  closedir $dh;

  foreach my $wav_name (@wav_names) {
    # Strip the extension on a copy so the directory listing is left intact.
    (my $filename = $wav_name) =~ s/\.wav$//;
    # Same fixed-offset slicing as for the trial paths: 11 characters of
    # recording ID, one separator character, 7 characters of segment number.
    my $rec_id = substr($filename, 0, 11);
    my $segment = substr($filename, 12, 7);
    # The wav path uses the on-disk directory name, whereas the utterance
    # and speaker IDs use the mapped name so that they match the trial list.
    my $wav = "$spkr_dir/$filename.wav";
    my $utt_id = "$new_spkr_id-$rec_id-$segment";
    if (exists $test_spkrs{$new_spkr_id}) {
      print WAV_TEST "$utt_id $wav\n";
      print SPKR_TEST "$utt_id $new_spkr_id\n";
    } else {
      print WAV_TRAIN "$utt_id $wav\n";
      print SPKR_TRAIN "$utt_id $new_spkr_id\n";
    }
  }
}
Expand All @@ -93,6 +109,7 @@
# Close the remaining handles. Buffered write errors only surface at close
# time, so every failure names the file involved and reports $!.
close(WAV_TRAIN) or die "Could not close the output file $out_train_dir/wav.scp: $!";
close(TRIAL_OUT) or die "Could not close the output file $out_test_dir/trials: $!";
close(TRIAL_IN) or die "Could not close the verification trials file $data_base/voxceleb1_test.txt: $!";
close(META_IN) or die "Could not close the meta data file $data_base/vox1_meta.csv: $!";

if (system(
"utils/utt2spk_to_spk2utt.pl $out_test_dir/utt2spk >$out_test_dir/spk2utt") != 0) {
Expand Down

0 comments on commit 1cd9d41

Please sign in to comment.