Skip to content

Commit

Permalink
Merge pull request #8 from david-ryan-snyder/chain
Browse files Browse the repository at this point in the history
Xvector Egs, Etc
  • Loading branch information
danpovey committed Feb 18, 2016
2 parents f698d36 + 66eb517 commit cb4635c
Show file tree
Hide file tree
Showing 7 changed files with 271 additions and 21 deletions.
17 changes: 11 additions & 6 deletions src/cudamatrix/cu-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2098,20 +2098,25 @@ static void _diff_xent(const int32_cuda* vec_tgt, Real* mat_net_out, Real* vec_l
// Computes, element by element, the xvector objective-function terms and
// their derivatives with respect to the scores.
//
// Launch layout: a 2-D grid in which the x dimension covers the columns
// (index i) and the y dimension covers the rows (index j) of 'scores'.
// The grid may overshoot the matrix; surplus threads return immediately.
// No shared memory and no block-level synchronization are used.
//
//   scores       input matrix, addressed as scores[i + j * scores_dim.stride]
//   objf_terms   output matrix of objective terms, addressed with objf_dim
//   objf_derivs  output matrix of derivatives, addressed with derivs_dim
//
// Every in-range element of both outputs is written, so the outputs may be
// passed in uninitialized.
//
// NOTE(review): here i is the column and j the row, whereas the host
// fallback in cu-math.cc tests the same conditions with i as the row.  The
// two paths therefore fill mirrored triangles -- confirm callers only rely
// on sums over the whole matrix (or on 'scores' being symmetric).
template<typename Real>
__global__
static void _compute_xvector_objf(const Real* scores, MatrixDim scores_dim,
                                  Real* objf_terms, MatrixDim objf_dim,
                                  Real* objf_derivs, MatrixDim derivs_dim) {
  int32_cuda i = blockIdx.x * blockDim.x + threadIdx.x;  // column
  int32_cuda j = blockIdx.y * blockDim.y + threadIdx.y;  // row

  // Bounds-check BEFORE touching memory: threads in the grid's tail would
  // otherwise read scores[] past the end of the matrix.
  if (i >= scores_dim.cols || j >= scores_dim.rows)
    return;

  int32_cuda scores_index = i + j * scores_dim.stride;
  int32_cuda objf_index = i + j * objf_dim.stride;
  int32_cuda derivs_index = i + j * derivs_dim.stride;
  Real K = 1.0 / (scores_dim.rows - 2.0);
  Real L = scores[scores_index];

  if (i + 1 == j && i % 2 == 0) {
    // Element pairing an even index i with i + 1.
    // For very negative L, log(1 + exp(-L)) is replaced by -L so that
    // exp(-L) cannot overflow.
    objf_terms[objf_index] = L < -15 ? -L : log(1.0 + exp(-L));
    objf_derivs[derivs_index] = 1.0 / (1.0 + exp(L));
  } else if (i < j) {
    // Every other element with i < j, weighted by K.
    // For very large L, log(1 + exp(L)) is replaced by L for the same reason.
    objf_terms[objf_index] = K * (L > 15 ? L : log(1.0 + exp(L)));
    objf_derivs[derivs_index] = -K / (1.0 + exp(-L));
  } else {
    // Diagonal and i > j: contributes nothing.
    objf_terms[objf_index] = 0.0;
    objf_derivs[derivs_index] = 0.0;
  }
}
Expand Down
15 changes: 10 additions & 5 deletions src/cudamatrix/cu-math.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,21 @@ void ComputeXvectorObjfFromScores(const CuMatrixBase<BaseFloat> &scores,
} else
#endif
{
// Compute the xvector objective function and its derivatives in the CPU.
int32 num_rows = scores.NumRows();
BaseFloat K = 1.0 / (num_rows - 2.0);
for (int32 i = 0; i < num_rows; i++) {
for (int32 j = i + 1; j < num_rows; j++) {
for (int32 j = 0; j < num_rows; j++) {
BaseFloat L = scores(i, j);
if (i + 1 == j && i % 2 == 0) {
(*objf_terms)(i, j) = log(1.0 + exp(-scores(i, j)));
(*objf_derivs)(i, j) = 1.0 / (1.0 + exp(scores(i, j)));
(*objf_terms)(i, j) = L < -15 ? -L : log(1.0 + exp(-L));
(*objf_derivs)(i, j) = 1.0 / (1.0 + exp(L));
} else if (i < j) {
(*objf_terms)(i, j) = K * (L > 15 ? L : log(1.0 + exp(L)));
(*objf_derivs)(i, j) = -K / (1.0 + exp(-L));
} else {
(*objf_terms)(i, j) = K * log(1.0 + exp(scores(i, j)));
(*objf_derivs)(i, j) = -K / (1.0 + exp(-scores(i, j)));
(*objf_terms)(i, j) = 0;
(*objf_derivs)(i, j) = 0;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/ivector/xvector-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void TestComputeXvectorObjfAndDeriv(
BaseFloat *tot_weight);

bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
int32 xvector_dim = RandInt(4, 50),
int32 xvector_dim = RandInt(4, 100),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
Expand Down Expand Up @@ -126,7 +126,7 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
}

bool TestXvectorComputeObjf() {
int32 xvector_dim = RandInt(4, 40),
int32 xvector_dim = RandInt(4, 100),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
Expand Down
8 changes: 4 additions & 4 deletions src/ivector/xvector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ void ComputeXvectorObjfAndDeriv(
P(N, xvector_dim),
Q(N, N),
R(N, N),
scores(N, N), // The raw scores.
objf_terms(N, N),
objf_deriv_terms(N, N); // Derivative of the
// objf w.r.t. the scores.
scores(N, N), // The raw scores.
objf_terms(N, N, kUndefined),
objf_deriv_terms(N, N, // Derivative of the
kUndefined); // objf w.r.t. the scores.
CuVector<BaseFloat> r(N);

P.AddMatMat(1.0, xvector_pairs, kNoTrans, S_tmp, kNoTrans, 0.0);
Expand Down
6 changes: 3 additions & 3 deletions src/ivector/xvector.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace kaldi {
/*
Computes the training objective function and the derivatives for
the xvector. Let N = xvector_pairs.NumRows() be the number of
xvectors. There are N(N-1)/2 pairs in total and N from the same
xvectors. There are N(N-1)/2 pairs in total and N/2 from the same
class. Let v(n) be the n'th row of the matrix xvector_pairs.
The total objective function written to 'tot_objf' is
\sum_{n=0}^{N/2-1} p_same(v(n*2), v(n*2+1))
Expand Down Expand Up @@ -61,9 +61,9 @@ namespace kaldi {
the objective function with respect to the parameter b is written here.
@param [out] tot_objf The total objective function described above
@param [out] tot_weight The total normalizing factor for the objective
function, equal to dvector_pairs.NumRows().
function, equal to xvector_pairs.NumRows().
*/
void ComputeXvectorObjfAndDeriv(const CuMatrixBase<BaseFloat> &dvector_pairs,
void ComputeXvectorObjfAndDeriv(const CuMatrixBase<BaseFloat> &xvector_pairs,
const CuSpMatrix<BaseFloat> &S,
BaseFloat b,
CuMatrixBase<BaseFloat> *deriv_xvector,
Expand Down
2 changes: 1 addition & 1 deletion src/nnet3bin/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \
nnet3-am-adjust-priors nnet3-am-copy nnet3-compute-prob \
nnet3-average nnet3-am-info nnet3-combine nnet3-latgen-faster \
nnet3-copy nnet3-show-progress nnet3-align-compiled \
nnet3-get-egs-dense-targets nnet3-compute
nnet3-get-egs-dense-targets nnet3-compute nnet3-xvector-get-egs

OBJFILES =

Expand Down
240 changes: 240 additions & 0 deletions src/nnet3bin/nnet3-xvector-get-egs.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
// nnet3bin/nnet3-xvector-get-egs.cc

// Copyright 2012-2016 Johns Hopkins University (author: Daniel Povey)
// 2016 David Snyder

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <sstream>

#include "util/common-utils.h"
#include "nnet3/nnet-example.h"

namespace kaldi {
namespace nnet3 {

// Describes one output example: a pair of feature chunks taken from a single
// utterance, together with the archive the example should be written to.
struct ChunkPairInfo {
  // Key the example is written under; built from the utterance name and the
  // start/length strings of both chunks.
  std::string pair_name;

  // Index into the vector of output example writers.
  int32 output_archive_id;

  // Row of the utterance's feature matrix at which each chunk begins.
  int32 start_frame1;
  int32 start_frame2;

  // Number of feature-matrix rows in each chunk.
  int32 num_frames1;
  int32 num_frames2;
};

// Process the range input file and store it as a map from utterance
// name to vector of ChunkPairInfo structs.
static void ProcessRangeFile(const std::string &range_rxfilename,
unordered_map<std::string,
std::vector<ChunkPairInfo *> > *utt_to_pairs) {
Input range_input(range_rxfilename);
if (!range_rxfilename.empty()) {
std::string line;
while (std::getline(range_input.Stream(), line)) {
ChunkPairInfo *pair = new ChunkPairInfo();
std::vector<std::string> fields;
SplitStringToVector(line, " \t\n\r", true, &fields);
if (fields.size() != 6)
KALDI_ERR << "Expected 6 fields in line of range file, got "
<< fields.size() << " instead.";

std::string utt = fields[0],
start_frame1_str = fields[2],
num_frames1_str = fields[3],
start_frame2_str = fields[4],
num_frames2_str = fields[5];

if (!ConvertStringToInteger(fields[1], &(pair->output_archive_id))
|| !ConvertStringToInteger(start_frame1_str, &(pair->start_frame1))
|| !ConvertStringToInteger(start_frame2_str, &(pair->start_frame2))
|| !ConvertStringToInteger(num_frames1_str, &(pair->num_frames1))
|| !ConvertStringToInteger(num_frames2_str, &(pair->num_frames2)))
KALDI_ERR << "Expected integer for output archive in range file.";
pair->pair_name = utt + "-" + start_frame1_str + "-" + num_frames1_str
+ "-" + start_frame2_str + "-" + num_frames2_str;
unordered_map<std::string, std::vector<ChunkPairInfo*> >::iterator
got = utt_to_pairs->find(utt);
if (got == utt_to_pairs->end()) {
std::vector<ChunkPairInfo* > pairs;
pairs.push_back(pair);
utt_to_pairs->insert(std::make_pair<std::string,
std::vector<ChunkPairInfo* > > (utt, pairs));
} else {
got->second.push_back(pair);
}
}
}
}

static void WriteExamples(const MatrixBase<BaseFloat> &feats,
const std::vector<ChunkPairInfo *> &pairs,
const std::string &utt,
bool compress,
int32 *num_egs_written,
std::vector<NnetExampleWriter *> *example_writers) {
for (std::vector<ChunkPairInfo *>::const_iterator it = pairs.begin();
it != pairs.end(); ++it) {
ChunkPairInfo *pair = *it;
NnetExample eg;
int32 num_rows = feats.NumRows(),
feat_dim = feats.NumCols();
if (num_rows < std::max(pair->num_frames1, pair->num_frames2)) {
KALDI_WARN << "Unable to create examples for utterance " << utt
<< ". Requested chunk size of "
<< std::max(pair->num_frames1, pair->num_frames2)
<< " but utterance has only " << num_rows << " frames.";
} else {
// The requested chunk positions are approximate. It's possible
// that they slightly exceed the number of frames in the utterance.
// If that occurs, we can shift the chunks location back slightly.
int32 shift1 = std::min(0, num_rows - pair->start_frame1
- pair->num_frames1),
shift2 = std::min(0, num_rows - pair->start_frame2
- pair->num_frames2);
SubMatrix<BaseFloat> chunk1(feats, pair->start_frame1 + shift1,
pair->num_frames1, 0, feat_dim),
chunk2(feats, pair->start_frame2 + shift2,
pair->num_frames2, 0, feat_dim);
NnetIo nnet_io1 = NnetIo("input1", 0, chunk1),
nnet_io2 = NnetIo("input2", 0, chunk2);
for (std::vector<Index>::iterator indx_it = nnet_io1.indexes.begin();
indx_it != nnet_io1.indexes.end(); ++indx_it)
indx_it->n = 0;
for (std::vector<Index>::iterator indx_it = nnet_io2.indexes.begin();
indx_it != nnet_io2.indexes.end(); ++indx_it)
indx_it->n = 1;

NnetExample eg;
eg.io.push_back(nnet_io1);
eg.io.push_back(nnet_io2);
if (compress)
eg.Compress();

if (pair->output_archive_id >= example_writers->size())
KALDI_ERR << "Requested output index exceeds number of specified "
<< "output files.";
(*example_writers)[pair->output_archive_id]->Write(
pair->pair_name, eg);
(*num_egs_written) += 1;
}
}
}

// Frees every heap object owned through the two containers: each
// ChunkPairInfo stored in the map's vectors and each example writer.
// The containers themselves are left as they are (the pointers they hold
// are dangling afterwards and must not be used).
static void Cleanup(unordered_map<std::string,
                    std::vector<ChunkPairInfo *> > *utt_to_pairs,
                    std::vector<NnetExampleWriter *> *writers) {
  typedef unordered_map<std::string, std::vector<ChunkPairInfo *> > PairMap;

  for (PairMap::iterator entry = utt_to_pairs->begin();
       entry != utt_to_pairs->end(); ++entry) {
    std::vector<ChunkPairInfo *> &chunk_pairs = entry->second;
    for (size_t k = 0; k < chunk_pairs.size(); k++)
      delete chunk_pairs[k];
  }

  for (size_t w = 0; w < writers->size(); w++)
    delete (*writers)[w];
}

} // namespace nnet3
} // namespace kaldi

// Entry point of nnet3-xvector-get-egs.
//
// Reads the ranges file (argument 1) and the feature archive (argument 2),
// and writes one example per requested chunk pair to the output archives
// given as arguments 3 onwards.
//
// Returns 0 on success; 1 if no example was written or if more feature
// files failed than succeeded; -1 if an exception was caught.
int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    using namespace kaldi::nnet3;
    typedef kaldi::int32 int32;

    const char *usage =
        "Get examples for training an nnet3 neural network for the xvector\n"
        "system. Each output example contains a pair of feature chunks from\n"
        "the same utterance. The location and length of the feature chunks\n"
        "are specified in the 'ranges' file. Each line is interpreted as\n"
        "follows:\n"
        " <source-utterance> <output-archive-index> <start-frame-index1>"
        " <num-frames1> <start-frame-index2> <num-frames2>\n"
        "For example:\n"
        " utt1 3 0 65 112 110\n"
        " utt1 0 160 50 214 180\n"
        " utt2 ...\n"
        "\n"
        "Usage: nnet3-xvector-get-egs [options] <ranges-filename> "
        "<features-rspecifier> <egs-0-out> <egs-1-out> ... <egs-N-1-out>\n"
        "\n"
        "For example:\n"
        "nnet3-xvector-get-egs ranges.1 \"$feats\" ark:egs_temp.1.ark"
        " ark:egs_temp.2.ark ark:egs_temp.3.ark\n";

    bool compress = true;

    ParseOptions po(usage);
    po.Register("compress", &compress, "If true, write egs in "
                "compressed format.");

    po.Read(argc, argv);

    // Need the ranges file, the features and at least one output archive.
    if (po.NumArgs() < 3) {
      po.PrintUsage();
      exit(1);
    }

    std::string
        range_rxfilename = po.GetArg(1),
        feature_rspecifier = po.GetArg(2);

    // One writer per output archive; position k in the vector corresponds to
    // output-archive-index k in the ranges file.
    std::vector<NnetExampleWriter *> example_writers;
    for (int32 i = 3; i <= po.NumArgs(); i++)
      example_writers.push_back(new NnetExampleWriter(po.GetArg(i)));

    unordered_map<std::string, std::vector<ChunkPairInfo *> > utt_to_pairs;
    ProcessRangeFile(range_rxfilename, &utt_to_pairs);
    SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);

    int32 num_done = 0,
        num_err = 0,
        num_egs_written = 0;

    for (; !feat_reader.Done(); feat_reader.Next()) {
      std::string key = feat_reader.Key();
      const Matrix<BaseFloat> &feats = feat_reader.Value();
      unordered_map<std::string, std::vector<ChunkPairInfo*> >::iterator
          got = utt_to_pairs.find(key);
      if (got == utt_to_pairs.end()) {
        KALDI_WARN << "Could not create examples from utterance "
                   << key << " because it has no entry in the ranges "
                   << "input file.";
        num_err++;
      } else {
        // Bind by reference: the vector is only read, so copying it for
        // every utterance would be wasted work.
        const std::vector<ChunkPairInfo *> &pairs = got->second;
        WriteExamples(feats, pairs, key, compress, &num_egs_written,
                      &example_writers);
        num_done++;
      }
    }
    // Deletes the chunk descriptions and the writers.
    Cleanup(&utt_to_pairs, &example_writers);

    KALDI_LOG << "Finished generating examples, "
              << "successfully processed " << num_done
              << " feature files, wrote " << num_egs_written << " examples; "
              << num_err << " files had errors.";
    return (num_egs_written == 0 || num_err > num_done ? 1 : 0);
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}

0 comments on commit cb4635c

Please sign in to comment.