Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[not for merge] Convolution in time #1881

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
80 changes: 80 additions & 0 deletions src/nnet3/convolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,51 @@ void PadComputationInputTime(const ConvolutionModel &model,
}
}


// see comment in header for what this does.
void PadComputationIoSpecial(int32 frames_left_context,
int32 frames_right_context,
ConvolutionComputationIo *io) {
KALDI_ASSERT(frames_left_context >= 0 && frames_right_context >= 0);

// fill in any gaps in the output, make it contiguous.
if (io->t_step_out == 0) {
KALDI_ASSERT(io->num_t_out == 1);
io->t_step_out = 1;
} else {
io->num_t_out *= io->t_step_out;
io->num_t_out -= (io->t_step_out - 1);
io->t_step_out = 1;
}
// fill in any gaps in the input, make it contiguous.
if (io->t_step_in == 0) {
KALDI_ASSERT(io->num_t_in == 1);
io->t_step_in = 1;
} else {
io->num_t_in *= io->t_step_in;
io->num_t_in -= (io->t_step_in - 1);
io->t_step_in = 1;
}
KALDI_ASSERT(io->start_t_in <= io->start_t_out);
// the following two statements use the fact that the t_steps are both 1.
int32 last_t_in = io->start_t_in + io->num_t_in - 1;
int32 last_t_out = io->start_t_out + io->num_t_out - 1;
KALDI_ASSERT(io->start_t_in <= io->start_t_out &&
last_t_in >= last_t_out);
int32 input_left_padding =
frames_left_context - (io->start_t_out - io->start_t_in),
input_right_padding =
frames_right_context - (last_t_in - last_t_out);
// the following assert is based on knowledge of how this function
// is called in practice.
KALDI_ASSERT(input_left_padding >= 0 && input_right_padding >= 0);
// the following two statements ensure that there is enough left and right
// context.
io->start_t_in -= input_left_padding;
io->num_t_in += (input_left_padding + input_right_padding);
}


// returns i rounded down to a multiple of n,
// e.g. RoundDownToMultipleOf(3, 2) = 2,
// RoundDownToMultipleOf(-1, 3) = -3
Expand Down Expand Up @@ -1677,6 +1722,41 @@ void MakeComputation(const ConvolutionModel &model,
ComputeTempMatrixSize(opts, computation);
}

void ConvolutionComputationIo::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<ConvComputationIo>");
WriteToken(os, binary, "<NumImages>");
WriteBasicType(os, binary, num_images);
WriteToken(os, binary, "<TInStartStepCount>");
WriteBasicType(os, binary, start_t_in);
WriteBasicType(os, binary, t_step_in);
WriteBasicType(os, binary, num_t_in);
WriteToken(os, binary, "<TOutStartStepCount>");
WriteBasicType(os, binary, start_t_out);
WriteBasicType(os, binary, t_step_out);
WriteBasicType(os, binary, num_t_out);
WriteToken(os, binary, "<ReorderTIn>");
WriteBasicType(os, binary, reorder_t_in);
WriteToken(os, binary, "</ConvComputationIo>");
}

void ConvolutionComputationIo::Read(std::istream &is, bool binary) {
ExpectOneOrTwoTokens(is, binary, "<ConvComputationIo>",
"<NumImages>");
ReadBasicType(is, binary, &num_images);
ExpectToken(is, binary, "<TInStartStepCount>");
ReadBasicType(is, binary, &start_t_in);
ReadBasicType(is, binary, &t_step_in);
ReadBasicType(is, binary, &num_t_in);
ExpectToken(is, binary, "<TOutStartStepCount>");
ReadBasicType(is, binary, &start_t_out);
ReadBasicType(is, binary, &t_step_out);
ReadBasicType(is, binary, &num_t_out);
ExpectToken(is, binary, "<ReorderTIn>");
ReadBasicType(is, binary, &reorder_t_in);
ExpectToken(is, binary, "</ConvComputationIo>");
}


} // namespace time_height_convolution
} // namespace nnet3
} // namespace kaldi
24 changes: 23 additions & 1 deletion src/nnet3/convolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,9 @@ struct ConvolutionComputationIo {
// a reshaping such that we can imagine that the input and output have the
// same 't' increment; it's useful in subsampling convolutions.
int32 reorder_t_in;

void Write(std::ostream &os, bool binary) const;
void Read(std::istream &is, bool binary);
};

/**
Expand Down Expand Up @@ -437,9 +440,15 @@ void CheckModelAndIo(const ConvolutionModel &model,
each Index (n,t,x) in 'output_indexes', the Index
(n,t+time_offset,x) must be present in 'input_indexes'
for each time_offset in model.required_time_offsets.
@param [in] opts Options class (currently has just the memory limit).
@param [out] computation If non-NULL, the compiled computation will be
written to this location.

@param [out] input_indexes_modified. This is like 'input_indexes', but
it will be sorted in the way we require and it may be
padded as needed with Indexes of the form (n, kNoTime, x).
@param [out] output_indexes_modified. This is like 'output_indexes', but
it will be sorted in the way we require and it may be
padded as needed with Indexes of the form (n, kNoTime, x).
*/
void CompileConvolutionComputation(
const ConvolutionModel &model,
Expand All @@ -451,6 +460,7 @@ void CompileConvolutionComputation(
std::vector<Index> *output_indexes_modified);



/**
\brief This does the forward computation of convolution. (note: this is
convolution without a bias term; you have to handle that separately).
Expand Down Expand Up @@ -583,6 +593,18 @@ void PadComputationInputTime(const ConvolutionModel &model,
ConvolutionComputationIo *io);


/*
This function pads the 'io' object; it's a special case that is used in
TimeConvolutionComponent.  It first makes sure that t_step_in and t_step_out
are both one (filling in any gaps, so that the input and output 't' values
become contiguous), and then it extends the input time range so that, relative
to the first and last output frames, a left context of 'frames_left_context'
frames and a right context of 'frames_right_context' frames are present in
the input.

Both context values must be >= 0.  On entry, the input time range must already
enclose the output time range, and the input must not already have more left
or right context than is requested; these conditions are checked with
assertions.  Only t_step_in, t_step_out, num_t_in, num_t_out and start_t_in
are modified.
*/
void PadComputationIoSpecial(int32 frames_left_context,
int32 frames_right_context,
ConvolutionComputationIo *io);



/**
This function takes a model that might require zero padding
in the height dimension and outputs a model accepting a
Expand Down
4 changes: 4 additions & 0 deletions src/nnet3/nnet-component-itf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ ComponentPrecomputedIndexes* ComponentPrecomputedIndexes::NewComponentPrecompute
ans = new BackpropTruncationComponentPrecomputedIndexes();
} else if (cpi_type == "TimeHeightConvolutionComponentPrecomputedIndexes") {
ans = new TimeHeightConvolutionComponent::PrecomputedIndexes();
} else if (cpi_type == "TimeConvolutionComponentPrecomputedIndexes") {
ans = new TimeConvolutionComponent::PrecomputedIndexes();
}
if (ans != NULL) {
KALDI_ASSERT(cpi_type == ans->Type());
Expand Down Expand Up @@ -159,6 +161,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans = new BatchNormComponent();
} else if (component_type == "TimeHeightConvolutionComponent") {
ans = new TimeHeightConvolutionComponent();
} else if (component_type == "TimeConvolutionComponent") {
ans = new TimeConvolutionComponent();
} else if (component_type == "SumBlockComponent") {
ans = new SumBlockComponent();
}
Expand Down
2 changes: 1 addition & 1 deletion src/nnet3/nnet-component-itf.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ class Component {
/// @param [in] input_index_set The set of indexes that is available at the
/// input of this Component.
/// @param [out] used_inputs If this is non-NULL and the output is
/// computable this will be set to the list of input indexes that will
/// computable, this will be set to the list of input indexes that will
/// actually be used in the computation.
/// @return Returns true iff this output is computable from the provided
/// inputs.
Expand Down
3 changes: 2 additions & 1 deletion src/nnet3/nnet-compute-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ void TestNnetDecodable(Nnet *nnet) {

if (!NnetIsRecurrent(*nnet) &&
nnet->Info().find("statistics-extraction") == std::string::npos &&
nnet->Info().find("TimeHeightConvolutionComponent") == std::string::npos) {
nnet->Info().find("TimeHeightConvolutionComponent") == std::string::npos &&
nnet->Info().find("TimeConvolutionComponent") == std::string::npos) {
// this equivalence will not hold for recurrent nnets, or those that
// have the statistics-extraction/statistics-pooling layers,
// or in general for nnets with convolution components (because these
Expand Down