kaldi-asr · danpovey · May 28, 2017 · Jun 22, 2017 · Jun 24, 2017 · Sep 11, 2017
diff --git a/src/nnet3/convolution.cc b/src/nnet3/convolution.cc
@@ -1096,6 +1096,51 @@ void PadComputationInputTime(const ConvolutionModel &model,
   }
 }
 
+
+// Makes io's 't' ranges contiguous, then pads the input; see header comment.
+void PadComputationIoSpecial(int32 frames_left_context,
+                             int32 frames_right_context,
+                             ConvolutionComputationIo *io) {
+  KALDI_ASSERT(frames_left_context >= 0 && frames_right_context >= 0);
+
+  // make the output gap-free: n frames at step s span n * s - (s - 1) frames.
+  if (io->t_step_out == 0) {
+    KALDI_ASSERT(io->num_t_out == 1);  // a zero step is only valid for 1 frame
+    io->t_step_out = 1;
+  } else {
+    io->num_t_out *= io->t_step_out;
+    io->num_t_out -= (io->t_step_out - 1);
+    io->t_step_out = 1;
+  }
+  // fill in any gaps in the input in exactly the same way.
+  if (io->t_step_in == 0) {
+    KALDI_ASSERT(io->num_t_in == 1);  // a zero step is only valid for 1 frame
+    io->t_step_in = 1;
+  } else {
+    io->num_t_in *= io->t_step_in;
+    io->num_t_in -= (io->t_step_in - 1);
+    io->t_step_in = 1;
+  }
+  KALDI_ASSERT(io->start_t_in <= io->start_t_out);  // also re-checked below
+  // with both t_steps now equal to 1, the last frame is start + count - 1.
+  int32 last_t_in = io->start_t_in + io->num_t_in - 1;
+  int32 last_t_out = io->start_t_out + io->num_t_out - 1;
+  KALDI_ASSERT(io->start_t_in <= io->start_t_out &&
+               last_t_in >= last_t_out);
+  int32 input_left_padding =
+      frames_left_context - (io->start_t_out - io->start_t_in),
+      input_right_padding =
+      frames_right_context - (last_t_in - last_t_out);
+  // negative padding would mean 'io' has more context than requested; given
+  // how this function is called in practice, that should not happen.
+  KALDI_ASSERT(input_left_padding >= 0 && input_right_padding >= 0);
+  // widen the input range: move its start back by the left padding and grow
+  // the count by the total padding, so both contexts are fully covered.
+  io->start_t_in -= input_left_padding;
+  io->num_t_in += (input_left_padding + input_right_padding);
+}
+
+
 // returns i rounded down to a multiple of n,
 // e.g. RoundDownToMultipleOf(3, 2) = 2,
 //      RoundDownToMultipleOf(-1, 3) = -3
@@ -1677,6 +1722,41 @@ void MakeComputation(const ConvolutionModel &model,
   ComputeTempMatrixSize(opts, computation);
 }
 
+void ConvolutionComputationIo::Write(std::ostream &os, bool binary) const {
+  WriteToken(os, binary, "<ConvComputationIo>");  // order must match Read()
+  WriteToken(os, binary, "<NumImages>");
+  WriteBasicType(os, binary, num_images);
+  WriteToken(os, binary, "<TInStartStepCount>");  // start, step, count
+  WriteBasicType(os, binary, start_t_in);
+  WriteBasicType(os, binary, t_step_in);
+  WriteBasicType(os, binary, num_t_in);
+  WriteToken(os, binary, "<TOutStartStepCount>");  // start, step, count
+  WriteBasicType(os, binary, start_t_out);
+  WriteBasicType(os, binary, t_step_out);
+  WriteBasicType(os, binary, num_t_out);
+  WriteToken(os, binary, "<ReorderTIn>");
+  WriteBasicType(os, binary, reorder_t_in);
+  WriteToken(os, binary, "</ConvComputationIo>");  // closing tag
+}
+
+void ConvolutionComputationIo::Read(std::istream &is, bool binary) {
+  ExpectOneOrTwoTokens(is, binary, "<ConvComputationIo>",
+                       "<NumImages>");  // 1st token optional? TODO confirm
+  ReadBasicType(is, binary, &num_images);  // same field order as Write()
+  ExpectToken(is, binary, "<TInStartStepCount>");  // start, step, count
+  ReadBasicType(is, binary, &start_t_in);
+  ReadBasicType(is, binary, &t_step_in);
+  ReadBasicType(is, binary, &num_t_in);
+  ExpectToken(is, binary, "<TOutStartStepCount>");  // start, step, count
+  ReadBasicType(is, binary, &start_t_out);
+  ReadBasicType(is, binary, &t_step_out);
+  ReadBasicType(is, binary, &num_t_out);
+  ExpectToken(is, binary, "<ReorderTIn>");
+  ReadBasicType(is, binary, &reorder_t_in);
+  ExpectToken(is, binary, "</ConvComputationIo>");  // closing tag
+}
+
+
 } // namespace time_height_convolution
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/convolution.h b/src/nnet3/convolution.h
@@ -409,6 +409,9 @@ struct ConvolutionComputationIo {
   // a reshaping such that we can imagine that the input and output have the
   // same 't' increment; it's useful in subsampling convolutions..
   int32 reorder_t_in;
+
+  void Write(std::ostream &os, bool binary) const;
+  void Read(std::istream &is, bool binary);
 };
 
 /**
@@ -437,9 +440,15 @@ void CheckModelAndIo(const ConvolutionModel &model,
                       each Index (n,t,x) in 'output_indexes', the Index
                       (n,t+time_offset,x) must be present in 'input_indexes'
                       for each time_offset in model.required_time_offsets.
+   @param [in] opts  Options class (currently has just the memory limit).
    @param [out] computation  If non-NULL, the compiled computation will be
                       written to this location.
-
+   @param [out] input_indexes_modified  This is like 'input_indexes', but
+                      it will be sorted in the way we require and it may be
+                      padded as needed with Indexes of the form (n, kNoTime, x).
+   @param [out] output_indexes_modified  This is like 'output_indexes', but
+                      it will be sorted in the way we require and it may be
+                      padded as needed with Indexes of the form (n, kNoTime, x).
  */
 void CompileConvolutionComputation(
     const ConvolutionModel &model,
@@ -451,6 +460,7 @@ void CompileConvolutionComputation(
     std::vector<Index> *output_indexes_modified);
 
 
+
 /**
    \brief This does the forward computation of convolution.  (note: this is
          convolution without a bias term; you have to handle that separately).
@@ -583,6 +593,18 @@ void PadComputationInputTime(const ConvolutionModel &model,
                              ConvolutionComputationIo *io);
 
 
+/*
+  Pads 'io' in place; a special case used in TimeConvolutionComponent.  It
+  fills any gaps so that t_step_in and t_step_out are both one, then extends
+  the input to start 'frames_left_context' frames before the first output
+  frame and end 'frames_right_context' frames after the last output frame.
+ */
+void PadComputationIoSpecial(int32 frames_left_context,
+                             int32 frames_right_context,
+                             ConvolutionComputationIo *io);
+
+
+
 /**
   This function takes a model that might require zero padding
   in the height dimension and outputs a model accepting a

diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc
@@ -61,6 +61,8 @@ ComponentPrecomputedIndexes* ComponentPrecomputedIndexes::NewComponentPrecompute
     ans = new BackpropTruncationComponentPrecomputedIndexes();
   } else if (cpi_type == "TimeHeightConvolutionComponentPrecomputedIndexes") {
     ans = new TimeHeightConvolutionComponent::PrecomputedIndexes();
+  } else if (cpi_type == "TimeConvolutionComponentPrecomputedIndexes") {
+    ans = new TimeConvolutionComponent::PrecomputedIndexes();
   }
   if (ans != NULL) {
     KALDI_ASSERT(cpi_type == ans->Type());
@@ -159,6 +161,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
     ans = new BatchNormComponent();
   } else if (component_type == "TimeHeightConvolutionComponent") {
     ans = new TimeHeightConvolutionComponent();
+  } else if (component_type == "TimeConvolutionComponent") {
+    ans = new TimeConvolutionComponent();
   } else if (component_type == "SumBlockComponent") {
     ans = new SumBlockComponent();
   }

diff --git a/src/nnet3/nnet-component-itf.h b/src/nnet3/nnet-component-itf.h
@@ -245,7 +245,7 @@ class Component {
   ///    @param [in] input_index_set  The set of indexes that is available at the
   ///              input of this Component.
   ///    @param [out] used_inputs If this is non-NULL and the output is
-  ///       computable this will be set to the list of input indexes that will
+  ///       computable, this will be set to the list of input indexes that will
   ///       actually be used in the computation.
   ///    @return Returns true iff this output is computable from the provided
   ///          inputs.

diff --git a/src/nnet3/nnet-compute-test.cc b/src/nnet3/nnet-compute-test.cc
@@ -127,7 +127,8 @@ void TestNnetDecodable(Nnet *nnet) {
 
   if (!NnetIsRecurrent(*nnet) &&
       nnet->Info().find("statistics-extraction") == std::string::npos &&
-      nnet->Info().find("TimeHeightConvolutionComponent") == std::string::npos) {
+      nnet->Info().find("TimeHeightConvolutionComponent") == std::string::npos &&
+      nnet->Info().find("TimeConvolutionComponent") == std::string::npos) {
     // this equivalence will not hold for recurrent nnets, or those that
     // have the statistics-extraction/statistics-pooling layers,
     // or in general for nnets with convolution components (because these