Skip to content

Commit

Permalink
Replace remaining GenericVector by std::vector in src/lstm
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Mar 16, 2021
1 parent c676d5b commit 2a3682a
Show file tree
Hide file tree
Showing 18 changed files with 111 additions and 120 deletions.
21 changes: 10 additions & 11 deletions src/ccutil/genericheap.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
// File: genericheap.h
// Description: Template heap class.
// Author: Ray Smith, based on Dan Johnson's original code.
// Created: Wed Mar 14 08:13:00 PDT 2012
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -38,7 +37,7 @@ namespace tesseract {
// GenericHeap doesn't look inside it except for operator<.
//
// The heap is stored as a packed binary tree in an array hosted by a
// GenericVector<Pair>, with the invariant that the children of each node are
// vector<Pair>, with the invariant that the children of each node are
// both NOT Pair::operator< the parent node. KDPairInc defines Pair::operator<
// to use Key::operator< to generate a MIN heap and KDPairDec defines
// Pair::operator< to use Key::operator> to generate a MAX heap by reversing
Expand All @@ -59,7 +58,7 @@ template <typename Pair>
class GenericHeap {
public:
GenericHeap() = default;
// The initial size is only a GenericVector::reserve. It is not enforced as
// The initial size is only a vector::reserve. It is not enforced as
// the size limit of the heap. Caller must implement their own enforcement.
explicit GenericHeap(int initial_size) {
heap_.reserve(initial_size);
Expand All @@ -77,12 +76,12 @@ class GenericHeap {
}
void clear() {
// Clear truncates to 0 to keep the number reserved intact.
heap_.truncate(0);
heap_.clear();
}
// Provides access to the underlying vector.
// Caution! any changes that modify the keys will invalidate the heap!
GenericVector<Pair> *heap() {
return &heap_;
std::vector<Pair> &heap() {
return heap_;
}
// Provides read-only access to an element of the underlying vector.
const Pair &get(int index) const {
Expand Down Expand Up @@ -128,11 +127,11 @@ class GenericHeap {
// Sift the hole at the start of the heap_ downwards to match the last
// element.
Pair hole_pair = heap_[new_size];
heap_.truncate(new_size);
heap_.resize(new_size);
int hole_index = SiftDown(0, hole_pair);
heap_[hole_index] = hole_pair;
} else {
heap_.truncate(new_size);
heap_.resize(new_size);
}
return true;
}
Expand All @@ -154,7 +153,7 @@ class GenericHeap {
int hole_index = SiftUp(worst_index, hole_pair);
heap_[hole_index] = hole_pair;
}
heap_.truncate(heap_size);
heap_.resize(heap_size);
return true;
}

Expand All @@ -179,7 +178,7 @@ class GenericHeap {
// The pointed-to Pair has changed its key value, so the location of pair
// is reshuffled to maintain the heap invariant.
// Must be a valid pointer to an element of the heap_!
// Caution! Since GenericHeap is based on GenericVector, reallocs may occur
// Caution! Since GenericHeap is based on vector, reallocs may occur
// whenever the vector is extended and elements may get shuffled by any
// Push or Pop operation. Therefore use this function only if Data in Pair is
// of type DoublePtr, derived (first) from DoublePtr, or has a DoublePtr as
Expand Down Expand Up @@ -235,7 +234,7 @@ class GenericHeap {
}

private:
GenericVector<Pair> heap_;
std::vector<Pair> heap_;
};

} // namespace tesseract
Expand Down
6 changes: 6 additions & 0 deletions src/ccutil/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,19 @@
#include <cmath> // std::isfinite
#include <cstdio>
#include <cstring>
#include <algorithm> // for std::find
#include <functional>
#include <random>
#include <string>
#include <vector>

namespace tesseract {

// Returns true if at least one element of data compares equal to value,
// false otherwise (including when data is empty).
template <class T>
inline bool contains(const std::vector<T> &data, const T &value) {
  for (auto it = data.begin(); it != data.end(); ++it) {
    if (*it == value) {
      return true;
    }
  }
  return false;
}

inline const std::vector<std::string> split(const std::string &s, char c) {
std::string buff;
std::vector<std::string> v;
Expand Down
11 changes: 5 additions & 6 deletions src/ccutil/unicharcompress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,7 @@ void UnicharCompress::DefragmentCodeValues(int encoded_null) {
// all codes are used. Likewise with the Han encoding, it is possible that not
// all numbers of strokes are used.
ComputeCodeRange();
GenericVector<int> offsets;
offsets.init_to_size(code_range_, 0);
std::vector<int> offsets(code_range_);
// Find which codes are used
for (int c = 0; c < encoder_.size(); ++c) {
const RecodedCharID &code = encoder_[c];
Expand Down Expand Up @@ -390,26 +389,26 @@ void UnicharCompress::SetupDecoder() {
prefix.Truncate(len);
auto final_it = final_codes_.find(prefix);
if (final_it == final_codes_.end()) {
auto *code_list = new GenericVector<int>;
auto *code_list = new std::vector<int>;
code_list->push_back(code(len));
final_codes_[prefix] = code_list;
while (--len >= 0) {
prefix.Truncate(len);
auto next_it = next_codes_.find(prefix);
if (next_it == next_codes_.end()) {
auto *code_list = new GenericVector<int>;
auto *code_list = new std::vector<int>;
code_list->push_back(code(len));
next_codes_[prefix] = code_list;
} else {
// We still have to search the list as we may get here via multiple
// lengths of code.
if (!next_it->second->contains(code(len)))
if (!contains(*next_it->second, code(len)))
next_it->second->push_back(code(len));
break; // This prefix has been processed.
}
}
} else {
if (!final_it->second->contains(code(len)))
if (!contains(*final_it->second, code(len)))
final_it->second->push_back(code(len));
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/ccutil/unicharcompress.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_

#include <unordered_map>
#include "genericvector.h" // GenericVector
#include <vector>
#include "serialis.h"
#include "unicharset.h"

Expand Down Expand Up @@ -178,13 +178,13 @@ class TESS_API UnicharCompress {
}
// Returns a list of valid non-final next codes for a given prefix code,
// which may be empty.
const GenericVector<int> *GetNextCodes(const RecodedCharID &code) const {
const std::vector<int> *GetNextCodes(const RecodedCharID &code) const {
auto it = next_codes_.find(code);
return it == next_codes_.end() ? nullptr : it->second;
}
// Returns a list of valid final codes for a given prefix code, which may
// be empty.
const GenericVector<int> *GetFinalCodes(const RecodedCharID &code) const {
const std::vector<int> *GetFinalCodes(const RecodedCharID &code) const {
auto it = final_codes_.find(code);
return it == final_codes_.end() ? nullptr : it->second;
}
Expand Down Expand Up @@ -225,14 +225,14 @@ class TESS_API UnicharCompress {
// Decoder converts the output of encoder back to a unichar-id.
std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash> decoder_;
// True if the index is a valid single or start code.
GenericVector<bool> is_valid_start_;
std::vector<bool> is_valid_start_;
// Maps a prefix code to a list of valid next codes.
// The map owns the vectors.
std::unordered_map<RecodedCharID, GenericVector<int> *, RecodedCharID::RecodedCharIDHash>
std::unordered_map<RecodedCharID, std::vector<int> *, RecodedCharID::RecodedCharIDHash>
next_codes_;
// Maps a prefix code to a list of valid final codes.
// The map owns the vectors.
std::unordered_map<RecodedCharID, GenericVector<int> *, RecodedCharID::RecodedCharIDHash>
std::unordered_map<RecodedCharID, std::vector<int> *, RecodedCharID::RecodedCharIDHash>
final_codes_;
// Max of any value in encoder_ + 1.
int code_range_;
Expand Down
13 changes: 5 additions & 8 deletions src/lstm/fullyconnected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,8 @@ void FullyConnected::Forward(bool debug, const NetworkIO &input,
else
output->Resize(input, no_);
SetupForward(input, input_transpose);
GenericVector<NetworkScratch::FloatVec> temp_lines;
temp_lines.init_to_size(kNumThreads, NetworkScratch::FloatVec());
GenericVector<NetworkScratch::FloatVec> curr_input;
curr_input.init_to_size(kNumThreads, NetworkScratch::FloatVec());
std::vector<NetworkScratch::FloatVec> temp_lines(kNumThreads);
std::vector<NetworkScratch::FloatVec> curr_input(kNumThreads);
int ro = no_;
if (IntSimdMatrix::intSimdMatrix)
ro = IntSimdMatrix::intSimdMatrix->RoundOutputs(ro);
Expand Down Expand Up @@ -233,13 +231,12 @@ bool FullyConnected::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkSc
DisplayBackward(fwd_deltas);
#endif
back_deltas->Resize(fwd_deltas, ni_);
GenericVector<NetworkScratch::FloatVec> errors;
errors.init_to_size(kNumThreads, NetworkScratch::FloatVec());
std::vector<NetworkScratch::FloatVec> errors(kNumThreads);
for (int i = 0; i < kNumThreads; ++i)
errors[i].Init(no_, scratch);
GenericVector<NetworkScratch::FloatVec> temp_backprops;
std::vector<NetworkScratch::FloatVec> temp_backprops;
if (needs_to_backprop_) {
temp_backprops.init_to_size(kNumThreads, NetworkScratch::FloatVec());
temp_backprops.resize(kNumThreads);
for (int i = 0; i < kNumThreads; ++i)
temp_backprops[i].Init(ni_, scratch);
}
Expand Down
12 changes: 6 additions & 6 deletions src/lstm/lstm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,10 +297,10 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
// for the other dimension, used only when working in true 2D mode. The width
// is enough to hold an entire strip of the major direction.
int buf_width = Is2D() ? input_map_.Size(FD_WIDTH) : 1;
GenericVector<NetworkScratch::FloatVec> states, outputs;
std::vector<NetworkScratch::FloatVec> states, outputs;
if (Is2D()) {
states.init_to_size(buf_width, NetworkScratch::FloatVec());
outputs.init_to_size(buf_width, NetworkScratch::FloatVec());
states.resize(buf_width);
outputs.resize(buf_width);
for (int i = 0; i < buf_width; ++i) {
states[i].Init(ns_, scratch);
ZeroVector<double>(ns_, states[i]);
Expand Down Expand Up @@ -494,10 +494,10 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
// Rotating buffers of width buf_width allow storage of the recurrent time-
// steps used only for true 2-D. Stores one full strip of the major direction.
int buf_width = Is2D() ? input_map_.Size(FD_WIDTH) : 1;
GenericVector<NetworkScratch::FloatVec> stateerr, sourceerr;
std::vector<NetworkScratch::FloatVec> stateerr, sourceerr;
if (Is2D()) {
stateerr.init_to_size(buf_width, NetworkScratch::FloatVec());
sourceerr.init_to_size(buf_width, NetworkScratch::FloatVec());
stateerr.resize(buf_width);
sourceerr.resize(buf_width);
for (int t = 0; t < buf_width; ++t) {
stateerr[t].Init(ns_, scratch);
sourceerr[t].Init(na_, scratch);
Expand Down
5 changes: 2 additions & 3 deletions src/lstm/networkio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// File: networkio.cpp
// Description: Network input/output data, allowing float/int implementations.
// Author: Ray Smith
// Created: Thu Jun 19 13:01:31 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -507,7 +506,7 @@ int NetworkIO::BestLabel(int t, int not_this, int not_that, float *score) const

// Returns the best start position out of [start, end) (into which all labels
// must fit) to obtain the highest cumulative score for the given labels.
int NetworkIO::PositionOfBestMatch(const GenericVector<int> &labels, int start, int end) const {
int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const {
int length = labels.size();
int last_start = end - length;
int best_start = -1;
Expand All @@ -524,7 +523,7 @@ int NetworkIO::PositionOfBestMatch(const GenericVector<int> &labels, int start,

// Returns the cumulative score of the given labels starting at start, and
// using one label per time-step.
double NetworkIO::ScoreOfLabels(const GenericVector<int> &labels, int start) const {
double NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const {
int length = labels.size();
double score = 0.0;
for (int i = 0; i < length; ++i) {
Expand Down
5 changes: 2 additions & 3 deletions src/lstm/networkio.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include <cstdio>
#include <vector>

#include "genericvector.h"
#include "helpers.h"
#include "static_shape.h"
#include "stridemap.h"
Expand Down Expand Up @@ -169,10 +168,10 @@ class TESS_API NetworkIO {
int BestLabel(int t, int not_this, int not_that, float *score) const;
// Returns the best start position out of [start, end) (into which all labels
// must fit) to obtain the highest cumulative score for the given labels.
int PositionOfBestMatch(const GenericVector<int> &labels, int start, int end) const;
int PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const;
// Returns the cumulative score of the given labels starting at start, and
// using one label per time-step.
double ScoreOfLabels(const GenericVector<int> &labels, int start) const;
double ScoreOfLabels(const std::vector<int> &labels, int start) const;
// Helper function sets all the outputs for a single timestep, such that
// label has value ok_score, and the other labels share 1 - ok_score.
// Assumes float mode.
Expand Down
21 changes: 8 additions & 13 deletions src/lstm/networkscratch.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@
#define TESSERACT_LSTM_NETWORKSCRATCH_H_

#include <mutex>
#include "genericvector.h"
#include "matrix.h"
#include "networkio.h"

namespace tesseract {

// Generic scratch space for network layers. Provides NetworkIO that can store
// a complete set (over time) of intermediates, and GenericVector<float>
// a complete set (over time) of intermediates, and vector<double>
// scratch space that auto-frees after use. The aim here is to provide a set
// of temporary buffers to network layers that can be reused between layers
// and don't have to be reallocated on each call.
Expand Down Expand Up @@ -125,7 +124,7 @@ class NetworkScratch {
}; // class IO.

// Class that acts like a fixed array of float, yet actually uses space
// from a GenericVector<float> in the source NetworkScratch, and knows how
// from a vector<double> in the source NetworkScratch, and knows how
// to unstack the borrowed vector on destruction.
class FloatVec {
public:
Expand All @@ -145,12 +144,8 @@ class NetworkScratch {
scratch_space_->vec_stack_.Return(vec_);
scratch_space_ = scratch;
vec_ = scratch_space_->vec_stack_.Borrow();
// Abuse vec_ here; first resize to 'reserve', which is larger
// than 'size' (i.e. it's size rounded up) then resize down again
// to the desired size. This assumes that the implementation does
// not shrink the storage on a resize.
vec_->resize_no_init(reserve);
vec_->resize_no_init(size);
vec_->reserve(reserve);
vec_->resize(size);
data_ = &(*vec_)[0];
}

Expand All @@ -169,7 +164,7 @@ class NetworkScratch {

private:
// Vector borrowed from the scratch space. Use Return to free it.
GenericVector<double> *vec_;
std::vector<double> *vec_;
// Short-cut pointer to the underlying array.
double *data_;
// The source scratch_space_. Borrowed pointer, used to free the
Expand Down Expand Up @@ -251,19 +246,19 @@ class NetworkScratch {

private:
PointerVector<T> stack_;
GenericVector<bool> flags_;
std::vector<bool> flags_;
int stack_top_;
std::mutex mutex_;
}; // class Stack.

private:
// If true, the network weights are int8_t, if false, float.
bool int_mode_;
// Stacks of NetworkIO and GenericVector<float>. Once allocated, they are not
// Stacks of NetworkIO and vector<double>. Once allocated, they are not
// deleted until the NetworkScratch is deleted.
Stack<NetworkIO> int_stack_;
Stack<NetworkIO> float_stack_;
Stack<GenericVector<double>> vec_stack_;
Stack<std::vector<double>> vec_stack_;
Stack<TransposedArray> array_stack_;
};

Expand Down
8 changes: 3 additions & 5 deletions src/lstm/parallel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ void Parallel::Forward(bool debug, const NetworkIO &input, const TransposedArray
int stack_size = stack_.size();
if (type_ == NT_PAR_2D_LSTM) {
// Special case, run parallel in parallel.
GenericVector<NetworkScratch::IO> results;
results.init_to_size(stack_size, NetworkScratch::IO());
std::vector<NetworkScratch::IO> results(stack_size);
for (int i = 0; i < stack_size; ++i) {
results[i].Resize(input, stack_[i]->NumOutputs(), scratch);
}
Expand Down Expand Up @@ -124,9 +123,8 @@ bool Parallel::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch
int stack_size = stack_.size();
if (type_ == NT_PAR_2D_LSTM) {
// Special case, run parallel in parallel.
GenericVector<NetworkScratch::IO> in_deltas, out_deltas;
in_deltas.init_to_size(stack_size, NetworkScratch::IO());
out_deltas.init_to_size(stack_size, NetworkScratch::IO());
std::vector<NetworkScratch::IO> in_deltas(stack_size);
std::vector<NetworkScratch::IO> out_deltas(stack_size);
// Split the forward deltas for each stack element.
int feature_offset = 0;
for (int i = 0; i < stack_.size(); ++i) {
Expand Down
Loading

0 comments on commit 2a3682a

Please sign in to comment.