Skip to content
Permalink
Browse files

Untwine: calculate stats for attributes

Without stats we don't know valid ranges of input data, so let's get them calculated
when the data gets indexed by untwine.
  • Loading branch information
wonder-sk authored and nyalldawson committed Jan 4, 2021
1 parent d7d6f8b commit 6fed3b37832250958098365a5a21c64e327e39d4
@@ -29,6 +29,7 @@ void BuPyramid::run(const Options& options, ProgressWriter& progress)
{
m_b.inputDir = options.tempDir;
m_b.outputDir = options.outputDir;
m_b.stats = options.stats;

readBaseInfo();
getInputFiles();
@@ -164,7 +165,31 @@ void BuPyramid::writeInfo()
out << "\"type\": \"" << typeString(pdal::Dimension::base(fdi.type)) << "\", ";
if (fdi.name == "X" || fdi.name == "Y" || fdi.name == "Z")
out << "\"scale\": 0.01, \"offset\": 0, ";
out << "\"size\": " << pdal::Dimension::size(fdi.type) << " ";
out << "\"size\": " << pdal::Dimension::size(fdi.type);
const Stats *stats = m_manager.stats(fdi.name);
if (stats)
{
const Stats::EnumMap& v = stats->values();
out << ", ";
if (v.size())
{
out << "\"counts\": [ ";
for (auto ci = v.begin(); ci != v.end(); ++ci)
{
auto c = *ci;
if (ci != v.begin())
out << ", ";
out << "{\"value\": " << c.first << ", \"count\": " << c.second << "}";
}
out << "], ";
}
out << "\"count\": " << m_manager.totalPoints() << ", ";
out << "\"maximum\": " << stats->maximum() << ", ";
out << "\"minimum\": " << stats->minimum() << ", ";
out << "\"mean\": " << stats->average() << ", ";
out << "\"stddev\": " << stats->stddev() << ", ";
out << "\"variance\": " << stats->variance();
}
out << "}";
if (di + 1 != m_b.dimInfo.end())
out << ",";
@@ -35,6 +35,7 @@ struct BaseInfo
int maxLevel;
DimInfoList dimInfo;
pdal::SpatialReference srs;
bool stats;
};

} // namespace bu
@@ -124,7 +124,7 @@ std::cerr << m_vi.key() << " Accepted/Rejected/num points = " <<
**/

// If this is the final key, append any remaining file infos as accepted points and
// write the accepted points as binary.
// write the accepted points as compressed.
if (m_vi.key() == VoxelKey(0, 0, 0, 0))
{
appendRemainder(accepted);
@@ -136,7 +136,7 @@ std::cerr << m_vi.key() << " Accepted/Rejected/num points = " <<
}


bool Processor::acceptable(int /* pointId */, GridKey key)
bool Processor::acceptable(int pointId, GridKey key)
{
VoxelInfo::Grid& grid = m_vi.grid();

@@ -258,6 +258,9 @@ void Processor::writeCompressedOutput(Index& index)
std::sort(index.begin(), index.end());

IndexIter pos = index.begin();

// If any of our octants has points, we have to write the parent octant, whether or not
// it contains points, in order to create a full tree.
for (int octant = 0; octant < 8; ++octant)
if (m_vi[octant].hasPoints() || m_vi[octant].mustWrite())
{
@@ -267,26 +270,47 @@ void Processor::writeCompressedOutput(Index& index)
}


// o Octant we're writing.
// index Index of all rejected points that were rejected and not hoisted into the parent.
// pos Start position of this octant's point in the index.
// \return Position of the first point in the next octant of our index.
Processor::IndexIter
Processor::writeOctantCompressed(const OctantInfo& o, Index& index, IndexIter pos)
{
auto begin = pos;
pdal::PointTable table;
IndexedStats stats;

//ABELL - fixme
// For now we copy the dimension list so we're sure that it matches the layout, though
// there's no reason why it should change. We should modify things to use a single
// layout.
DimInfoList dims = m_b.dimInfo;
for (FileDimInfo& fdi : dims)
{
fdi.dim = table.layout()->registerOrAssignDim(fdi.name, fdi.type);
if (m_b.stats)
{
if (fdi.dim == pdal::Dimension::Id::Classification)
stats.push_back({fdi.dim, Stats(fdi.name, Stats::EnumType::Enumerate, false)});
else
stats.push_back({fdi.dim, Stats(fdi.name, Stats::EnumType::NoEnum, false)});
}
}
table.finalize();

pdal::PointViewPtr view(new pdal::PointView(table));

// The octant's points can came from one or more FileInfo. The points are sorted such
// all the points that come from a single FileInfo are consecutive.
auto fii = o.fileInfos().begin();
auto fiiEnd = o.fileInfos().end();
size_t count = 0;
if (fii != fiiEnd)
{
// We're trying to find the range of points that come from a single FileInfo.
// If pos is the end of the index of the the current file info, append the points
// to the view. Otherwise, advance the position.
while (true)
{
if (pos == index.end() || *pos >= fii->start() + fii->numPoints())
@@ -296,6 +320,9 @@ Processor::writeOctantCompressed(const OctantInfo& o, Index& index, IndexIter po
if (pos == index.end())
break;
begin = pos;

// Advance through file infos as long as we don't have points that
// correspond to it.
do
{
fii++;
@@ -309,14 +336,14 @@ Processor::writeOctantCompressed(const OctantInfo& o, Index& index, IndexIter po
flush:
try
{
flushCompressed(table, view, o);
flushCompressed(table, view, o, stats);
}
catch (pdal::pdal_error& err)
{
fatal(err.what());
}

m_manager.logOctant(o.key(), count);
m_manager.logOctant(o.key(), count, stats);
return pos;
}

@@ -340,25 +367,39 @@ void Processor::appendCompressed(pdal::PointViewPtr view, const DimInfoList& dim


void Processor::flushCompressed(pdal::PointTableRef table, pdal::PointViewPtr view,
const OctantInfo& oi)
const OctantInfo& oi, IndexedStats& stats)
{
using namespace pdal;

std::string filename = m_b.outputDir + "/ept-data/" + oi.key().toString() + ".laz";

if (m_b.stats)
{
for (PointId id = 0; id < view->size(); ++id)
{
for (auto& sp : stats)
{
Dimension::Id dim = sp.first;
Stats& s = sp.second;
s.insert(view->getFieldAs<double>(dim, id));
}
}
}

StageFactory factory;

BufferReader r;
r.addView(view);

Stage *prev = &r;

if (table.layout()->hasDim(Dimension::Id::GpsTime))
{
Stage *f = factory.createStage("filters.sort");
pdal::Options fopts;
fopts.add("dimension", "gpstime");
f->setOptions(fopts);
f->setInput(r);
f->setInput(*prev);
prev = f;
}

@@ -17,6 +17,7 @@

#include "BuTypes.hpp"
#include "PointAccessor.hpp"
#include "Stats.hpp"
#include "VoxelInfo.hpp"

namespace untwine
@@ -53,7 +54,7 @@ class Processor
void appendCompressed(pdal::PointViewPtr view, const DimInfoList& dims, const FileInfo& fi,
IndexIter begin, IndexIter end);
void flushCompressed(pdal::PointTableRef table, pdal::PointViewPtr view,
const OctantInfo& oi);
const OctantInfo& oi, IndexedStats& stats);

VoxelInfo m_vi;
const BaseInfo& m_b;
@@ -28,8 +28,6 @@ namespace untwine
namespace bu
{

//PyramidManager::PyramidManager(const BaseInfo& b) : m_b(b), m_pool(6), m_totalPoints(0)
//PyramidManager::PyramidManager(const BaseInfo& b) : m_b(b), m_pool(8), m_totalPoints(0)
PyramidManager::PyramidManager(const BaseInfo& b) : m_b(b), m_pool(10), m_totalPoints(0)
{}

@@ -136,10 +134,23 @@ OctantInfo PyramidManager::removeComplete(const VoxelKey& k)
}


void PyramidManager::logOctant(const VoxelKey& k, int cnt)
void PyramidManager::logOctant(const VoxelKey& k, int cnt, const IndexedStats& istats)
{
std::lock_guard<std::mutex> lock(m_mutex);

for (auto is : istats)
{
Stats& s = is.second;

auto it = m_stats.find(s.name());
if (it != m_stats.end())
{
Stats& cur = it->second;
cur.merge(s);
}
else
m_stats.insert({s.name(), s});
}
m_written.insert({k, cnt});
m_totalPoints += cnt;
}
@@ -229,5 +240,14 @@ std::deque<VoxelKey> PyramidManager::emit(const VoxelKey& p, int stopLevel, Entr
return roots;
}


Stats *PyramidManager::stats(const std::string& name)
{
auto si = m_stats.find(name);
if (si == m_stats.end())
return nullptr;
return &si->second;
}

} // namespace bu
} // namespace untwine
@@ -19,6 +19,7 @@

#include "BuTypes.hpp"
#include "OctantInfo.hpp"
#include "Stats.hpp"
#include "../untwine/ThreadPool.hpp"

namespace untwine
@@ -42,9 +43,10 @@ class PyramidManager
void setProgress(ProgressWriter *progress);
void queue(const OctantInfo& o);
void run();
void logOctant(const VoxelKey& k, int cnt);
void logOctant(const VoxelKey& k, int cnt, const IndexedStats& istats);
uint64_t totalPoints() const
{ return m_totalPoints; }
Stats *stats(const std::string& name);

private:
const int LevelBreak = 4;
@@ -56,6 +58,7 @@ class PyramidManager
std::queue<OctantInfo> m_queue;
ThreadPool m_pool;
uint64_t m_totalPoints;
std::map<std::string, Stats> m_stats;
ProgressWriter *m_progress;
//
std::unordered_map<VoxelKey, int> m_written;
@@ -0,0 +1,67 @@

#include "Stats.hpp"

#include <cmath>

namespace untwine
{

void Stats::computeGlobalStats()
{
auto compute_median = [](std::vector<double> vals)
{
std::nth_element(vals.begin(), vals.begin() + vals.size() / 2, vals.end());
return *(vals.begin() + vals.size() / 2);
};

// TODO add quantiles
m_median = compute_median(m_data);
std::transform(m_data.begin(), m_data.end(), m_data.begin(),
[this](double v) { return std::fabs(v - this->m_median); });
m_mad = compute_median(m_data);
}

// Math comes from https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
// (Pebay paper from Sandia labs, 2008)
bool Stats::merge(const Stats& s)
{
if ((m_name != s.m_name) || (m_enumerate != s.m_enumerate) || (m_advanced != s.m_advanced))
return false;

double n1 = m_cnt;
double n2 = s.m_cnt;
double n = n1 + n2;
double nsq = n * n;
double n1n2 = m_cnt * s.m_cnt;
double n1sq = n1 * n1;
double n2sq = n2 * n2;
double ncube = n * n * n;
double deltaMean = s.M1 - M1;

if (n == 0)
return true;

double m1 = M1 + s.m_cnt * deltaMean / n;
double m2 = M2 + s.M2 + n1n2 * std::pow(deltaMean, 2) / n;
double m3 = M3 + s.M3 + n1n2 * (n1 - n2) * std::pow(deltaMean, 3) / nsq +
3 * (n1 * s.M2 - n2 * M2) * deltaMean / n;
double m4 = M4 + s.M4 +
n1n2 * (n1sq - n1n2 + n2sq) * std::pow(deltaMean, 4) / ncube +
6 * (n1sq * s.M2 + n2sq * M2) * std::pow(deltaMean, 2) / nsq +
4 * (n1 * s.M3 - n2 * M3) * deltaMean / n;

M1 = m1;
M2 = m2;
M3 = m3;
M4 = m4;
m_min = (std::min)(m_min, s.m_min);
m_max = (std::max)(m_max, s.m_max);
m_cnt = s.m_cnt + m_cnt;
m_data.insert(m_data.begin(), s.m_data.begin(), s.m_data.end());
for (auto p : s.m_values)
m_values[p.first] += p.second;

return true;
}

} // namespace untwine

0 comments on commit 6fed3b3

Please sign in to comment.
You can’t perform that action at this time.