Skip to content

Commit

Permalink
Add pointcloud statistics, and constant-fold pointcloud_search calls …
Browse files Browse the repository at this point in the history
…when the

position is a constant and the search returns few enough results, by doing the
query at optimization time and putting the results into new constant arrays.
  • Loading branch information
lgritz committed Apr 16, 2012
1 parent 56e1a3a commit f25d252
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 14 deletions.
10 changes: 6 additions & 4 deletions src/liboslexec/llvm_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -960,13 +960,15 @@ RuntimeOptimizer::llvm_assign_impl (Symbol &Result, Symbol &Src,
return true;
}

// Copying of entire arrays
// Copying of entire arrays. It's ok if the array lengths don't match,
// it will only copy up to the length of the smaller one. The compiler
// will ensure they are the same size, except for certain cases where
// the size difference is intended (by the optimizer).
if (result_t.is_array() && src_t.is_array() && arrayindex == -1) {
ASSERT (assignable(result_t.elementtype(), src_t.elementtype()) &&
result_t.arraylength() == src_t.arraylength());
ASSERT (assignable(result_t.elementtype(), src_t.elementtype()));
llvm::Value *resultptr = llvm_void_ptr (Result);
llvm::Value *srcptr = llvm_void_ptr (Src);
int len = Result.size();
int len = std::min (Result.size(), Src.size());
int align = result_t.is_closure_based() ? (int)sizeof(void*) :
(int)result_t.simpletype().basesize();
if (Result.has_derivs() && Src.has_derivs()) {
Expand Down
21 changes: 19 additions & 2 deletions src/liboslexec/opcloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@ osl_pointcloud_search (ShaderGlobals *sg, const char *filename, void *center, fl
int max_points, int sort, void *out_indices, void *out_distances, int derivs_offset,
int nattrs, ...)
{
size_t *indices = (size_t *)alloca (sizeof(size_t) * max_points);
// RS::pointcloud_search takes size_t index array (because of the
// presumed use of Partio underneath), but OSL only has int, so we
// have to allocate and copy out. But, on architectures where int
// and size_t are the same, we can take a shortcut and let
// pointcloud_search fill in the array in place (assuming it's
// passed in the first place).
size_t *indices;
if (sizeof(int) == sizeof(size_t) && out_indices)
indices = (size_t *)out_indices;
else
indices = (size_t *)alloca (sizeof(size_t) * max_points);

int count = sg->context->renderer()->pointcloud_search (sg, USTR(filename),
*((Vec3 *)center), radius, max_points, sort,
Expand All @@ -52,9 +62,13 @@ osl_pointcloud_search (ShaderGlobals *sg, const char *filename, void *center, fl
}
va_end (args);

if (out_indices)
// Only copy out if we need to
if (out_indices && sizeof(int) != sizeof(size_t))
for(int i = 0; i < count; ++i)
((int *)out_indices)[i] = indices[i];

sg->context->shadingsys().pointcloud_stats (1, 0, count);

return count;
}

Expand All @@ -67,7 +81,10 @@ osl_pointcloud_get (ShaderGlobals *sg, const char *filename, void *in_indices, i
for(int i = 0; i < count; ++i)
indices[i] = ((int *)in_indices)[i];

sg->context->shadingsys().pointcloud_stats (0, 1, 0);

return sg->context->renderer()->pointcloud_get (USTR(filename), (size_t *)indices, count, USTR(attr_name),
TYPEDESC(attr_type), out_data);

}

8 changes: 8 additions & 0 deletions src/liboslexec/oslexec_pvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,8 @@ class ShadingSystemImpl : public ShadingSystem
return NULL;
}

void pointcloud_stats (int search, int get, int results);

private:
void printstats () const;

Expand Down Expand Up @@ -906,6 +908,12 @@ class ShadingSystemImpl : public ShadingSystem
double m_stat_getattribute_time; ///< Stat: time spent in getattribute
double m_stat_getattribute_fail_time; ///< Stat: time spent in getattribute
atomic_ll m_stat_getattribute_calls; ///< Stat: Number of getattribute
long long m_stat_pointcloud_searches;
long long m_stat_pointcloud_searches_total_results;
int m_stat_pointcloud_max_results;
int m_stat_pointcloud_failures;
long long m_stat_pointcloud_gets;

int m_stat_max_llvm_local_mem; ///< Stat: max LLVM local mem
PeakCounter<off_t> m_stat_memory; ///< Stat: all shading system memory

Expand Down
161 changes: 157 additions & 4 deletions src/liboslexec/runtimeoptimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ using OIIO::spin_lock;
using OIIO::Timer;
#endif


// Maximum number of new constant symbols that a single constant-folding
// function is able to add.  optimize_instance() reserves this many symbol
// slots (via make_symbol_room) before dispatching to the folding routines,
// so that adding constants during a fold does not change the addresses of
// existing symbols.
static const int max_new_consts_per_fold = 10;


/// Wrapper that erases elements of c for which predicate p is true.
/// (Unlike std::remove_if, it resizes the container so that it contains
/// ONLY elements for which the predicate is true.)
Expand Down Expand Up @@ -2027,6 +2033,153 @@ DECLFOLDER(constfold_texture)



/// Constant-fold a pointcloud_search() op.
///
/// When the filename, center, radius and max_points arguments are all
/// constants, sorting is not requested, and every optional attribute name
/// is a constant string, the search is performed once, right now.  If it
/// yields at most 256 points, the op is replaced by assignments from newly
/// created constants: one constant array per requested attribute, plus the
/// result count.  Otherwise the op is left alone to run at shade time.
///
/// Returns 0 if the op was left untouched, 1 if it was folded.
DECLFOLDER(constfold_pointcloud_search)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    DASSERT (op.nargs() >= 5);
    int result_sym     = rop.oparg (op, 0);
    Symbol& Filename   = *rop.opargsym (op, 1);
    Symbol& Center     = *rop.opargsym (op, 2);
    Symbol& Radius     = *rop.opargsym (op, 3);
    Symbol& Max_points = *rop.opargsym (op, 4);
    DASSERT (Filename.typespec().is_string() &&
             Center.typespec().is_triple() && Radius.typespec().is_float() &&
             Max_points.typespec().is_int());

    // Can't constant fold unless all the required input args are constant
    if (! (Filename.is_constant() && Center.is_constant() &&
           Radius.is_constant() && Max_points.is_constant()))
        return 0;

    // Handle the optional 'sort' flag, and don't bother constant folding
    // if sorted results may be required.
    int attr_arg_offset = 5; // where the opt attrs begin
    if (op.nargs() > 5 && rop.opargsym(op,5)->typespec().is_int()) {
        // Sorting requested
        Symbol *Sort = rop.opargsym(op,5);
        if (! Sort->is_constant() || *(int *)Sort->data())
            return 0;  // forget it if sorted data might be requested
        ++attr_arg_offset;
    }
    int nattrs = (op.nargs() - attr_arg_offset) / 2;

    // A successful fold may add one new constant per attribute query PLUS
    // one more constant holding the result count.  Make sure that total
    // stays within the per-fold budget of new constant symbols; if not,
    // leave the call to be executed at runtime.
    if (nattrs + 1 > max_new_consts_per_fold)
        return 0;

    // First pass through the optional arguments: gather the query names,
    // types, and destinations.  If any of the query names are not known
    // constants, we can't optimize this call so just return.
    std::vector<ustring> names;
    std::vector<int> value_args;
    std::vector<TypeDesc> value_types;
    for (int i = 0; i < nattrs; ++i) {
        Symbol& Name  = *rop.opargsym (op, attr_arg_offset + i*2);
        Symbol& Value = *rop.opargsym (op, attr_arg_offset + i*2 + 1);
        ASSERT (Name.typespec().is_string());
        if (!Name.is_constant())
            return 0;  // unknown optional argument, punt
        names.push_back (*(ustring *)Name.data());
        value_args.push_back (rop.oparg (op, attr_arg_offset + i*2 + 1));
        value_types.push_back (Value.typespec().simpletype());
    }

    // We're doing a fixed query, so instead of running at every shade,
    // perform the search now.  We ask for (at most) one point more than
    // we are willing to fold, so that "too many results" is detectable.
    const int maxconst = 256;  // Max number of points to consider a constant
    size_t indices[maxconst+1]; // Make room for one more!
    float distances[maxconst+1];
    int maxpoints = std::min (maxconst+1, *(int *)Max_points.data());
    ustring filename = *(ustring *)Filename.data();
    int count = 0;
    if (! filename.empty()) {
        count = rop.renderer()->pointcloud_search (NULL, filename,
                             *(Vec3 *)Center.data(), *(float *)Radius.data(),
                             maxpoints, false, indices, distances, 0);
        rop.shadingsys().pointcloud_stats (1, 0, count);
    }

    // If it returns few enough results (256 points or less), just fold
    // those results into constant arrays.  If more than that, let the
    // query happen at runtime to avoid tying up a bunch of memory.
    if (count > maxconst)
        return 0;

    // If the query returned no matching points, just turn the whole
    // pointcloud_search call into an assignment of the count to 'result'.
    if (count < 1) {
        rop.turn_into_assign (op, rop.add_constant (TypeDesc::TypeInt, &count),
                              "Folded constant pointcloud_search lookup");
        return 1;
    }

    // From here on out, we are able to fold the query (it returned
    // results, but not too many).  Start by removing the original
    // pointcloud_search call itself from the shader code.
    rop.turn_into_nop (op, "Folded constant pointcloud_search lookup");

    // Now, for each optional individual query, do a pointcloud_get NOW
    // to retrieve it, create a constant array for the shader to hold
    // those results, and add to the shader an array copy to move it
    // from the constant into the place the shader wanted the query
    // results to go.  (This assignment can be further optimized later
    // on as well, depending on how it's used.)  If any of the individual
    // queries fail now, we will return a failed result in the end.
    std::vector<char> tmp;  // temporary data
    for (int i = 0; i < nattrs; ++i) {
        // We had stashed names, data types, and destinations earlier.
        // Retrieve them now to build a query.
        if (! names[i])
            continue;
        void *const_data = NULL;
        TypeDesc const_valtype = value_types[i];
        const_valtype.arraylen = count;
        tmp.clear ();
        tmp.resize (const_valtype.size(), 0);
        const_data = &tmp[0];
        if (names[i] == "index") {
            // "index" is a special case -- it's retrieving the hit point
            // indices, not data on those hit points.
            //
            // Because the presumed Partio underneath passes indices as
            // size_t, but OSL only allows int parameters, we need to
            // copy.  But just cast if size_t and int are the same size.
            if (sizeof(size_t) == sizeof(int)) {
                const_data = indices;
            } else {
                int *int_indices = (int *)const_data;
                // Use a distinct loop variable so the attribute index 'i'
                // of the enclosing loop is not shadowed.
                for (int p = 0; p < count; ++p)
                    int_indices[p] = (int) indices[p];
            }
        } else {
            // Named queries.
            bool ok = rop.renderer()->pointcloud_get (filename, indices, count,
                                      names[i], const_valtype, const_data);
            rop.shadingsys().pointcloud_stats (0, 1, 0);
            if (! ok) {
                count = 0;  // Make it look like an error in the end
                break;
            }
        }
        // Now make a constant array for those results we just retrieved...
        int const_array_sym = rop.add_constant (const_valtype, const_data);
        // ... and add an instruction to copy the constant into the
        // original destination for the query.
        std::vector<int> args_to_add;
        args_to_add.push_back (value_args[i]);
        args_to_add.push_back (const_array_sym);
        rop.insert_code (opnum, u_assign, args_to_add, true);
    }

    // Query results all copied.  The only thing left to do is to assign
    // status (query result count) to the original "result".
    std::vector<int> args_to_add;
    args_to_add.push_back (result_sym);
    args_to_add.push_back (rop.add_constant (TypeDesc::TypeInt, &count));
    rop.insert_code (opnum, u_assign, args_to_add, true);

    return 1;
}



DECLFOLDER(constfold_functioncall)
{
Expand Down Expand Up @@ -2914,10 +3067,10 @@ RuntimeOptimizer::optimize_instance ()
if (is_simple_assign(op))
simple_sym_assign (oparg (op, 0), opnum);

// Make sure there's room for at least one more symbol, so that
// we can add a const if we need to, without worrying about the
// addresses of symbols changing when we add a new one below.
make_symbol_room (1);
// Make sure there's room for several more symbols, so that we
// can add a few consts if we need to, without worrying about
// the addresses of symbols changing when we add a new one below.
make_symbol_room (max_new_consts_per_fold);

// For various ops that we know how to effectively
// constant-fold, dispatch to the appropriate routine.
Expand Down
5 changes: 3 additions & 2 deletions src/liboslexec/runtimeoptimize.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class RuntimeOptimizer {

TextureSystem *texturesys () const { return shadingsys().texturesys(); }

RendererServices *renderer () const { return shadingsys().renderer(); }

/// Are we in debugging mode?
int debug() const { return m_debug; }

Expand All @@ -97,8 +99,7 @@ class RuntimeOptimizer {

/// Search for a constant whose type and value match type and data[...],
/// returning its index if one exists, or else creating a new constant
/// and returning its index. If copy is true, allocate new space and
/// copy the data if no matching constant was found.
/// and returning its index.
int add_constant (const TypeSpec &type, const void *data);

/// Turn the op into a simple assignment of the new symbol index to the
Expand Down
38 changes: 36 additions & 2 deletions src/liboslexec/shadingsys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,12 @@ ShadingSystemImpl::ShadingSystemImpl (RendererServices *renderer,
m_stat_getattribute_time = 0;
m_stat_getattribute_fail_time = 0;
m_stat_getattribute_calls = 0;
m_stat_pointcloud_searches = 0;
m_stat_pointcloud_searches_total_results = 0;
m_stat_pointcloud_max_results = 0;
m_stat_pointcloud_failures = 0;
m_stat_pointcloud_gets = 0;

m_groups_to_compile_count = 0;
m_threads_currently_compiling = 0;

Expand Down Expand Up @@ -381,7 +387,7 @@ ShadingSystemImpl::setup_op_descriptors ()
OP (or, andor, or, true);
OP (pnoise, noise, none, true);
OP (point, construct_triple, triple, true);
OP (pointcloud_search, pointcloud_search, none, false);
OP (pointcloud_search, pointcloud_search, pointcloud_search, false);
OP (pointcloud_get, pointcloud_get, none, false);
OP (pow, generic, pow, true);
OP (printf, printf, none, false);
Expand Down Expand Up @@ -612,11 +618,16 @@ ShadingSystemImpl::getattribute (const std::string &name, TypeDesc type,
ATTR_DECODE ("stat:llvm_irgen_time", float, m_stat_llvm_irgen_time);
ATTR_DECODE ("stat:llvm_opt_time", float, m_stat_llvm_opt_time);
ATTR_DECODE ("stat:llvm_jit_time", float, m_stat_llvm_jit_time);
ATTR_DECODE ("stat:getattribute_calls", long long, m_stat_getattribute_calls);
ATTR_DECODE ("stat:pointcloud_searches", long long, m_stat_pointcloud_searches);
ATTR_DECODE ("stat:pointcloud_gets", long long, m_stat_pointcloud_gets);
ATTR_DECODE ("stat:pointcloud_searches_total_results", long long, m_stat_pointcloud_searches_total_results);
ATTR_DECODE ("stat:pointcloud_max_results", int, m_stat_pointcloud_max_results);
ATTR_DECODE ("stat:pointcloud_failures", int, m_stat_pointcloud_failures);
ATTR_DECODE ("stat:memory_current", long long, m_stat_memory.current());
ATTR_DECODE ("stat:memory_peak", long long, m_stat_memory.peak());
ATTR_DECODE ("stat:mem_master_current", long long, m_stat_mem_master.current());
ATTR_DECODE ("stat:mem_master_peak", long long, m_stat_mem_master.peak());

ATTR_DECODE ("stat:mem_master_ops_current", long long, m_stat_mem_master_ops.current());
ATTR_DECODE ("stat:mem_master_ops_peak", long long, m_stat_mem_master_ops.peak());
ATTR_DECODE ("stat:mem_master_args_current", long long, m_stat_mem_master_args.current());
Expand Down Expand Up @@ -743,6 +754,21 @@ ShadingSystemImpl::message (const std::string &msg)



/// Accumulate point cloud statistics: 'search' and 'get' are added to the
/// respective call counters, and 'results' is added to the running total
/// of search results and folded into the maximum result count seen so far.
/// A call with nonzero 'search' and zero 'results' is tallied as a failure.
void
ShadingSystemImpl::pointcloud_stats (int search, int get, int results)
{
    // All of the counters below are updated under m_stat_mutex.
    spin_lock lock (m_stat_mutex);

    m_stat_pointcloud_searches += search;
    m_stat_pointcloud_gets += get;
    m_stat_pointcloud_searches_total_results += results;

    // A search that came back empty counts as a failure.
    const bool empty_search = (search != 0) && (results == 0);
    if (empty_search)
        m_stat_pointcloud_failures += 1;

    // Track the largest single result count.
    if (results > m_stat_pointcloud_max_results)
        m_stat_pointcloud_max_results = results;
}



std::string
ShadingSystemImpl::getstats (int level) const
{
Expand Down Expand Up @@ -828,6 +854,14 @@ ShadingSystemImpl::getstats (int level) const
out << " (fail time "
<< Strutil::timeintervalformat (m_stat_getattribute_fail_time, 2) << ")\n";
}
if (m_stat_pointcloud_searches) {
out << " pointcloud_search calls: " << m_stat_pointcloud_searches << "\n";
out << " max query results: " << m_stat_pointcloud_max_results << "\n";
out << " average query results: "
<< Strutil::format ("%.1f", (double)m_stat_pointcloud_searches_total_results/(double)m_stat_pointcloud_searches) << "\n";
out << " failures: " << m_stat_pointcloud_failures << "\n";
out << " pointcloud_get calls: " << m_stat_pointcloud_gets << "\n";
}
out << " Memory total: " << m_stat_memory.memstat() << '\n';
out << " Master memory: " << m_stat_mem_master.memstat() << '\n';
out << " Master ops: " << m_stat_mem_master_ops.memstat() << '\n';
Expand Down

0 comments on commit f25d252

Please sign in to comment.