Skip to content

Commit

Permalink
Add tetrahedral interpolation
Browse files Browse the repository at this point in the history
  • Loading branch information
sekrit-twc committed Jul 1, 2023
1 parent f5f50d4 commit af682b6
Show file tree
Hide file tree
Showing 12 changed files with 189 additions and 39 deletions.
9 changes: 6 additions & 3 deletions benchmark/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,18 @@
int main(int argc, char **argv)
{
unsigned niter = 4000000;
timecube::Interpolation interp = timecube::Interpolation::LINEAR;
int simd = INT_MAX;

try {
if (argc >= 2)
niter = std::stoi(argv[1]);
if (argc >= 3)
simd = std::stoi(argv[2]);
interp = static_cast<timecube::Interpolation>(std::stoi(argv[2]));
if (argc >= 4)
simd = std::stoi(argv[3]);
} catch (const std::exception &) {
std::cerr << "usage: benchmark [niter] [simd]\n";
std::cerr << "usage: benchmark [niter] [interp] [simd]\n";
return 1;
}

Expand All @@ -44,7 +47,7 @@ int main(int argc, char **argv)
alignas(64) float b[1024] = { 0 };

try {
if (!(lut = timecube::create_lut3d_impl(cube, sizeof(r) / sizeof(r[0]), 1, simd)))
if (!(lut = timecube::create_lut3d_impl(cube, sizeof(r) / sizeof(r[0]), 1, interp, simd)))
throw std::runtime_error{ "failed to create LUT implementation" };
} catch (const std::exception &e) {
std::cerr << e.what() << '\n';
Expand Down
16 changes: 10 additions & 6 deletions test/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace {

void usage()
{
std::cerr << "usage: test cubefile [x y z] [simd]\n";
std::cerr << "usage: test cubefile [x y z] [interp] [simd]\n";
}

} // namespace
Expand All @@ -26,6 +26,7 @@ int main(int argc, char **argv)
alignas(64) float x = 0.0f;
alignas(64) float y = 0.0f;
alignas(64) float z = 0.0f;
timecube::Interpolation interp = timecube::Interpolation::LINEAR;
int simd = 0;

try {
Expand All @@ -36,7 +37,10 @@ int main(int argc, char **argv)
}

if (argc >= 6)
simd = std::stoi(argv[5]);
interp = static_cast<timecube::Interpolation>(std::stoi(argv[5]));

if (argc >= 7)
simd = std::stoi(argv[6]);
} catch (const std::exception &) {
usage();
return 1;
Expand All @@ -48,14 +52,14 @@ int main(int argc, char **argv)
std::cout << "last entry: " << cube.lut[cube.lut.size() - 3] << ' ' << cube.lut[cube.lut.size() - 2] << ' ' << cube.lut[cube.lut.size() - 1] << '\n';

if (cube.is_3d) {
if (!(lut[0] = timecube::create_lut3d_impl(cube, 1, 1, simd)))
if (!(lut[0] = timecube::create_lut3d_impl(cube, 1, 1, interp, simd)))
throw std::runtime_error{ "failed to create LUT implementation" };
} else {
if (!(lut[0] = timecube::create_lut1d_impl(cube, 1, 1, 0, simd)))
if (!(lut[0] = timecube::create_lut1d_impl(cube, 1, 1, 0, interp, simd)))
throw std::runtime_error{ "failed to create LUT implementation" };
if (!(lut[1] = timecube::create_lut1d_impl(cube, 1, 1, 1, simd)))
if (!(lut[1] = timecube::create_lut1d_impl(cube, 1, 1, 1, interp, simd)))
throw std::runtime_error{ "failed to create LUT implementation" };
if (!(lut[2] = timecube::create_lut1d_impl(cube, 1, 1, 2, simd)))
if (!(lut[2] = timecube::create_lut1d_impl(cube, 1, 1, 2, interp, simd)))
throw std::runtime_error{ "failed to create LUT implementation" };
}
} catch (const std::exception &e) {
Expand Down
125 changes: 117 additions & 8 deletions timecube/lut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,57 @@ Vector3 trilinear_interp(const Vector3 tri[2][2][2], float dist_x, float dist_y,
return tmp0;
}

// Orders a pair in place: afterwards x holds the smaller value and y the larger.
// Both results are computed before either argument is overwritten. When neither
// value compares less than the other, both arguments end up holding the value
// originally in x.
template <class T>
void minmax(T &x, T &y)
{
    const T lower = (y < x) ? y : x;
    const T upper = (x < y) ? y : x;

    x = lower;
    y = upper;
}

// Sorts three values in place into ascending order (x <= y <= z) using a fixed
// sequence of three compare-exchange steps. The order of the steps matters.
template <class T>
void sort3(T &x, T &y, T &z)
{
// Put the smaller of (x, z) into x.
minmax(x, z);
// x is now no larger than z; after this step it is no larger than y either,
// so x holds the minimum of all three.
minmax(x, y);
// Order the remaining pair so that z holds the maximum.
minmax(y, z);
}

// Tetrahedral interpolation inside a single LUT cell.
//
// vec - per-axis distances of the sample from the cell's base entry. The four
//       weights computed below always sum to one; they are all non-negative
//       when every component lies in [0, 1].
// lut - pointer to the cell's base entry. Neighbouring entries are addressed at
//       offsets 1, dim and dim * dim for the first, second and third component
//       of vec respectively, so lut[1 + dim + dim * dim] must be readable.
// dim - number of LUT entries along each axis.
//
// Returns the weighted sum of four entries: the base entry, the diagonally
// opposite entry, the entry one step along the axis with the largest distance,
// and the entry one step along every axis except the one with the smallest
// distance.
Vector3 tetrahedral_interp(const Vector3 &vec, const Vector3 *lut, uint_least32_t dim)
{
    // Only the first two components are remembered by axis. The third axis is
    // the fallback of the selections below, so it never needs to be compared.
    const float r = vec[0], g = vec[1];
    float x = vec[0], y = vec[1], z = vec[2];

    const uint_least32_t dim_sq = dim * dim;
    const uint_least32_t diag = 1 + dim + dim_sq;

    // After sorting: x <= y <= z.
    sort3(x, y, z);

    // Offset of one step along the axis with the largest (disp1) and the
    // smallest (disp2) distance. On ties the corresponding weight is zero, so
    // the particular axis picked does not affect the result.
    const uint_least32_t disp1 = z == r ? 1 : z == g ? dim : dim_sq;
    const uint_least32_t disp2 = x == r ? 1 : x == g ? dim : dim_sq;

    Vector3 vert[4];
    float w[4];

    vert[0] = lut[0];
    vert[1] = lut[diag];
    vert[2] = lut[disp1];
    vert[3] = lut[diag - disp2];

    w[0] = 1.0f - z;
    w[1] = x;
    w[2] = z - y;
    w[3] = y - x;

    return w[0] * vert[0] + w[1] * vert[1] + w[2] * vert[2] + w[3] * vert[3];
}


class Lut1DFilter_C : public Lut1DFilter {
std::vector<float> m_lut;
float m_scale;
float m_offset;
public:
explicit Lut1DFilter_C(const Cube &cube, unsigned width, unsigned height, unsigned plane) :
Lut1DFilter_C(const Cube &cube, unsigned width, unsigned height, unsigned plane) :
Lut1DFilter(width, height),
m_scale{},
m_offset{}
Expand Down Expand Up @@ -139,12 +183,13 @@ class Lut1DFilter_C : public Lut1DFilter {
};

class Lut3DFilter_C : public Lut3DFilter {
protected:
std::vector<Vector3> m_lut;
uint_least32_t m_dim;
float m_scale[3];
float m_offset[3];
public:
explicit Lut3DFilter_C(const Cube &cube, unsigned width, unsigned height) :

Lut3DFilter_C(const Cube &cube, unsigned width, unsigned height) :
Lut3DFilter(width, height),
m_dim{ cube.n },
m_scale{},
Expand All @@ -163,6 +208,11 @@ class Lut3DFilter_C : public Lut3DFilter {
m_lut[i][2] = cube.lut[i * 3 + 2];
}
}
};

class TrilinearFilter_C : public Lut3DFilter_C {
public:
TrilinearFilter_C(const Cube &cube, unsigned width, unsigned height) : Lut3DFilter_C(cube, width, height) {}

void process(const graphengine::BufferDescriptor in[], const graphengine::BufferDescriptor out[],
unsigned i, unsigned left, unsigned right, void *, void *) const noexcept override
Expand Down Expand Up @@ -226,6 +276,61 @@ class Lut3DFilter_C : public Lut3DFilter {
}
};

// Scalar 3D LUT filter that samples the cube with tetrahedral interpolation.
// The LUT entries, cube dimension and per-channel scale/offset are the members
// inherited from Lut3DFilter_C.
class TetrahedralFilters_C : public Lut3DFilter_C {
public:
    TetrahedralFilters_C(const Cube &cube, unsigned width, unsigned height) : Lut3DFilter_C(cube, width, height) {}

    // Filters columns [left, right) of row i. The three input planes are read
    // as float and the three output planes are written as float.
    void process(const graphengine::BufferDescriptor in[], const graphengine::BufferDescriptor out[],
                 unsigned i, unsigned left, unsigned right, void *, void *) const noexcept override
    {
        const float *src_r = in[0].get_line<float>(i);
        const float *src_g = in[1].get_line<float>(i);
        const float *src_b = in[2].get_line<float>(i);
        float *dst_r = out[0].get_line<float>(i);
        float *dst_g = out[1].get_line<float>(i);
        float *dst_b = out[2].get_line<float>(i);

        const uint_least32_t lut_max = m_dim - 1;
        // Largest float strictly below lut_max: keeps the truncated index at
        // most lut_max - 1, so the neighbouring entries read by the
        // interpolation stay inside the table.
        const float lut_clamp = std::nextafter(static_cast<float>(lut_max), -INFINITY);

        // Maps a sample of channel c to a LUT coordinate clamped to [0, lut_clamp].
        auto to_lut_coord = [&](float value, unsigned c) {
            value = (value * m_scale[c] + m_offset[c]) * lut_max;
            return std::min(std::max(value, 0.0f), lut_clamp);
        };

        for (unsigned col = left; col < right; ++col) {
            const float r = to_lut_coord(src_r[col], 0);
            const float g = to_lut_coord(src_g[col], 1);
            const float b = to_lut_coord(src_b[col], 2);

            // Integer part selects the cell, fractional part the position in it.
            const uint_least32_t idx_r = static_cast<uint_least32_t>(r);
            const uint_least32_t idx_g = static_cast<uint_least32_t>(g);
            const uint_least32_t idx_b = static_cast<uint_least32_t>(b);
            const uint_least32_t cell = idx_r + idx_g * m_dim + idx_b * m_dim * m_dim;

            const float frac_r = r - idx_r;
            const float frac_g = g - idx_g;
            const float frac_b = b - idx_b;

            Vector3 sample = tetrahedral_interp({ frac_r, frac_g, frac_b }, m_lut.data() + cell, m_dim);

            dst_r[col] = sample[0];
            dst_g[col] = sample[1];
            dst_b[col] = sample[2];
        }
    }
};


template <class T>
void to_float(const void *src, void *dst, unsigned left, unsigned right, float scale, float offset, unsigned)
Expand Down Expand Up @@ -364,25 +469,29 @@ std::unique_ptr<graphengine::Filter> create_from_float_impl(unsigned width, unsi
return std::make_unique<PixelIOFilter>(PixelIOFilter::FROM_FLOAT, width, height, to, func);
}

std::unique_ptr<graphengine::Filter> create_lut1d_impl(const Cube &cube, unsigned width, unsigned height, unsigned plane, int)
std::unique_ptr<graphengine::Filter> create_lut1d_impl(const Cube &cube, unsigned width, unsigned height, unsigned plane, Interpolation, int)
{
if (cube.is_3d)
throw std::invalid_argument{ "wrong LUT type" };

return std::make_unique<Lut1DFilter_C>(cube, width, height, plane);
}

std::unique_ptr<graphengine::Filter> create_lut3d_impl(const Cube &cube, unsigned width, unsigned height, int simd)
std::unique_ptr<graphengine::Filter> create_lut3d_impl(const Cube &cube, unsigned width, unsigned height, Interpolation interp, int simd)
{
if (!cube.is_3d)
throw std::invalid_argument{ "wrong LUT type" };

std::unique_ptr<graphengine::Filter> ret;
#ifdef CUBE_X86
ret = create_lut3d_impl_x86(cube, width, height, simd);
ret = create_lut3d_impl_x86(cube, width, height, interp, simd);
#endif
if (!ret)
ret = std::make_unique<Lut3DFilter_C>(cube, width, height);
if (!ret) {
if (interp == Interpolation::TETRA)
ret = std::make_unique<TetrahedralFilters_C>(cube, width, height);
else
ret = std::make_unique<TrilinearFilter_C>(cube, width, height);
}

return ret;
}
Expand Down
9 changes: 7 additions & 2 deletions timecube/lut.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ namespace timecube {
struct Cube;


// Interpolation method used when sampling a LUT.
// The values are spelled out because callers convert plain integers to this
// type with static_cast; they are the same values implicit numbering produces.
enum class Interpolation {
    LINEAR = 0,
    TETRA = 1,
};

enum class PixelType {
BYTE,
WORD,
Expand Down Expand Up @@ -114,9 +119,9 @@ std::unique_ptr<graphengine::Filter> create_to_float_impl(unsigned width, unsign

std::unique_ptr<graphengine::Filter> create_from_float_impl(unsigned width, unsigned height, const PixelFormat &to, int simd);

std::unique_ptr<graphengine::Filter> create_lut1d_impl(const Cube &cube, unsigned width, unsigned height, unsigned plane, int simd);
std::unique_ptr<graphengine::Filter> create_lut1d_impl(const Cube &cube, unsigned width, unsigned height, unsigned plane, Interpolation interp, int simd);

std::unique_ptr<graphengine::Filter> create_lut3d_impl(const Cube &cube, unsigned width, unsigned height, int simd);
std::unique_ptr<graphengine::Filter> create_lut3d_impl(const Cube &cube, unsigned width, unsigned height, Interpolation interp, int simd);

} // namespace timecube

Expand Down
11 changes: 6 additions & 5 deletions timecube/timecube.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TimecubeFilterGraph : public timecube_filter {
graphengine::node_id m_source_id = graphengine::null_node;
graphengine::node_id m_sink_id = graphengine::null_node;
public:
TimecubeFilterGraph(const timecube::Cube &cube, unsigned width, unsigned height, const timecube::PixelFormat &src_format, const timecube::PixelFormat &dst_format, int cpu)
TimecubeFilterGraph(const timecube::Cube &cube, unsigned width, unsigned height, const timecube::PixelFormat &src_format, const timecube::PixelFormat &dst_format, timecube::Interpolation interp, timecube_cpu_type_e cpu)
{
graphengine::PlaneDescriptor format[3];
std::fill_n(format, 3, graphengine::PlaneDescriptor{ width, height, timecube::pixel_size(src_format.type) });
Expand All @@ -66,15 +66,15 @@ class TimecubeFilterGraph : public timecube_filter {
}

if (cube.is_3d) {
std::unique_ptr<graphengine::Filter> lut_filter = timecube::create_lut3d_impl(cube, width, height, cpu);
std::unique_ptr<graphengine::Filter> lut_filter = timecube::create_lut3d_impl(cube, width, height, interp, cpu);
graphengine::node_id id = m_graph.add_transform(lut_filter.get(), ids);
ids[0] = { id, 0 };
ids[1] = { id, 1 };
ids[2] = { id, 2 };
m_filters.push_back(std::move(lut_filter));
} else {
for (unsigned p = 0; p < 3; ++p) {
std::unique_ptr<graphengine::Filter> lut_filter = timecube::create_lut1d_impl(cube, width, height, p, cpu);
std::unique_ptr<graphengine::Filter> lut_filter = timecube::create_lut1d_impl(cube, width, height, p, interp, cpu);
ids[p] = { m_graph.add_transform(lut_filter.get(), &ids[p]), 0 };
m_filters.push_back(std::move(lut_filter));
}
Expand Down Expand Up @@ -205,13 +205,14 @@ void timecube_lut_free(timecube_lut *ptr)
delete static_cast<timecube::Cube *>(ptr);
}

timecube_filter *timecube_filter_create(const timecube_lut *lut, const timecube_filter_params *params, unsigned width, unsigned height, timecube_cpu_type_e cpu) try
timecube_filter *timecube_filter_create(const timecube_lut *lut, const timecube_filter_params *params) try
{
const timecube::Cube *cube = static_cast<const timecube::Cube *>(lut);
timecube::PixelFormat src_format{ static_cast<timecube::PixelType>(params->src_type), params->src_depth, params->src_range == TIMECUBE_RANGE_FULL };
timecube::PixelFormat dst_format{ static_cast<timecube::PixelType>(params->dst_type), params->dst_depth, params->dst_range == TIMECUBE_RANGE_FULL };
timecube::Interpolation interp = static_cast<timecube::Interpolation>(params->interp);

std::unique_ptr<TimecubeFilterGraph> filter = std::make_unique<TimecubeFilterGraph>(*cube, width, height, src_format, dst_format, cpu);
std::unique_ptr<TimecubeFilterGraph> filter = std::make_unique<TimecubeFilterGraph>(*cube, params->width, params->height, src_format, dst_format, interp, params->cpu);
return filter.release();
} catch (...) {
return nullptr;
Expand Down
13 changes: 12 additions & 1 deletion timecube/timecube.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ typedef enum timecube_lut_format_e {
TIMECUBE_LUT_ADOBE_CUBE /**< Adobe Cube LUT. */
} timecube_lut_format_e;

/** Interpolation method used when applying a LUT. */
typedef enum timecube_interpolation_e {
	TIMECUBE_INTERP_LINEAR = 0, /**< Linear (1D) or trilinear (3D). */
	TIMECUBE_INTERP_TETRA  = 1  /**< Tetrahedral (3D). */
} timecube_interpolation_e;


typedef struct timecube_lut timecube_lut;

Expand All @@ -58,15 +63,21 @@ void timecube_lut_free(timecube_lut *ptr);
typedef struct timecube_filter timecube_filter;

/** Parameters consumed by timecube_filter_create(). */
typedef struct timecube_filter_params {
unsigned width; /**< Image width the filter is created for. */
unsigned height; /**< Image height the filter is created for. */

timecube_pixel_type_e src_type; /**< Pixel type of the input. */
timecube_pixel_range_e src_range; /**< Range of the input; checked against TIMECUBE_RANGE_FULL. */
unsigned src_depth; /**< Depth of the input pixel format. */
timecube_pixel_type_e dst_type; /**< Pixel type of the output. */
timecube_pixel_range_e dst_range; /**< Range of the output; checked against TIMECUBE_RANGE_FULL. */
unsigned dst_depth; /**< Depth of the output pixel format. */

timecube_interpolation_e interp; /**< Interpolation method. */
timecube_cpu_type_e cpu; /**< CPU type, forwarded to the LUT implementation factories. */
} timecube_filter_params;

timecube_filter *timecube_filter_create(const timecube_lut *lut, const timecube_filter_params *params, unsigned width, unsigned height, timecube_cpu_type_e cpu);
timecube_filter *timecube_filter_create(const timecube_lut *lut, const timecube_filter_params *params);

size_t timecube_filter_get_tmp_size(const timecube_filter *filter);

Expand Down
5 changes: 4 additions & 1 deletion timecube/x86/lut_avx2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,11 @@ void float_to_half_avx2(const void *src, void *dst, unsigned left, unsigned righ
}


std::unique_ptr<graphengine::Filter> create_lut3d_impl_avx2(const Cube &cube, unsigned width, unsigned height)
std::unique_ptr<graphengine::Filter> create_lut3d_impl_avx2(const Cube &cube, unsigned width, unsigned height, Interpolation interp)
{
if (interp != Interpolation::LINEAR)
return nullptr;

return std::make_unique<Lut3DFilter_AVX2>(cube, width, height);
}

Expand Down
5 changes: 4 additions & 1 deletion timecube/x86/lut_avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,11 @@ void float_to_half_avx512(const void *src, void *dst, unsigned left, unsigned ri
}


std::unique_ptr<graphengine::Filter> create_lut3d_impl_avx512(const Cube &cube, unsigned width, unsigned height)
std::unique_ptr<graphengine::Filter> create_lut3d_impl_avx512(const Cube &cube, unsigned width, unsigned height, Interpolation interp)
{
if (interp != Interpolation::LINEAR)
return nullptr;

return std::make_unique<Lut3DFilter_AVX512>(cube, width, height);
}

Expand Down

0 comments on commit af682b6

Please sign in to comment.