diff --git a/SConstruct b/SConstruct index 1461af983f8f2..57e3938dfe740 100644 --- a/SConstruct +++ b/SConstruct @@ -433,6 +433,21 @@ add_option('disable-warnings-as-errors', nargs=0, ) +add_option('distance-expression-non-bson', + help="Uses Do not uses BSON to pass the values to the distance expressions. You can pass a regular float vector.", + nargs=0, +) + +add_option('distance-expression-use-avx2', + help="Uses BSON to pass the values to the distance expressions", + nargs=0, +) + +add_option('distance-expression-use-avx512', + help="Uses BSON to pass the values to the distance expressions", + nargs=0, +) + add_option('detect-odr-violations', help="Have the linker try to detect ODR violations, if supported", nargs=0, @@ -1813,6 +1828,23 @@ if env.TargetOSIs('posix'): if env.TargetOSIs('linux', 'darwin', 'solaris'): if not has_option("disable-warnings-as-errors"): env.Append( CCFLAGS=["-Werror"] ) + if has_option(distance-expression-non-bson): + env.Append( CCFLAGS=["-DDISTANCE_EXPRESSION_NOT_BSON"] ) + else: + if not has_option("distance-expression-use-avx512"): + if has_option("distance-expression-use-avx2"): + env.Append( CCFLAGS=["-mavx2"] ) + env.Append( CCFLAGS=["-march=haswell"] ) + env.Append( CCFLAGS=["-mtune=intel"] ) + env.Append( CCFLAGS=["-fopenmp"] ) + env.Append( CCFLAGS=["-O3"] ) + env.Append( CCFLAGS=["-DUSE_AVX2"] ) + else: + env.Append( CCFLAGS=["-march=skylake-avx512"] ) + env.Append( CCFLAGS=["-mtune=skylake-avx512"] ) + env.Append( CCFLAGS=["-fopenmp"] ) + env.Append( CCFLAGS=["-O3"] ) + env.Append( CCFLAGS=["-DUSE_AVX512"] ) env.Append( CXXFLAGS=["-Woverloaded-virtual"] ) if env.ToolchainIs('clang'): diff --git a/build_with_avx2.sh b/build_with_avx2.sh new file mode 100644 index 0000000000000..af39e676cfee5 --- /dev/null +++ b/build_with_avx2.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python2 buildscripts/scons.py mongod mongo mongos --disable-warnings-as-errors -j8 --release --distance-expression-use-avx2 diff --git a/pytests/distancetest.py b/pytests/distancetest.py new file mode 100644 index 0000000000000..ca19764f64773 --- /dev/null +++ b/pytests/distancetest.py @@ -0,0 +1,65 @@ +# Distance expression test. + +from __future__ import print_function +from builtins import range +from pymongo import MongoClient +import numpy as np +import random +import time +import string +from bson import binary + + +client = MongoClient() +db = client["test_speed"] + +vec_size = 4096 +num_documents = 150000 +fill_data_base = True +iterations = 10 +functions = ['no_op', 'cossim', 'chi2', 'euclidean', 'squared_euclidean', 'manhattan', 'no_op'] + +vec = [] + +for _ in range(iterations): + vec.append(binary.Binary(np.random.rand(vec_size).astype(np.float32).tobytes())) + + +if fill_data_base: + print("load database") + db.test_speed.drop() + for i in range(num_documents): + if i % 1000 == 0: print(i) + db.test_speed.insert({ + "id": random.randint(0, 1000000), + "other_id": ''.join(np.random.choice(list(string.ascii_uppercase)) for _ in range(6)), + "vector": binary.Binary(np.random.rand(vec_size).astype(np.float32).tobytes()) + }) + + print("database loaded", db.test_speed.count()) + +times_aggregate_base = np.zeros([iterations, 1], dtype=np.float32) +for function in functions: + for index in range(iterations): + start = time.time() + result = db.test_speed.aggregate([ + { + '$project': + { + 'id': '$id', + "other_id": '$other_id', + 'distance': {'${}'.format(function): [vec[index], '$vector']}, + }, + }, + {"$sort": {"distance": -1}}, + {"$limit": 20} + ]) + selection = list(result) + times_aggregate_base[index] = time.time() - start + + print("Aggregate distance {}:".format(function)) + print(" - average: {:.5f}ms".format(np.mean(times_aggregate_base) * 1000)) + print(" - std: {:.5f}ms".format(np.std(times_aggregate_base) * 1000)) + print(" - max: {:.5f}ms".format(np.max(times_aggregate_base) * 1000)) + print(" - min: {:.5f}ms".format(np.min(times_aggregate_base) * 1000)) + print(" - median: {:.5f}ms".format(np.median(times_aggregate_base) * 1000)) diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index 1a99044527963..3da3ffd6be290 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -309,6 +309,27 @@ ExitCode _initAndListen(int listenPort) { l << (is32bit ? " 32" : " 64") << "-bit host=" << getHostNameCached() << endl; } + { + LogstreamBuilder l = log(LogComponent::kControl); + + l << "Distance Expression Info: "; + +#ifdef DISTANCE_EXPRESSION_NOT_BSON + l << "Not using BSON"; +#else + l << "Using BSON"; +#ifdef USE_AVX512 + l << " - With AVX512"; +#else +#if USE_AVX2 + l << " - With AVX2"; +#endif +#endif +#endif + + l << std::endl; + } + DEV log(LogComponent::kControl) << "DEBUG build (which is slower)" << endl; #if defined(_WIN32) diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 36b99ea7c5c99..cde7d7e4f5eb2 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -136,6 +136,10 @@ env.Library( source=[ 'expression.cpp', 'expression_trigonometric.cpp', + 'expression_distance.cpp', + 'expression_distance_avx2.cpp', + 'expression_distance_avx512.cpp', + 'expression_distance_non_bson.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/db/query/datetime/date_time_support', diff --git a/src/mongo/db/pipeline/expression_distance.cpp b/src/mongo/db/pipeline/expression_distance.cpp new file mode 100644 index 0000000000000..d2f039e9105c9 --- /dev/null +++ b/src/mongo/db/pipeline/expression_distance.cpp @@ -0,0 +1,95 @@ +#if !defined(DISTANCE_EXPRESSION_NOT_BSON) && !defined(USE_AVX2) && !defined(USE_AVX512) + +#include "mongo/db/pipeline/expression_distance.h" + +namespace mongo { + +REGISTER_EXPRESSION(euclidean, ExpressionEuclidean::parse) +REGISTER_EXPRESSION(cossim, ExpressionCosineSimilarity::parse) +REGISTER_EXPRESSION(chi2, ExpressionChi2::parse) +REGISTER_EXPRESSION(squared_euclidean, ExpressionSquaredEuclidean::parse) +REGISTER_EXPRESSION(manhattan, ExpressionManhattan::parse) +REGISTER_EXPRESSION(no_op, ExpressionNoOp::parse) + +/* ------------------------- ExpressionEuclideanBin ----------------------------- */ + +Value ExpressionEuclidean::evaluateImpl(const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + float r = 0.0; + for (size_t i = 0; i < p_uiSize; ++i, ++p_pData1, ++p_pData2) { + float diff = *p_pData1 - *p_pData2; + r += diff * diff; + } + + return Value(double(std::sqrt(r))); +} + +/* ------------------------- ExpressionCosineSimilarityBin ----------------------------- */ + +Value ExpressionCosineSimilarity::evaluateImpl(const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + float dot = 0.0; + float norm_a = 0.0; + float norm_b = 0.0; + + for (size_t i = 0; i < p_uiSize; ++i, ++p_pData1, ++p_pData2) { + float a = *p_pData1; + float b = *p_pData2; + + dot += a * b; + norm_a += a * a; + norm_b += b * b; + } + + float result = 1 - ( dot / ( (std::sqrt(norm_a*norm_b) + FLT_MIN) )); + return Value(double(result)); +} + +/* ------------------------- ExpressionChi2Bin ----------------------------- */ + +Value ExpressionChi2::evaluateImpl(const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + float r = 0.0f; + for (size_t i = 0; i < p_uiSize; ++i, ++p_pData1, ++p_pData2) { + float a = *p_pData1; + float b = *p_pData2; + float t = a + b; + float diff = a - b; + + r += (diff * diff) / ( t + FLT_MIN); + } + + return Value(double(r)); +} + +/* ------------------------- ExpressionSquaredEuclideanBin ----------------------------- */ + +Value ExpressionSquaredEuclidean::evaluateImpl(const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + float r = 0.f; + for (size_t i = 0; i < p_uiSize; ++i, ++p_pData1, ++p_pData2) { + float diff = *p_pData1 - *p_pData2; + r += diff * diff; + } + + return Value(double(r)); +} + +/* ------------------------- ExpressionManhattanBin ----------------------------- */ + +Value ExpressionManhattan::evaluateImpl(const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + float r = 0.0; + + for (size_t i = 0; i < p_uiSize; ++i, ++p_pData1, ++p_pData2) { + r += std::fabs( *p_pData1 - *p_pData2 ); + } + + return Value( double(r) ); +} + +/* ------------------------- ExpressionNoOp ----------------------------- */ + +inline Value ExpressionNoOp::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + return Value( 0.0 ); +} + +} + +#endif \ No newline at end of file diff --git a/src/mongo/db/pipeline/expression_distance.h b/src/mongo/db/pipeline/expression_distance.h new file mode 100644 index 0000000000000..7a259c38f75e6 --- /dev/null +++ b/src/mongo/db/pipeline/expression_distance.h @@ -0,0 +1,124 @@ + +#pragma once + +#include "mongo/db/pipeline/expression.h" + +namespace mongo { + +// When not using BSON we will use vectors +#ifdef DISTANCE_EXPRESSION_NOT_BSON +#define DISTANCE_EVALUATE_IMPL_PROTO(type) \ + Value evaluateImpl( \ + const std::vector& vector1, \ + const std::vector& vector2) const override final; +#else +// When using BSON we will get a pointer +#define DISTANCE_EVALUATE_IMPL_PROTO(type) \ + Value evaluateImpl( \ + const type* vector1, \ + const type* vector2, const size_t size) const override final; +#endif + +#define DECLARE_DISTANCE_EXPRESSION(key, class_name, type, error) \ + class class_name final \ + : public ExpressionDistance { \ + public: \ + explicit class_name(const boost::intrusive_ptr& expCtx) \ + : ExpressionDistance(expCtx) {} \ + const char* getOpName() const final { \ + return "$" #key; \ + } \ + protected: \ + const std::vector>& getOperandList() const final { \ + return vpOperand; \ + } \ + DISTANCE_EVALUATE_IMPL_PROTO(type) \ + }; + +// Template class for defining a distance expression +template +class ExpressionDistance : public ExpressionNaryBase { +public: + explicit ExpressionDistance(const boost::intrusive_ptr& expCtx) + : ExpressionNaryBase(expCtx) {} + + Value evaluate(const Document& root) const final { + std::string sExpression = getOpName(); + const auto& vpOperand = getOperandList(); + const size_t n = vpOperand.size(); + + if (n != 2) { + uasserted(ERROR, + str::stream() << sExpression << " only suppports 2 expressions, not " << n); + } + + const Value& value1 = vpOperand[0]->evaluate(root); + const Value& value2 = vpOperand[1]->evaluate(root); + +#ifndef DISTANCE_EXPRESSION_NOT_BSON + const BSONBinData& vector1 = value1.getBinData(); + const BSONBinData& vector2 = value2.getBinData(); + + if (vector1.length != vector2.length) { + uasserted(ERROR + 1000L, + str::stream() << sExpression << " both operands must have the same length."); + } + + const T* pData1 = (const T*)vector1.data; + const T* pData2 = (const T*)vector2.data; + + return evaluateImpl(pData1, pData2, vector1.length / sizeof(T)); +#else + if (!value1.isArray()) { + uasserted(ErrorCodes::FailedToParse, + str::stream() << sExpression << " only supports array on 1st expression , not " + << typeName(value1.getType())); + } + + if (!value2.isArray()) { + uasserted(ErrorCodes::FailedToParse, + str::stream() << sExpression << " only supports array on 2nd expression, not " + << typeName(value2.getType())); + } + + const std::vector& vector1 = value1.getArray(); + const std::vector& vector2 = value2.getArray(); + + if(vector1.size() != vector2.size()){ + uasserted(ErrorCodes::FailedToParse, + str::stream() << sExpression << " vectors of different sizes found " + << vector1.size() << " " << vector2.size()); + } + + return evaluateImpl(vector1, vector2); +#endif + } + + bool isAssociative() const final { + return true; + } + + bool isCommutative() const final { + return false; + } + + virtual const char* getOpName() const = 0; + +protected: + virtual const std::vector>& getOperandList() const = 0; +#ifndef DISTANCE_EXPRESSION_NOT_BSON + virtual Value evaluateImpl(const T* p_pData1, const T* p_pData2, const size_t p_uiSize) const = 0; +#else + virtual Value evaluateImpl(const std::vector& vector1, const std::vector& vector2) const = 0; +#endif +}; + +DECLARE_DISTANCE_EXPRESSION(euclidean, ExpressionEuclidean, float, 9020) +DECLARE_DISTANCE_EXPRESSION(cossim, ExpressionCosineSimilarity, float, 90021) +DECLARE_DISTANCE_EXPRESSION(chi2, ExpressionChi2, float, 90022) +DECLARE_DISTANCE_EXPRESSION(squared_euclidean, ExpressionSquaredEuclidean, float, 90023) +DECLARE_DISTANCE_EXPRESSION(manhattan, ExpressionManhattan, float, 90024) +// Only for benchmarking +DECLARE_DISTANCE_EXPRESSION(no_op, ExpressionNoOp, float, 90025) + +} diff --git a/src/mongo/db/pipeline/expression_distance_avx2.cpp b/src/mongo/db/pipeline/expression_distance_avx2.cpp new file mode 100644 index 0000000000000..c45da12e47a9f --- /dev/null +++ b/src/mongo/db/pipeline/expression_distance_avx2.cpp @@ -0,0 +1,231 @@ +#if !defined(DISTANCE_EXPRESSION_NOT_BSON) && defined(USE_AVX2) && !defined(USE_AVX512) + +#include "mongo/db/pipeline/expression_distance.h" +#include + +namespace mongo { + +REGISTER_EXPRESSION(euclidean, ExpressionEuclidean::parse) +REGISTER_EXPRESSION(cossim, ExpressionCosineSimilarity::parse) +REGISTER_EXPRESSION(chi2, ExpressionChi2::parse) +REGISTER_EXPRESSION(squared_euclidean, ExpressionSquaredEuclidean::parse) +REGISTER_EXPRESSION(manhattan, ExpressionManhattan::parse) +REGISTER_EXPRESSION(no_op, ExpressionNoOp::parse) + +/* ------------------------- ExpressionEuclidean ----------------------------- */ + +/** + * @brief Union used to access the __m256 individual values + */ +union U256f +{ + __m256 v; ///< AVX vector + float a[8]; ///< Equivalent float array +}; + +Value ExpressionEuclidean::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U256f sPackVal; + __m256& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm256_set1_ps( 0.0f ); + + const size_t uiPackSize = sizeof(__m256) / sizeof(float); + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m256 packVec1 = _mm256_loadu_ps( p_pData1 ); + const __m256 packVec2 = _mm256_loadu_ps( p_pData2 ); + const __m256 packDiff = _mm256_sub_ps(packVec1, packVec2); + packVal = _mm256_fmadd_ps( packDiff, packDiff, packVal ); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float fDiff = *p_pData1 - *p_pData2; + r += fDiff * fDiff; + } + + return Value(double(std::sqrt(r))); +} + +/* ------------------------- ExpressionCosineSimilarity ----------------------------- */ + +Value ExpressionCosineSimilarity::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + + U256f sPackDot; + __m256& packDot = sPackDot.v; + const float* pPackDot = sPackDot.a; + packDot = _mm256_set1_ps( 0.0f ); + + U256f sPackNorm_a; + __m256& packNorm_a = sPackNorm_a.v; + const float* pPackNorm_a = sPackNorm_a.a; + packNorm_a = _mm256_set1_ps( 0.0f ); + + U256f sPackNorm_b; + __m256& packNorm_b = sPackNorm_b.v; + const float* pPackNorm_b = sPackNorm_b.a; + packNorm_b = _mm256_set1_ps( 0.0f ); + + const size_t uiPackSize = sizeof(__m256) / sizeof(float); + + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m256 packVec1 = _mm256_loadu_ps( p_pData1 ); + const __m256 packVec2 = _mm256_loadu_ps( p_pData2 ); + + packDot = _mm256_fmadd_ps( packVec1, packVec2, packDot ); + packNorm_a = _mm256_fmadd_ps( packVec1, packVec1, packNorm_a ); + packNorm_b = _mm256_fmadd_ps( packVec2, packVec2, packNorm_b ); + } + + float dot = 0.f; + float norm_a = 0.f; + float norm_b = 0.f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackDot, ++pPackNorm_a, ++pPackNorm_b) { + dot += *pPackDot; + norm_a += *pPackNorm_a; + norm_b += *pPackNorm_b; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float a = *p_pData1; + float b = *p_pData2; + + dot += a * b; + norm_a += a * a; + norm_b += b * b; + } + + float result = 1 - ( dot / ( (std::sqrt(norm_a * norm_b) + FLT_MIN) )); + return Value(double(result)); +} + +/* ------------------------- ExpressionChi2 ----------------------------- */ + +Value ExpressionChi2::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U256f sPackVal; + __m256& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm256_set1_ps( 0.0f ); + + const __m256 packDBL_MIN = _mm256_set1_ps( FLT_MIN ); + + const size_t uiPackSize = sizeof(__m256) / sizeof(float); + + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m256 packVec1 = _mm256_loadu_ps( p_pData1 ); + const __m256 packVec2 = _mm256_loadu_ps( p_pData2 ); + + const __m256 packAdd = _mm256_add_ps(packVec1, packVec2); + const __m256 packSub = _mm256_sub_ps(packVec1, packVec2); + + packVal = _mm256_add_ps( + packVal, + _mm256_div_ps( + _mm256_mul_ps(packSub, packSub), + _mm256_add_ps(packAdd, packDBL_MIN))); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float a = *p_pData1; + float b = *p_pData2; + float t = a + b; + float diff = a - b; + + r += (diff * diff) / ( t + FLT_MIN); + } + + return Value(double(r)); +} + +/* ------------------------- ExpressionSquaredEuclidean ----------------------------- */ + +Value ExpressionSquaredEuclidean::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U256f sPackVal; + __m256& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm256_set1_ps( 0.0f ); + + const size_t uiPackSize = sizeof(__m256) / sizeof(float); + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m256 packVec1 = _mm256_loadu_ps( p_pData1 ); + const __m256 packVec2 = _mm256_loadu_ps( p_pData2 ); + const __m256 packDiff = _mm256_sub_ps(packVec1, packVec2); + packVal = _mm256_fmadd_ps( packDiff, packDiff, packVal ); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float diff = *p_pData1 - *p_pData2; + r += diff * diff; + } + + return Value(double(r)); +} + +/* ------------------------- ExpressionManhattanBin ----------------------------- */ + +template +static inline __m256 constant8f() { + static const union { + int i[8]; + __m256 ymm; + } u = {{i0,i1,i2,i3,i4,i5,i6,i7}}; + return u.ymm; +} + +Value ExpressionManhattan::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U256f sPackVal; + __m256& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm256_set1_ps( 0.0f ); + + __m256 mask = constant8f<0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF> (); + + const size_t uiPackSize = sizeof(__m256) / sizeof(float); + + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m256 packVec1 = _mm256_loadu_ps( p_pData1 ); + const __m256 packVec2 = _mm256_loadu_ps( p_pData2 ); + const __m256 packABSDiff = _mm256_and_ps(_mm256_sub_ps(packVec1, packVec2), mask); + packVal = _mm256_add_ps( packABSDiff, packVal ); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + r += std::fabs( *p_pData1 - *p_pData2 ); + } + + return Value( double(r) ); +} + +/* ------------------------- ExpressionNoOp ----------------------------- */ + +inline Value ExpressionNoOp::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + return Value( 0.0 ); +} + +} + +#endif \ No newline at end of file diff --git a/src/mongo/db/pipeline/expression_distance_avx512.cpp b/src/mongo/db/pipeline/expression_distance_avx512.cpp new file mode 100644 index 0000000000000..075f17823d9c2 --- /dev/null +++ b/src/mongo/db/pipeline/expression_distance_avx512.cpp @@ -0,0 +1,231 @@ +#if !defined(DISTANCE_EXPRESSION_NOT_BSON) && defined(USE_AVX512) + +#include "mongo/db/pipeline/expression_distance.h" +#include + +namespace mongo { + +REGISTER_EXPRESSION(euclidean, ExpressionEuclidean::parse) +REGISTER_EXPRESSION(cossim, ExpressionCosineSimilarity::parse) +REGISTER_EXPRESSION(chi2, ExpressionChi2::parse) +REGISTER_EXPRESSION(squared_euclidean, ExpressionSquaredEuclidean::parse) +REGISTER_EXPRESSION(manhattan, ExpressionManhattan::parse) +REGISTER_EXPRESSION(no_op, ExpressionNoOp::parse) + +/* ------------------------- ExpressionEuclidean ----------------------------- */ + +/** + * @brief Union used to access the __m512 individual values + */ +union U512f +{ + __m512 v; ///< AVX vector + float a[16]; ///< Equivalent float array +}; + +Value ExpressionEuclidean::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U512f sPackVal; + __m512& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm512_set1_ps( 0.0f ); + + const size_t uiPackSize = sizeof(__m512) / sizeof(float); + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m512 packVec1 = _mm512_loadu_ps( p_pData1 ); + const __m512 packVec2 = _mm512_loadu_ps( p_pData2 ); + const __m512 packDiff = _mm512_sub_ps(packVec1, packVec2); + packVal = _mm512_fmadd_ps( packDiff, packDiff, packVal ); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float fDiff = *p_pData1 - *p_pData2; + r += fDiff * fDiff; + } + + return Value(double(std::sqrt(r))); +} + +/* ------------------------- ExpressionCosineSimilarity ----------------------------- */ + +Value ExpressionCosineSimilarity::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + + U512f sPackDot; + __m512& packDot = sPackDot.v; + const float* pPackDot = sPackDot.a; + packDot = _mm512_set1_ps( 0.0f ); + + U512f sPackNorm_a; + __m512& packNorm_a = sPackNorm_a.v; + const float* pPackNorm_a = sPackNorm_a.a; + packNorm_a = _mm512_set1_ps( 0.0f ); + + U512f sPackNorm_b; + __m512& packNorm_b = sPackNorm_b.v; + const float* pPackNorm_b = sPackNorm_b.a; + packNorm_b = _mm512_set1_ps( 0.0f ); + + const size_t uiPackSize = sizeof(__m512) / sizeof(float); + + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m512 packVec1 = _mm512_loadu_ps( p_pData1 ); + const __m512 packVec2 = _mm512_loadu_ps( p_pData2 ); + + packDot = _mm512_fmadd_ps( packVec1, packVec2, packDot ); + packNorm_a = _mm512_fmadd_ps( packVec1, packVec1, packNorm_a ); + packNorm_b = _mm512_fmadd_ps( packVec2, packVec2, packNorm_b ); + } + + float dot = 0.f; + float norm_a = 0.f; + float norm_b = 0.f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackDot, ++pPackNorm_a, ++pPackNorm_b) { + dot += *pPackDot; + norm_a += *pPackNorm_a; + norm_b += *pPackNorm_b; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float a = *p_pData1; + float b = *p_pData2; + + dot += a * b; + norm_a += a * a; + norm_b += b * b; + } + + float result = 1 - ( dot / ( (std::sqrt(norm_a*norm_b) + FLT_MIN) )); + return Value(double(result)); +} + +/* ------------------------- ExpressionChi2 ----------------------------- */ + +Value ExpressionChi2::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U512f sPackVal; + __m512& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm512_set1_ps( 0.0f ); + + const __m512 packDBL_MIN = _mm512_set1_ps( FLT_MIN ); + + const size_t uiPackSize = sizeof(__m512) / sizeof(float); + + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m512 packVec1 = _mm512_loadu_ps( p_pData1 ); + const __m512 packVec2 = _mm512_loadu_ps( p_pData2 ); + + const __m512 packAdd = _mm512_add_ps(packVec1, packVec2); + const __m512 packSub = _mm512_sub_ps(packVec1, packVec2); + + packVal = _mm512_add_ps( + packVal, + _mm512_div_ps( + _mm512_mul_ps(packSub, packSub), + _mm512_add_ps(packAdd, packDBL_MIN))); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float a = *p_pData1; + float b = *p_pData2; + float t = a + b; + float diff = a - b; + + r += (diff * diff) / ( t + FLT_MIN); + } + + return Value(double(r)); +} + +/* ------------------------- ExpressionSquaredEuclidean ----------------------------- */ + +Value ExpressionSquaredEuclidean::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U512f sPackVal; + __m512& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm512_set1_ps( 0.0f ); + + const size_t uiPackSize = sizeof(__m512) / sizeof(float); + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m512 packVec1 = _mm512_loadu_ps( p_pData1 ); + const __m512 packVec2 = _mm512_loadu_ps( p_pData2 ); + const __m512 packDiff = _mm512_sub_ps(packVec1, packVec2); + packVal = _mm512_fmadd_ps( packDiff, packDiff, packVal ); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + float diff = *p_pData1 - *p_pData2; + r += diff * diff; + } + + return Value(double(r)); +} + +/* ------------------------- ExpressionManhattanBin ----------------------------- */ + +template +static inline __m512 constant16f() { + static const union { + int i[16]; + __m512 ymm; + } u = {{i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12,i13,i14,i15}}; + return u.ymm; +} + +Value ExpressionManhattan::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + U512f sPackVal; + __m512& packVal = sPackVal.v; + const float* pPackVal = sPackVal.a; + packVal = _mm512_set1_ps( 0.0f ); + + __m512 mask = constant16f<0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF> (); + + const size_t uiPackSize = sizeof(__m512) / sizeof(float); + + for (size_t i = 0; i < p_uiSize / uiPackSize; ++i, p_pData1 += uiPackSize, p_pData2 += uiPackSize) { + const __m512 packVec1 = _mm512_loadu_ps( p_pData1 ); + const __m512 packVec2 = _mm512_loadu_ps( p_pData2 ); + const __m512 packABSDiff = _mm512_and_ps(_mm512_sub_ps(packVec1, packVec2), mask); + packVal = _mm512_add_ps( packABSDiff, packVal ); + } + + float r = 0.0f; + for (size_t i = 0; i < uiPackSize; ++i, ++pPackVal) { + r += *pPackVal; + } + + for (size_t i = 0; i < p_uiSize % uiPackSize; ++i, ++p_pData1, ++p_pData2) { + r += std::fabs( *p_pData1 - *p_pData2 ); + } + + return Value( double(r) ); +} + +/* ------------------------- ExpressionNoOp ----------------------------- */ + +inline Value ExpressionNoOp::evaluateImpl( + const float* p_pData1, const float* p_pData2, const size_t p_uiSize) const { + return Value( 0.0 ); +} + +} + +#endif diff --git a/src/mongo/db/pipeline/expression_distance_non_bson.cpp b/src/mongo/db/pipeline/expression_distance_non_bson.cpp new file mode 100644 index 0000000000000..b76f18c5c2cf5 --- /dev/null +++ b/src/mongo/db/pipeline/expression_distance_non_bson.cpp @@ -0,0 +1,117 @@ +#ifdef DISTANCE_EXPRESSION_NOT_BSON + +#include "mongo/db/pipeline/expression_distance.h" + +namespace mongo { + +REGISTER_EXPRESSION(euclidean, ExpressionEuclidean::parse) +REGISTER_EXPRESSION(cossim, ExpressionCosineSimilarity::parse) +REGISTER_EXPRESSION(chi2, ExpressionChi2::parse) +REGISTER_EXPRESSION(squared_euclidean, ExpressionSquaredEuclidean::parse) +REGISTER_EXPRESSION(manhattan, ExpressionManhattan::parse) +REGISTER_EXPRESSION(no_op, ExpressionNoOp::parse) + +/* ------------------------- ExpressionEuclidean ----------------------------- */ + +inline Value ExpressionEuclidean::evaluateImpl( + const std::vector& vector1, const std::vector& vector2) const { + double r = 0.0; + + auto it1 = vector1.cbegin(); + auto it2 = vector2.cbegin(); + + for (size_t i = 0; i < vector1.size(); ++i, ++it1, ++it2) { + double diff = it1->getDouble() - it2->getDouble(); + r += diff * diff; + } + + return Value(std::sqrt(r)); +} + +/* ------------------------- ExpressionCosineSimilarity ----------------------------- */ + +inline Value ExpressionCosineSimilarity::evaluateImpl( + const std::vector& vector1, const std::vector& vector2) const { + + double dot = 0.0; + double norm_a = 0.0; + double norm_b = 0.0; + + auto it1 = vector1.cbegin(); + auto it2 = vector2.cbegin(); + + for (size_t i = 0; i < vector1.size(); ++i, ++it1, ++it2) { + double a = it1->getDouble(); + double b = it2->getDouble(); + + dot += a * b; + norm_a += a * a; + norm_b += b * b; + } + + double result = 1 - ( dot / ( std::sqrt(norm_a*norm_b) + FLT_MIN) ); + return Value(result); +} + +/* ------------------------- ExpressionChi2 ----------------------------- */ + +inline Value ExpressionChi2::evaluateImpl( + const std::vector& vector1, const std::vector& vector2) const { + + double r = 0.0; + auto it1 = vector1.cbegin(); + auto it2 = vector2.cbegin(); + + for (size_t i = 0; i < vector1.size(); ++i, ++it1, ++it2) { + double a = it1->getDouble(); + double b = it2->getDouble(); + double t = a + b; + double diff = a - b; + + r += (diff * diff) / ( t + FLT_MIN); + } + + return Value(r); +} + +/* ------------------------- ExpressionSquaredEuclidean ----------------------------- */ + +inline Value ExpressionSquaredEuclidean::evaluateImpl( + const std::vector& vector1, const std::vector& vector2) const { + double r = 0.0; + + auto it1 = vector1.cbegin(); + auto it2 = vector2.cbegin(); + + for (size_t i = 0; i < vector1.size(); ++i, ++it1, ++it2) { + double diff = it1->getDouble() - it2->getDouble(); + r += diff * diff; + } + + return Value(r); +} + +/* ------------------------- ExpressionManhattan ----------------------------- */ + +inline Value ExpressionManhattan::evaluateImpl( + const std::vector& vector1, const std::vector& vector2) const { + double r = 0.0; + auto it1 = vector1.cbegin(); + auto it2 = vector2.cbegin(); + + for (size_t i = 0; i < vector1.size(); ++i, ++it1, ++it2) { + r += std::abs( it1->getDouble() - it2->getDouble() ); + } + + return Value( r ); +} + +/* ------------------------- ExpressionNoOp ----------------------------- */ + +inline Value ExpressionNoOp::evaluateImpl( + const std::vector& vector1, const std::vector& vector2) const { + return Value( 0.0 ); +} +} + +#endif