From 9fad0043ec2f8b4dfc26d5d4f8b7df2cfaef89b4 Mon Sep 17 00:00:00 2001 From: Jessica Garson Date: Thu, 17 Apr 2025 12:14:09 -0400 Subject: [PATCH] Add supporting blog content on the why and how for BBQ (#431) Co-authored-by: Carly Richmond <74931905+carlyrichmond@users.noreply.github.com> --- ...1-create-an-inference-endpoint-output.json | 18 + .../Outputs/02-embed-text-output.json | 392 +++++++++++++++++ ...03-create-byte-qauntized-index-output.json | 5 + ...e-and-simulate-ingest-pipeline-output.json | 408 ++++++++++++++++++ .../Outputs/05-bbq-index-output.json | 63 +++ .../Outputs/06-bbq-search-8-18-output.json | 35 ++ .../Outputs/07-raw-vector-output.json | 52 +++ .../01-create-an-inference-endpoint.json | 12 + .../Queries/02-embed-text.json | 4 + .../03-create-byte-qauntized-index.json | 17 + ...4-create-and-simulate-ingest-pipeline.json | 27 ++ .../how-and-why-bbq/Queries/05-bbq-index.json | 35 ++ .../Queries/06-bbq-search-8-18.json | 28 ++ .../Queries/07-raw-vector-index.json | 53 +++ .../how-and-why-bbq/README.md | 23 + 15 files changed, 1172 insertions(+) create mode 100644 supporting-blog-content/how-and-why-bbq/Outputs/01-create-an-inference-endpoint-output.json create mode 100644 supporting-blog-content/how-and-why-bbq/Outputs/02-embed-text-output.json create mode 100644 supporting-blog-content/how-and-why-bbq/Outputs/03-create-byte-qauntized-index-output.json create mode 100644 supporting-blog-content/how-and-why-bbq/Outputs/04-create-and-simulate-ingest-pipeline-output.json create mode 100644 supporting-blog-content/how-and-why-bbq/Outputs/05-bbq-index-output.json create mode 100644 supporting-blog-content/how-and-why-bbq/Outputs/06-bbq-search-8-18-output.json create mode 100644 supporting-blog-content/how-and-why-bbq/Outputs/07-raw-vector-output.json create mode 100644 supporting-blog-content/how-and-why-bbq/Queries/01-create-an-inference-endpoint.json create mode 100644 supporting-blog-content/how-and-why-bbq/Queries/02-embed-text.json create mode 
100644 supporting-blog-content/how-and-why-bbq/Queries/03-create-byte-qauntized-index.json create mode 100644 supporting-blog-content/how-and-why-bbq/Queries/04-create-and-simulate-ingest-pipeline.json create mode 100644 supporting-blog-content/how-and-why-bbq/Queries/05-bbq-index.json create mode 100644 supporting-blog-content/how-and-why-bbq/Queries/06-bbq-search-8-18.json create mode 100644 supporting-blog-content/how-and-why-bbq/Queries/07-raw-vector-index.json create mode 100644 supporting-blog-content/how-and-why-bbq/README.md diff --git a/supporting-blog-content/how-and-why-bbq/Outputs/01-create-an-inference-endpoint-output.json b/supporting-blog-content/how-and-why-bbq/Outputs/01-create-an-inference-endpoint-output.json new file mode 100644 index 00000000..1c7de850 --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Outputs/01-create-an-inference-endpoint-output.json @@ -0,0 +1,18 @@ +{ + "inference_id": "my_e5_model", + "task_type": "text_embedding", + "service": "elasticsearch", + "service_settings": { + "num_threads": 1, + "model_id": ".multilingual-e5-small", + "adaptive_allocations": { + "enabled": true, + "min_number_of_allocations": 1 + } + }, + "chunking_settings": { + "strategy": "sentence", + "max_chunk_size": 250, + "sentence_overlap": 1 + } +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/Outputs/02-embed-text-output.json b/supporting-blog-content/how-and-why-bbq/Outputs/02-embed-text-output.json new file mode 100644 index 00000000..f57adeeb --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Outputs/02-embed-text-output.json @@ -0,0 +1,392 @@ +{ + "text_embedding": [ + { + "embedding": [ + 0.017882707, + 0.00754774, + -0.05140426, + -0.0794881, + 0.07463729, + -0.03337092, + 0.053825304, + 0.0377874, + 0.0067667975, + 0.008241221, + 0.01010678, + 0.033864856, + 0.08292519, + -0.019575868, + -0.058219254, + 0.04311379, + 0.083135895, + -0.07104059, + -0.0647233, + -0.02983887, + 0.028270015, + 
0.014934712, + -0.034972306, + 0.01985291, + 0.044561125, + 0.041011654, + -0.05760242, + 0.003273025, + 0.0108314445, + -0.069619864, + -0.06764081, + -0.021722095, + 0.050636988, + -0.05783613, + 0.08788557, + 0.040935434, + -0.031601917, + -0.07880007, + 0.0804327, + -0.08104602, + 0.010797213, + 0.025301866, + 0.05047634, + 0.060484745, + 0.020690108, + 0.034209967, + -0.048974272, + 0.07277602, + -0.05646165, + -0.04160833, + -0.011512723, + 0.07017709, + -0.0009960972, + 0.09233865, + -0.016680103, + -0.08042656, + -0.058609486, + -0.074038155, + -0.09787799, + 0.029524267, + 0.106590435, + 0.02814243, + -0.044649575, + 0.0635766, + 0.05412523, + 0.08160417, + 0.045977954, + 0.046586476, + -0.029980162, + -0.023234848, + -0.007620109, + 0.049971495, + -0.003910199, + -0.041484706, + 0.0044540297, + 0.04530542, + 0.043684065, + -0.056457844, + 0.044441435, + -0.0531048, + -0.0116769, + -0.036178127, + -0.08401818, + 0.015129895, + -0.04466162, + 0.04826808, + 0.039240766, + -0.06186057, + 0.07367377, + -0.018674573, + 0.073961996, + 0.05229366, + -0.054177817, + -0.06961572, + -0.075259306, + -0.02926739, + -0.0671777, + 0.03137406, + 0.09228432, + -0.05071651, + 0.06463561, + -0.013042693, + 0.010146317, + -0.05629183, + -0.03316972, + 0.042261325, + 0.05922734, + -0.06399453, + 0.03861958, + -0.03447209, + -0.011935472, + -0.0022027188, + 0.04597909, + 0.0276734, + -0.06637124, + -0.033742618, + -0.062120177, + -0.026836405, + 0.0589634, + -0.04745798, + 0.074215755, + -0.03509744, + -0.032141436, + -0.032578144, + -0.035351794, + -0.020173872, + 0.013632728, + 0.056301866, + 0.084614724, + 0.011942883, + 0.032317948, + 0.028599454, + 0.026472442, + 0.032935087, + 0.044419926, + 0.071029276, + 0.002326048, + 0.029171288, + 0.029059634, + -0.018400453, + -0.07531271, + 0.07322439, + -0.063976005, + 0.03636873, + 0.028450442, + 0.024329077, + 0.061258025, + 0.0006878879, + 0.121465504, + -0.037444018, + 0.0317553, + -0.052851383, + 0.059440065, + 0.031601388, 
+ 0.028706929, + -0.02706372, + -0.06413192, + -0.059457105, + 0.023011703, + 0.058344685, + -0.095321335, + -0.07921855, + -0.08235325, + -0.010943376, + -0.07797337, + -0.06430494, + 0.029538997, + 0.07767968, + -0.07928501, + -0.038389068, + -0.043971557, + 0.043048978, + -0.05903736, + 0.039005242, + -0.011643711, + 0.04166799, + -0.05254361, + 0.053826477, + 0.0536851, + 0.05272543, + -0.00997026, + -0.00022187422, + -0.11801685, + -0.018299107, + -0.04725747, + -0.051873036, + -0.06563313, + 0.03288842, + 0.042040218, + -0.06340394, + 0.0020295405, + 0.027572919, + -0.03830274, + -0.10479678, + -0.064379975, + -0.014465894, + -0.024353584, + 0.057436146, + 0.051797643, + 0.018925602, + -0.023694647, + -0.036631647, + 0.03931117, + 0.035966646, + 0.075625904, + 0.07963725, + -0.0620353, + 0.017363677, + -0.0060705314, + 0.048133038, + 0.061502654, + -0.047099743, + -0.08938604, + 0.060441542, + -0.09073369, + -0.07462157, + -0.017155174, + 0.06851411, + -0.03466907, + -0.0070933076, + 0.09468233, + -0.034249544, + 0.03942899, + -0.034940742, + -0.040276565, + 0.042472836, + 0.07576726, + -0.056162287, + -0.08219915, + 0.021380004, + -0.024304898, + -0.004749481, + -0.074420616, + -0.023951275, + -0.06449255, + -0.06275474, + -0.024615264, + 0.051247228, + 0.056509957, + -0.02456814, + -0.014344284, + 0.0012114489, + 0.023018232, + -0.043505844, + 0.050323028, + 0.006390831, + -0.066652484, + 0.03025928, + -0.0017910877, + 0.04944026, + 0.015356705, + -0.04991971, + -0.060540304, + -0.10785267, + -0.018113675, + 0.00698045, + 0.07371833, + 0.06344217, + -0.068986714, + 0.03320045, + 0.035164386, + 0.019179309, + 0.08137483, + 0.055223003, + 0.061054908, + 0.050525956, + -0.05645053, + -0.05685308, + -0.06607613, + -0.025397861, + -0.037875056, + -0.020294027, + 0.03402069, + -0.07022249, + -0.07152459, + -0.03397254, + 0.010944378, + 0.09868991, + -0.051756073, + -0.056134425, + 0.026761072, + 0.02256256, + 0.038909007, + 0.05124409, + 0.04919525, + 
0.0069734324, + -0.01927199, + 0.063995786, + -0.028564125, + -0.04289471, + -0.037628923, + -0.0906451, + 0.021897383, + -0.015711036, + 0.038053565, + 0.03104304, + -0.011339446, + 0.045803826, + -0.05910163, + 0.077166826, + -0.0056588943, + -0.056046203, + 0.06780512, + 0.06279508, + -0.036007978, + 0.036326967, + -0.008667612, + 0.04016127, + 0.014136794, + 0.05789559, + 0.07011927, + 0.023931358, + -0.044535495, + -0.062862635, + 0.037587166, + 0.03913124, + -0.023209842, + -0.065808654, + -0.07194348, + -0.046462987, + -0.026283259, + -0.052352536, + -0.04431873, + -0.043205053, + 0.07228551, + 0.03459116, + -0.034738187, + -0.07032639, + 0.045328703, + -0.010402172, + 0.015478816, + -0.054134957, + -0.013555574, + 0.0707372, + -0.03362771, + -0.06710364, + -0.017245375, + 0.040216405, + -0.057644565, + -0.09179435, + 0.023175245, + 0.03170296, + -0.025988122, + 0.049363554, + -0.052197084, + -0.04411872, + 0.02953782, + -0.011917903, + -0.038588587, + 0.044539195, + 0.08822522, + -0.093097664, + -0.009709117, + 0.07747781, + -0.049614716, + 0.06911009, + -0.07285798, + -0.0001584932, + 0.038182963, + 0.06512034, + -0.061488036, + 0.007169193, + 0.031480115, + 0.07539637, + 0.051582407, + 0.050536785, + -0.01973314, + 0.041353483, + 0.047523044, + -0.04574848, + 0.052923046, + 0.03476711, + -0.032278378, + 0.03399122, + -0.034692813, + -0.07219401, + -0.065978095, + 0.030079346, + -0.05844583, + 0.0062834006, + 0.010926522, + 0.065977775, + 0.027326902, + 0.057721164 + ] + } + ] +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/Outputs/03-create-byte-qauntized-index-output.json b/supporting-blog-content/how-and-why-bbq/Outputs/03-create-byte-qauntized-index-output.json new file mode 100644 index 00000000..8b60187f --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Outputs/03-create-byte-qauntized-index-output.json @@ -0,0 +1,5 @@ +{ + "acknowledged": true, + "shards_acknowledged": true, + "index": 
"bbq-my-byte-quantized-index" +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/Outputs/04-create-and-simulate-ingest-pipeline-output.json b/supporting-blog-content/how-and-why-bbq/Outputs/04-create-and-simulate-ingest-pipeline-output.json new file mode 100644 index 00000000..394b6333 --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Outputs/04-create-and-simulate-ingest-pipeline-output.json @@ -0,0 +1,408 @@ +{ + "acknowledged": true +} + +{ + "docs": [ + { + "doc": { + "_index": "_index", + "_version": "-3", + "_id": "_id", + "_source": { + "my_field": "my awesome text field", + "model_id": "my_e5_model", + "my_vector": [ + 0.014117230661213398, + 0.01686558872461319, + -0.008850667625665665, + -0.09943139553070068, + 0.07708954066038132, + -0.05163203552365303, + 0.031171923503279686, + 0.04832465201616287, + -0.020899513736367226, + -0.018053947016596794, + 0.027316104620695114, + 0.014158264733850956, + 0.11380597949028015, + -0.015239515341818333, + -0.06438341736793518, + 0.05711129680275917, + 0.11058849096298218, + -0.06255044043064117, + -0.0790872871875763, + -0.030549775809049606, + 0.040180306881666183, + 0.004955410957336426, + -0.04646746814250946, + 0.021012170240283012, + 0.040134672075510025, + 0.05860847607254982, + -0.010344144888222218, + 0.025695478543639183, + 0.01739308424293995, + -0.08437623828649521, + -0.06068837270140648, + -0.009244256652891636, + 0.040913231670856476, + -0.035049907863140106, + 0.1083223968744278, + 0.05680833384394646, + -0.05127633363008499, + -0.0781271755695343, + 0.09563241899013519, + -0.07934834063053131, + -0.0021085920743644238, + 0.011026011779904366, + 0.0575995035469532, + 0.055666204541921616, + 0.008698184974491596, + 0.052413638681173325, + -0.0530681312084198, + 0.04424352943897247, + -0.05874579772353172, + -0.04111277684569359, + -0.025011995807290077, + 0.05002828687429428, + 0.024922635406255722, + 0.09471873939037323, + 0.010852992534637451, + 
-0.05357332527637482, + -0.03864268958568573, + -0.06396279484033585, + -0.07514549791812897, + 0.014889034442603588, + 0.06409039348363876, + -0.010277634486556053, + -0.021992946043610573, + 0.0228470079600811, + 0.06578108668327332, + 0.06204702705144882, + 0.04228932037949562, + 0.04630361869931221, + -0.060939010232686996, + -0.026216907426714897, + -0.028264254331588745, + 0.04786313325166702, + -0.010688557289540768, + -0.0259508416056633, + 0.003618428483605385, + 0.05101398378610611, + 0.017485808581113815, + -0.07494461536407471, + 0.06631576269865036, + -0.04921891540288925, + -0.03283367678523064, + -0.0374385304749012, + -0.07021112740039825, + 0.01079645287245512, + -0.0577499158680439, + 0.03755074739456177, + 0.031208734959363937, + -0.058882080018520355, + 0.07008558511734009, + -0.0016942503862082958, + 0.048379648476839066, + 0.041923727840185165, + -0.07495062053203583, + -0.049852948635816574, + -0.08567177504301071, + -0.022148169577121735, + -0.06854794919490814, + 0.04855164512991905, + 0.08454372733831406, + -0.019299620762467384, + 0.05618176609277725, + -0.008493575267493725, + 0.03973887860774994, + -0.08072596788406372, + -0.04546397551894188, + 0.065889872610569, + 0.09201831370592117, + -0.06945619732141495, + 0.03968527913093567, + -0.021963248029351234, + -0.017526201903820038, + -0.004171743988990784, + 0.06078216806054115, + 0.02763814479112625, + -0.06340096890926361, + -0.00816992949694395, + -0.04951060563325882, + -0.0377911739051342, + 0.03692726045846939, + -0.05377606675028801, + 0.08344040811061859, + -0.013646180741488934, + -0.023783689364790916, + -0.02773597463965416, + -0.038830969482660294, + -0.022136112675070763, + 0.037402600049972534, + 0.05393223837018013, + 0.053188905119895935, + 0.0012201241916045547, + 0.021060524508357048, + 0.05806620046496391, + 0.03334949538111687, + 0.05800417810678482, + 0.05594460293650627, + 0.05958021432161331, + 0.0018745578126981854, + 0.04669446125626564, + 0.0005995833780616522, 
+ -0.021198472008109093, + -0.05856002867221832, + 0.07705002278089523, + -0.08741088956594467, + 0.0471649169921875, + 0.01792702078819275, + 0.02156677469611168, + 0.04489818215370178, + 0.02181761898100376, + 0.09009192138910294, + -0.026028715074062347, + 0.049218643456697464, + -0.06272420287132263, + 0.04283611848950386, + 0.032210689038038254, + 0.01947902701795101, + -0.02781132608652115, + -0.06560772657394409, + -0.03242624178528786, + 0.04794762656092644, + 0.05626260116696358, + -0.057930923998355865, + -0.07916364818811417, + -0.07625249028205872, + -0.021680274978280067, + -0.06768984347581863, + -0.0395563468337059, + 0.011145554482936859, + 0.06499660015106201, + -0.08485601842403412, + -0.033887118101119995, + -0.051309872418642044, + 0.03737208992242813, + -0.08072631806135178, + 0.036131761968135834, + -0.0032977224327623844, + 0.061484336853027344, + -0.06715035438537598, + 0.053662654012441635, + 0.052834395319223404, + 0.040192462503910065, + -0.012980467639863491, + -0.01864015869796276, + -0.10407830774784088, + -0.020117323845624924, + -0.06154811754822731, + -0.045898959040641785, + -0.05987504497170448, + 0.03721935302019119, + -0.014564362354576588, + -0.07120151072740555, + -0.019318722188472748, + 0.027949335053563118, + -0.04139215871691704, + -0.08864161372184753, + -0.05543921887874603, + -0.021943338215351105, + -0.017345717176795006, + 0.05379396677017212, + 0.06460326164960861, + 0.003120285924524069, + -0.02253117598593235, + -0.04169423505663872, + 0.04711073637008667, + 0.037893567234277725, + 0.05186891183257103, + 0.09436721354722977, + -0.09036459028720856, + 0.020539160817861557, + -0.0013195073697715998, + 0.037375129759311676, + 0.06031087785959244, + -0.03964502364397049, + -0.07984009385108948, + 0.08090915530920029, + -0.10296571254730225, + -0.03074764832854271, + -0.017119836062192917, + 0.07522668689489365, + -0.04393104091286659, + 0.01065216027200222, + 0.07896833121776581, + -0.05151388421654701, + 
0.06806302815675735, + -0.02601413056254387, + -0.026585636660456657, + 0.03885912522673607, + 0.05946934223175049, + -0.07516327500343323, + -0.09997010231018066, + 0.05006811395287514, + -0.0471038743853569, + -0.0030829140450805426, + -0.08138088881969452, + -0.026151059195399284, + -0.07786097377538681, + -0.05088263377547264, + -0.0025396491400897503, + 0.05392291769385338, + 0.0711384117603302, + -0.028657495975494385, + -0.009028338827192783, + -0.012254527769982815, + 0.028684021905064583, + -0.06996504217386246, + 0.05126490071415901, + -0.0034317406825721264, + -0.059986378997564316, + 0.03686971589922905, + -0.019696544855833054, + 0.04870177432894707, + 0.023220060393214226, + -0.05806862935423851, + -0.05559173971414566, + -0.08834340423345566, + -0.020225433632731438, + 0.010443247854709625, + 0.08161189407110214, + 0.05111324414610863, + -0.09018630534410477, + 0.021659476682543755, + 0.03562246263027191, + 0.014981559477746487, + 0.10940670967102051, + 0.04175182059407234, + 0.04251711443066597, + 0.02694385126233101, + -0.08188968151807785, + -0.05437199026346207, + -0.060208216309547424, + -0.054743148386478424, + -0.040658388286828995, + 0.015488430857658386, + 0.02630746178328991, + -0.08103477954864502, + -0.056114789098501205, + -0.029888926073908806, + 0.02763628587126732, + 0.10846730321645737, + -0.030491631478071213, + -0.0429709255695343, + 0.013818247243762016, + 0.020855089649558067, + 0.06539192795753479, + 0.0320354700088501, + 0.04852644354104996, + 0.00739345233887434, + -0.002488560276106, + 0.0442243330180645, + -0.020559733733534813, + -0.05292633920907974, + -0.06256753206253052, + -0.07259473949670792, + 0.030493004247546196, + -0.04630422219634056, + 0.056189533323049545, + 0.011653215624392033, + 0.021204788237810135, + 0.050748132169246674, + -0.044360484927892685, + 0.08774808049201965, + -0.028432324528694153, + -0.04178493097424507, + 0.04014130309224129, + 0.032405417412519455, + -0.04994963854551315, + 
0.043360695242881775, + 0.011222160421311855, + 0.030981596559286118, + 0.0028910990804433823, + 0.05272964760661125, + 0.051144517958164215, + 0.0010531097650527954, + -0.05501853674650192, + -0.05875313654541969, + 0.03988390415906906, + 0.0460275299847126, + -0.03135129064321518, + -0.03215976431965828, + -0.05605093762278557, + -0.0543675497174263, + -0.046055153012275696, + -0.03556269407272339, + -0.03483748808503151, + -0.04381099343299866, + 0.04829642176628113, + 0.060529675334692, + -0.030956391245126724, + -0.06672877818346024, + 0.04131341353058815, + 0.020059049129486084, + 0.030499283224344254, + -0.057492446154356, + -0.011442874558269978, + 0.04339754953980446, + -0.021672511473298073, + -0.07497026026248932, + -0.030129842460155487, + 0.03924402967095375, + -0.07070865482091904, + -0.06562688201665878, + 0.01431313157081604, + 0.057927556335926056, + -0.033735163509845734, + 0.05423822999000549, + -0.04328663647174835, + -0.05953366681933403, + 0.02309931069612503, + 0.006716874893754721, + -0.06043805927038193, + 0.03666606917977333, + 0.07687642425298691, + -0.08779513835906982, + 0.018356066197156906, + 0.09742846339941025, + -0.023382343351840973, + 0.08747196942567825, + -0.05327869579195976, + 0.006299706641584635, + 0.0521012544631958, + 0.05386003479361534, + -0.0619797445833683, + 0.02068590372800827, + 0.05123700201511383, + 0.08105035126209259, + 0.05335316061973572, + 0.03420057147741318, + -0.038168229162693024, + 0.0334908701479435, + 0.022008391097187996, + -0.05659943073987961, + 0.051370225846767426, + 0.03789215162396431, + -0.01978694461286068, + 0.02210892364382744, + -0.04820941388607025, + -0.05846530944108963, + -0.07841888815164566, + 0.025156378746032715, + -0.07234911620616913, + 0.00008498232637066394, + 0.028960250318050385, + 0.060869671404361725, + 0.02083706296980381, + 0.05244610458612442 + ] + }, + "_ingest": { + "timestamp": "2025-04-02T16:12:58.965979531Z" + } + } + } + ] +} \ No newline at end of file diff --git 
a/supporting-blog-content/how-and-why-bbq/Outputs/05-bbq-index-output.json b/supporting-blog-content/how-and-why-bbq/Outputs/05-bbq-index-output.json new file mode 100644 index 00000000..3fc3028d --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Outputs/05-bbq-index-output.json @@ -0,0 +1,63 @@ +{ + "_index": "bbq-my-byte-quantized-index", + "_id": "1", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 2, + "failed": 0 + }, + "_seq_no": 0, + "_primary_term": 1 +} + +{ + "_index": "bbq-my-byte-quantized-index", + "_id": "2", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 2, + "failed": 0 + }, + "_seq_no": 1, + "_primary_term": 1 +} + +{ + "took": 154, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 0.9601393, + "hits": [ + { + "_index": "bbq-my-byte-quantized-index", + "_id": "1", + "_score": 0.9601393, + "_source": { + "my_field": "my awesome text field" + } + }, + { + "_index": "bbq-my-byte-quantized-index", + "_id": "2", + "_score": 0.8988675, + "_source": { + "my_field": "some other sentence" + } + } + ] + } +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/Outputs/06-bbq-search-8-18-output.json b/supporting-blog-content/how-and-why-bbq/Outputs/06-bbq-search-8-18-output.json new file mode 100644 index 00000000..918f01e8 --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Outputs/06-bbq-search-8-18-output.json @@ -0,0 +1,35 @@ +{ + "took": 18, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 0.9632075, + "hits": [ + { + "_index": "bbq-my-byte-quantized-index", + "_id": "1", + "_score": 0.9632075, + "_source": { + "my_field": "my awesome text field" + } + }, + { + "_index": 
"bbq-my-byte-quantized-index", + "_id": "2", + "_score": 0.89579916, + "_source": { + "my_field": "some other sentence" + } + } + ] + } +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/Outputs/07-raw-vector-output.json b/supporting-blog-content/how-and-why-bbq/Outputs/07-raw-vector-output.json new file mode 100644 index 00000000..1d1a715c --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Outputs/07-raw-vector-output.json @@ -0,0 +1,52 @@ +{ + "acknowledged": true, + "shards_acknowledged": true, + "index": "my-raw-vector-index" +} + +{ + "_index": "my-raw-vector-index", + "_id": "1", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 2, + "failed": 0 + }, + "_seq_no": 0, + "_primary_term": 1 +} + +{ + "_index": "my-raw-vector-index", + "_id": "2", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 2, + "failed": 0 + }, + "_seq_no": 1, + "_primary_term": 1 +} + +{ + "took": 11, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 0, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/Queries/01-create-an-inference-endpoint.json b/supporting-blog-content/how-and-why-bbq/Queries/01-create-an-inference-endpoint.json new file mode 100644 index 00000000..9c8bb6e4 --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Queries/01-create-an-inference-endpoint.json @@ -0,0 +1,12 @@ +PUT _inference/text_embedding/my_e5_model +{ + "service": "elasticsearch", + "service_settings": { + "num_threads": 1, + "model_id": ".multilingual-e5-small", + "adaptive_allocations": { + "enabled": true, + "min_number_of_allocations": 1 + } + } +} diff --git a/supporting-blog-content/how-and-why-bbq/Queries/02-embed-text.json b/supporting-blog-content/how-and-why-bbq/Queries/02-embed-text.json new 
file mode 100644 index 00000000..19ce4100 --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Queries/02-embed-text.json @@ -0,0 +1,4 @@ +POST _inference/text_embedding/my_e5_model +{ + "input": "my awesome piece of text" +} diff --git a/supporting-blog-content/how-and-why-bbq/Queries/03-create-byte-qauntized-index.json b/supporting-blog-content/how-and-why-bbq/Queries/03-create-byte-qauntized-index.json new file mode 100644 index 00000000..27b3573a --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Queries/03-create-byte-qauntized-index.json @@ -0,0 +1,17 @@ +PUT bbq-my-byte-quantized-index +{ + "mappings": { + "properties": { + "my_field": { + "type": "text" + }, + "my_vector": { + "type": "dense_vector", + "dims": 384, + "index_options": { + "type": "bbq_hnsw" + } + } + } + } +} diff --git a/supporting-blog-content/how-and-why-bbq/Queries/04-create-and-simulate-ingest-pipeline.json b/supporting-blog-content/how-and-why-bbq/Queries/04-create-and-simulate-ingest-pipeline.json new file mode 100644 index 00000000..49b0325f --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Queries/04-create-and-simulate-ingest-pipeline.json @@ -0,0 +1,27 @@ +PUT _ingest/pipeline/my_inference_pipeline +{ + "processors": [ + { + "inference": { + "model_id": "my_e5_model", + "input_output": [ + { + "input_field": "my_field", + "output_field": "my_vector" + } + ] + } + } + ] +} + +POST _ingest/pipeline/my_inference_pipeline/_simulate +{ + "docs": [ + { + "_source": { + "my_field": "my awesome text field" + } + } + ] +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/Queries/05-bbq-index.json b/supporting-blog-content/how-and-why-bbq/Queries/05-bbq-index.json new file mode 100644 index 00000000..37653c18 --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Queries/05-bbq-index.json @@ -0,0 +1,35 @@ +PUT bbq-my-byte-quantized-index/_doc/1?pipeline=my_inference_pipeline +{ + "my_field": "my awesome text field" +} + +PUT 
bbq-my-byte-quantized-index/_doc/2?pipeline=my_inference_pipeline +{ + "my_field": "some other sentence" +} + +GET bbq-my-byte-quantized-index/_search +{ + "query": { + "bool": { + "must": [ + { + "knn": { + "field": "my_vector", + "query_vector_builder": { + "text_embedding": { + "model_id": "my_e5_model", + "model_text": "my awesome search field" + } + }, + "k": 10, + "num_candidates": 100 + } + } + ] + } + }, + "_source": [ + "my_field" + ] +} diff --git a/supporting-blog-content/how-and-why-bbq/Queries/06-bbq-search-8-18.json b/supporting-blog-content/how-and-why-bbq/Queries/06-bbq-search-8-18.json new file mode 100644 index 00000000..57789ff7 --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Queries/06-bbq-search-8-18.json @@ -0,0 +1,28 @@ +GET bbq-my-byte-quantized-index/_search +{ + "query": { + "bool": { + "must": [ + { + "knn": { + "field": "my_vector", + "query_vector_builder": { + "text_embedding": { + "model_id": "my_e5_model", + "model_text": "my awesome search field" + } + }, + "rescore_vector": { + "oversample": 3 + }, + "k": 10, + "num_candidates": 100 + } + } + ] + } + }, + "_source": [ + "my_field" + ] +} diff --git a/supporting-blog-content/how-and-why-bbq/Queries/07-raw-vector-index.json b/supporting-blog-content/how-and-why-bbq/Queries/07-raw-vector-index.json new file mode 100644 index 00000000..805591be --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/Queries/07-raw-vector-index.json @@ -0,0 +1,53 @@ +PUT my-raw-vector-index +{ + "mappings": { + "properties": { + "my_field": { + "type": "text" + }, + "my_vector": { + "type": "dense_vector", + "dims": 384, + "index_options": { + "type": "hnsw" + } + } + } + } +} + +PUT my-raw-vector-index/_doc/1?pipeline=my_inference_pipeline +{ + "my_field": "my awesome text field" +} + +PUT my-raw-vector-index/_doc/2?pipeline=my_inference_pipeline +{ + "my_field": "some other sentence" +} + +GET my-raw-vector-index/_search +{ + "query": { + "bool": { + "must": [ + { + "knn": { + "field": 
"my_vector", + "query_vector_builder": { + "text_embedding": { + "model_id": "my_e5_model", + "model_text": "my awesome search field" + } + }, + "k": 10, + "num_candidates": 100 + } + } + ] + } + }, + "_source": [ + "my_field" + ] +} \ No newline at end of file diff --git a/supporting-blog-content/how-and-why-bbq/README.md b/supporting-blog-content/how-and-why-bbq/README.md new file mode 100644 index 00000000..3e18c72c --- /dev/null +++ b/supporting-blog-content/how-and-why-bbq/README.md @@ -0,0 +1,23 @@ +# How to implement Better Binary Quantization (BBQ) into your use case and why you should + +Vector search underpins semantic search for text and similarity search for images, videos, or audio. It uses mathematical representations called vectors, which can be large and slow to search. Better Binary Quantization (BBQ) helps compress these vectors, enabling faster searching while maintaining accuracy. + +This repository contains all the queries corresponding to the article "How to implement Better Binary Quantization (BBQ) into your use case and why you should." This code demonstrates how to use BBQ and the `rescore_vector` feature, which automatically oversamples and rescores vectors for quantized indices. + +## Prerequisites + +- Elasticsearch version 8.18 or higher (BBQ was introduced in 8.16, but `rescore_vector` is available from 8.18) +- A machine learning node in your cluster +- For Elastic Cloud serverless, select an instance optimized for vectors + +## How to use the code in this repository + +This repository has two folders, `Queries` and `Outputs`. `Queries` contains the commands that you will run from the [Kibana Dev Tools Console](https://www.elastic.co/guide/en/kibana/8.18/console-kibana.html), while `Outputs` has the corresponding JSON outputs of those commands. + +## Troubleshooting + +If you run into issues around your trained model not being allocated to any nodes, you may need to start your model manually. 
+ +``` +POST _ml/trained_models/.multilingual-e5-small/deployment/_start +``` \ No newline at end of file