# Lab 4 - Hybrid Search of title embeddings and full-text fields in Opensearch

In [22]:
require 'opensearch'
# require 'opensearch-transport'
require 'transformers-rb'
require 'tqdm'
require 'date'
require 'json'
require 'polars-df'

false

In [23]:
# https://github.com/opensearch-project/opensearch-ruby
host = 'opensearch-node'
port = 9200

$client = OpenSearch::Client.new(hosts: [{ host: host, port: port }])
info = $client.info
puts "Welcome to #{info['version']['distribution']} #{info['version']['number']}!"

Welcome to opensearch 2.11.0!


## Use the same model and method to get the query embedding, with some defaults changed
Remember, the model is `intfloat/e5-small-v2` and we need to prefix any query with 'query:'

In [24]:
# The E5 models expect 'query:' and 'passage:' prefixes
# model = SentenceTransformer.new('intfloat/e5-small-v2')
# def get_embeddings(texts, prefix: "query: ")
#     # The E5 models expects either 'query: ' or 'passage: ' prefix
#     texts = [texts] unless texts.is_a?(Array)
#     prefixed = texts.map { |text| "#{prefix}#{text}" }
#     model.encode(prefixed, show_progress_bar: false)
# end

In [25]:
# The E5 models expect 'query:' and 'passage:' prefixes
$model = Transformers.pipeline("embedding", 'intfloat/e5-small-v2')


def get_embeddings(texts, prefix: "passage: ")
  # puts texts
  texts = [texts] unless texts.is_a?(Array)
  total = texts.length
  embeddings = []
  
  texts.each_with_index do |text, i|
    prefixed_text = "#{prefix}#{text}"
    embedding = $model.(prefixed_text)
    embeddings << embedding
    
    percent = ((i + 1).to_f / total * 100).to_i
    print "\rProcessing embeddings: #{percent}% (#{i + 1}/#{total})"
  end
  
  print "\nDone!\n"
  embeddings
end

:get_embeddings

## Define different query types
Here we define Opensearch query bodies for:
 - BM25
 - KNN
 - Hybrid

In [26]:
def get_bm25_body(querystring)
    {
      "query" => {
        "bool" => {
          "should" => [
            {
              "multi_match" => {
                "query" => querystring,
                "type" => "cross_fields",
                "fields" => ["description"],
                "boost" => 1.0
              }
            },
            {
              "multi_match" => {
                "query" => querystring,
                "type" => "cross_fields",
                "fields" => ["title"],
                "boost" => 1.1
              }
            },
            {
              "multi_match" => {
                "query" => querystring,
                "type" => "cross_fields",
                "fields" => ["title_exactish"],
                "boost" => 1.2
              }
            }
          ]
        }
      },
      "_source" => {"exclude" => ["title_embedding"]}
    }
end

:get_bm25_body

In [27]:
def get_knn_body(querystring)
    embeddings = get_embeddings(querystring)
    {
      "query" => {
        "bool" => {
          "should" => [
            {
              "knn" => {
                "title_embedding" => {
                  "vector" => embeddings[0],
                  "k" => 20
                }
              }
            }
          ]
        }
      },
      "_source" => {"exclude" => ["title_embedding"]}
    }
end

:get_knn_body

## Hybrid Query

In [28]:
def get_hybrid_body(querystring)
    embeddings = get_embeddings(querystring)
    {
      "query" => {
        "hybrid" => {
          "queries" => [
            {
              "bool" => {
                "should" => [
                  {
                    "multi_match" => {
                      "query" => querystring,
                      "type" => "cross_fields",
                      "fields" => ["description"],
                      "boost" => 1.0
                    }
                  },
                  {
                    "multi_match" => {
                      "query" => querystring,
                      "type" => "cross_fields",
                      "fields" => ["title"],
                      "boost" => 1.1
                    }
                  },
                  {
                    "multi_match" => {
                      "query" => querystring,
                      "type" => "cross_fields",
                      "fields" => ["title_exactish"],
                      "boost" => 1.2
                    }
                  }
                ]
              }        
            },
            {
              "knn" => {
                "title_embedding" => {
                  "vector" => embeddings[0],
                  "k" => 100
                }
              }
            }
          ]
        }
      },
      "_source" => {"exclude" => ["title_embedding"]}
    }
end

:get_hybrid_body

In [76]:
def serps(querystring, resp, k: 5, show: true)
    # this doesn't match the value in the other notebook.
    count = resp["hits"]["total"]["value"]
    puts resp
    results = resp["hits"]["hits"]
    
    html_str = "<h4>Showing #{count} Results for <em>#{querystring}</em></h4><ol>"
    
    results[0...k].each do |result|
        score = result["_score"]
        title = result["_source"]["title"] || "No title"
        url = result["_source"]["url"] || "No URL"
        description = result["_source"]["description"]
        text = result["_source"]["text"] || ""
        snippet = description || "#{text[0...140]}..."
        
        html_str += "<li><b>#{title}</b>(#{score})<br>#{snippet}<br>"
        html_str += "<span style=\"font-size:0.8em\"><a href=\"#{url}\">#{url}</a></span></li>"
    end
    
    html_str += "</ol>"
    
    if show
        IRuby.display(IRuby.html(html_str))
    else
        html_str
    end
end

:serps

In [77]:
def make_normalization_pipeline(name, bm25_weight: 0.5, knn_weight: 0.5)
    body = {
      "description" => "Post processor for hybrid search with bm25=#{bm25_weight} and knn=#{knn_weight}",
      "phase_results_processors" => [
        {
          "normalization-processor" => {
            "normalization" => {
              "technique" => "min_max"
            },
            "combination" => {
              "technique" => "arithmetic_mean",
              "parameters" => {
                "weights" => [
                  bm25_weight,
                  knn_weight
                ]
              }
            }
          }
        }
      ]
    }
    resp = $client.transport.perform_request(method: "PUT", url: "/_search/pipeline/#{name}", params: {}, body: body)
end

:make_normalization_pipeline

In [78]:
def make_normalization_pipeline(name, bm25_weight: 0.5, knn_weight: 0.5)
  body = {
    "description" => "Post processor for hybrid search with bm25=#{bm25_weight} and knn=#{knn_weight}",
    "phase_results_processors" => [
      {
        "normalization-processor" => {
          "normalization" => {
            "technique" => "min_max"
          },
          "combination" => {
            "technique" => "arithmetic_mean",
            "parameters" => {
              "weights" => [
                bm25_weight,
                knn_weight
              ]
            }
          }
        }
      }
    ]
  }
  
  # Changed to use positional parameters instead of named parameters
  resp = $client.transport.perform_request(
    "PUT",                              # method
    "/_search/pipeline/#{name}",        # path
    {},                                 # params
    body                                # body
  )
end

:make_normalization_pipeline

In [79]:
make_normalization_pipeline("nlp-search-pipeline-equal", bm25_weight: 0.5, knn_weight: 0.5)
make_normalization_pipeline("nlp-search-pipeline-bm25-heavy", bm25_weight: 0.6, knn_weight: 0.4)
make_normalization_pipeline("nlp-search-pipeline-knn-heavy", bm25_weight: 0.4, knn_weight: 0.6)

#<OpenSearch::Transport::Transport::Response:0x00007ffff836fcb8 @status=200, @body={"acknowledged"=>true}, @headers={"content-type"=>"application/json; charset=UTF-8", "content-encoding"=>"gzip", "content-length"=>"47"}>

In [80]:
# def search(querystring, body, pipeline: "nlp-search-pipeline-equal", show: true)
#     resp = $client.search(body: body, index: "ai-search", params: {"search_pipeline" => pipeline})
#     serps(querystring, resp) if show
#     resp
# end

def search(querystring, body, pipeline: "nlp-search-pipeline-equal", show: true)
  resp = $client.search({
    index: "ai-search",
    body: body,
    search_pipeline: pipeline  # Moved from params into main options hash
  })
  serps(querystring, resp) if show
  resp
end

def search_bm25(querystring, show: true)
    body = get_bm25_body(querystring)
    search(querystring, body, show: show)
end

def search_knn(querystring, show: true)
    body = get_knn_body(querystring)
    search(querystring, body, show: show)
end

# def search_hybrid(querystring, pipeline: "nlp-search-pipeline-equal", show: true)
#     body = get_hybrid_body(querystring)
#     search(querystring, body, pipeline: pipeline, show: show)
# end

def search_hybrid(querystring, pipeline: "nlp-search-pipeline-equal", show: true)
  body = get_hybrid_body(querystring)
  search(querystring, body, pipeline: pipeline, show: show)
end

:search_hybrid

In [81]:
resp = search_hybrid("crypto scandal")

Processing embeddings: 100% (1/1)
Done!
{"took"=>7, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>10, "relation"=>"eq"}, "max_score"=>0.5, "hits"=>[{"_index"=>"ai-search", "_id"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "_score"=>0.5, "_source"=>{"date"=>nil, "image_url"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "domain"=>"www.sparkspread.com", "description"=>"Real time energy financing and trading news", "text"=>"June 14, 2017 16:48 EST\nWelcome to SparkSpread.com\nThis page is only available to subscribers.\nIf you are already a subscriber, please log in below...\nLog in\nIf you would like to register for a free trial to SparkSpread.com, click here", "title"=>"Real time energy financing and trading news", "url"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fspa

{"took"=>7, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>10, "relation"=>"eq"}, "max_score"=>0.5, "hits"=>[{"_index"=>"ai-search", "_id"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "_score"=>0.5, "_source"=>{"date"=>nil, "image_url"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "domain"=>"www.sparkspread.com", "description"=>"Real time energy financing and trading news", "text"=>"June 14, 2017 16:48 EST\nWelcome to SparkSpread.com\nThis page is only available to subscribers.\nIf you are already a subscriber, please log in below...\nLog in\nIf you would like to register for a free trial to SparkSpread.com, click here", "title"=>"Real time energy financing and trading news", "url"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878"}}

In [82]:
resp = search_bm25("crypto scandal")

{"took"=>3, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>2, "relation"=>"eq"}, "max_score"=>22.885584, "hits"=>[{"_index"=>"ai-search", "_id"=>"https://www.taiwannews.com.tw/en/news/3354788", "_score"=>22.885584, "_source"=>{"date"=>"2018-01-31T21:08:00", "image_url"=>"https://tnimage.taiwannews.com.tw/photos/shares/AP/20180203/95534fa56fb24671a669c62c3da71149.jpg", "domain"=>"www.taiwannews.com.tw", "description"=>"Putin to athletes: Ignore doping scandals at Olympics", "text"=>"MOSCOW (AP) — Russian President Vladimir Putin has told the country's athletes to forget about doping scandals when they compete at the Pyeongchang Olympics.\nAs punishment for what it deemed a doping scheme during the 2014 Sochi Olympics, the International Olympic Committee has invited 169 Russians to compete under a neutral flag using the name \"Olympic Athletes from Russia.\"\nAt a meeting with athletes at his presidential residence out

{"took"=>3, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>2, "relation"=>"eq"}, "max_score"=>22.885584, "hits"=>[{"_index"=>"ai-search", "_id"=>"https://www.taiwannews.com.tw/en/news/3354788", "_score"=>22.885584, "_source"=>{"date"=>"2018-01-31T21:08:00", "image_url"=>"https://tnimage.taiwannews.com.tw/photos/shares/AP/20180203/95534fa56fb24671a669c62c3da71149.jpg", "domain"=>"www.taiwannews.com.tw", "description"=>"Putin to athletes: Ignore doping scandals at Olympics", "text"=>"MOSCOW (AP) — Russian President Vladimir Putin has told the country's athletes to forget about doping scandals when they compete at the Pyeongchang Olympics.\nAs punishment for what it deemed a doping scheme during the 2014 Sochi Olympics, the International Olympic Committee has invited 169 Russians to compete under a neutral flag using the name \"Olympic Athletes from Russia.\"\nAt a meeting with athletes at his presidential residence out

In [83]:
resp = search_knn("crypto scandal")

Processing embeddings: 100% (1/1)
Done!
{"took"=>4, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>20, "relation"=>"eq"}, "max_score"=>0.88546175, "hits"=>[{"_index"=>"ai-search", "_id"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "_score"=>0.88546175, "_source"=>{"date"=>nil, "image_url"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "domain"=>"www.sparkspread.com", "description"=>"Real time energy financing and trading news", "text"=>"June 14, 2017 16:48 EST\nWelcome to SparkSpread.com\nThis page is only available to subscribers.\nIf you are already a subscriber, please log in below...\nLog in\nIf you would like to register for a free trial to SparkSpread.com, click here", "title"=>"Real time energy financing and trading news", "url"=>"http://www.sparkspread.com/login.php?return=%2Fsparksp

{"took"=>4, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>20, "relation"=>"eq"}, "max_score"=>0.88546175, "hits"=>[{"_index"=>"ai-search", "_id"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "_score"=>0.88546175, "_source"=>{"date"=>nil, "image_url"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878", "domain"=>"www.sparkspread.com", "description"=>"Real time energy financing and trading news", "text"=>"June 14, 2017 16:48 EST\nWelcome to SparkSpread.com\nThis page is only available to subscribers.\nIf you are already a subscriber, please log in below...\nLog in\nIf you would like to register for a free trial to SparkSpread.com, click here", "title"=>"Real time energy financing and trading news", "url"=>"http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%2

In [84]:
def search_compare(querystring, pipeline: "nlp-search-pipeline-equal")
    bm25 = serps(querystring, search_bm25(querystring, show: false), k: 5, show: false)
    knn = serps(querystring, search_knn(querystring, show: false), k: 5, show: false)
    hybrid = serps(querystring, search_hybrid(querystring, pipeline: pipeline, show: false), k: 5, show: false)
    
    html_all = <<-HTML
        <style>
            .compare li {overflow-x:hidden;width:320px!important;text-align:left;height:200px;border-bottom:1px solid #333;}
        </style>
        <table class="compare">
            <tr><td>BM25</td><td>KNN</td><td>Hybrid (#{pipeline})</td></tr>
            <tr><td>#{bm25}</td><td>#{knn}</td><td>#{hybrid}</td></tr>
        </table>
    HTML
    
    IRuby.display(IRuby.html(html_all))
end

:search_compare

In [85]:
search_compare("property market", pipeline: "nlp-search-pipeline-equal")

{"took"=>7, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>21, "relation"=>"eq"}, "max_score"=>19.945515, "hits"=>[{"_index"=>"ai-search", "_id"=>"https://www.techrepublic.com/blog/microsoft-office/use-windows-tags-property-to-manage-office-files/", "_score"=>19.945515, "_source"=>{"date"=>"2018-02-02T00:00:00", "image_url"=>"https://tr2.cbsistatic.com/hub/i/r/2018/02/02/60e23d1b-2b58-42b0-bf52-0325cc60be29/thumbnail/770x578/44fc5bb146ee55bef46f5c38e696aaa5/picstudioistock-686632992.jpg", "domain"=>"www.techrepublic.com", "description"=>"Searching, grouping, and even filtering files is easier if you know how to use the Windows Tags property.", "text"=>"Sometimes you don't know you need a feature until you discover it and put it to use for a bit. Then you wonder how you ever got your work done without it! That's how you might feel about Windows Tags. Yes, it's a Windows feature, but you can use Tags to manage Office f

Processing embeddings: 100% (1/1)
Done!
{"took"=>4, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>20, "relation"=>"eq"}, "max_score"=>0.87082547, "hits"=>[{"_index"=>"ai-search", "_id"=>"https://www.taiwannews.com.tw/en/news/3385834", "_score"=>0.87082547, "_source"=>{"date"=>"2018-03-20T06:09:00", "image_url"=>"https://www.taiwannews.com.tw/images/category/580888eb17740.jpg", "domain"=>"www.taiwannews.com.tw", "description"=>"Business Highlights", "text"=>"___\nHow Facebook likes could profile voters for manipulation\nNEW YORK (AP) — Facebook likes can tell a lot about a person. Maybe even enough to fuel a voter-manipulation effort like the one a Trump-affiliated data-mining firm stands accused of — and which Facebook may have enabled. A Trump-affiliated group, Cambridge Analytica, reportedly tapped similar techniques to try to influence elections using data, including likes, inappropriately obtained on tens of mil

Processing embeddings: 100% (1/1)
Done!
{"took"=>6, "timed_out"=>false, "_shards"=>{"total"=>1, "successful"=>1, "skipped"=>0, "failed"=>0}, "hits"=>{"total"=>{"value"=>10, "relation"=>"eq"}, "max_score"=>0.5, "hits"=>[{"_index"=>"ai-search", "_id"=>"http://www.taiwannews.com.tw/en/news/3095159", "_score"=>0.5, "_source"=>{"date"=>"2017-02-15T07:51:00", "image_url"=>"http://www.taiwannews.com.tw/images/category/580888eb17740.jpg", "domain"=>"www.taiwannews.com.tw", "description"=>"Business Highlights", "text"=>"___\n2 big insurance breakups on Valentine's Day\nCigna says it is ending Anthem's proposed, $48-billion acquisition bid and seeking billions in damages from the Blue Cross-Blue Shield insurer. The announcement comes hours after another major insurer, Aetna Inc., said it was abandoning its planned, $34-billion purchase of Medicare Advantage provider Humana Inc.\n___\nYellen: Expect Fed to resume raising rates in coming months\nWASHINGTON (AP) — Federal Reserve Chair Janet Yellen

0,1,2
BM25,KNN,Hybrid (nlp-search-pipeline-equal)
"Showing 21 Results for property marketHow to use the Windows Tags property to manage Office files(19.945515) Searching, grouping, and even filtering files is easier if you know how to use the Windows Tags property. https://www.techrepublic.com/blog/microsoft-office/use-windows-tags-property-to-manage-office-files/Global GIS Market in Telecom Sector - Use of GIS and Big Data is an Emerging Trend in the Market(16.333023) Global GIS Market in Telecom Sector - Use of GIS and Big Data is an Emerging Trend in the Market | Technavio https://www.taiwannews.com.tw/en/news/3413697Global Allergy Immunotherapies Market - Industry Analysis and Forecast(14.88765) Global Allergy Immunotherapies Market - Industry Analysis and Forecast | Technavio https://www.taiwannews.com.tw/en/news/3444398Top Factors Driving the Global Potassium Sulfate Market(14.88765) Top Factors Driving the Global Potassium Sulfate Market | Technavio https://www.taiwannews.com.tw/en/news/3413580Technological Advances to Drive the Global Dental Handpieces Market(14.643564) Technological Advances to Drive the Global Dental Handpieces Market| Technavio https://www.taiwannews.com.tw/en/news/3444389",Showing 20 Results for property marketBusiness Highlights(0.87082547) Business Highlights https://www.taiwannews.com.tw/en/news/3385834Business Highlights(0.87082547) Business Highlights http://www.taiwannews.com.tw/en/news/3095159Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20881,"Showing 10 Results for property marketBusiness Highlights(0.5) Business Highlights http://www.taiwannews.com.tw/en/news/3095159Business Highlights(0.5) Business Highlights https://www.taiwannews.com.tw/en/news/3385834How to use the Windows Tags property to manage Office files(0.5) Searching, grouping, and even filtering files is easier if you know how to use the Windows Tags property. https://www.techrepublic.com/blog/microsoft-office/use-windows-tags-property-to-manage-office-files/Real time energy financing and trading news(0.47490948) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.47490948) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879"


In [39]:
search_compare("property market", pipeline: "nlp-search-pipeline-bm25-heavy")

Processing embeddings: 100% (1/1)
Done!
Processing embeddings: 100% (1/1)
Done!


0,1,2
BM25,KNN,Hybrid (nlp-search-pipeline-bm25-heavy)
"Showing 21 Results for property marketHow to use the Windows Tags property to manage Office files(19.945515) Searching, grouping, and even filtering files is easier if you know how to use the Windows Tags property. https://www.techrepublic.com/blog/microsoft-office/use-windows-tags-property-to-manage-office-files/Global GIS Market in Telecom Sector - Use of GIS and Big Data is an Emerging Trend in the Market(16.333023) Global GIS Market in Telecom Sector - Use of GIS and Big Data is an Emerging Trend in the Market | Technavio https://www.taiwannews.com.tw/en/news/3413697Global Allergy Immunotherapies Market - Industry Analysis and Forecast(14.88765) Global Allergy Immunotherapies Market - Industry Analysis and Forecast | Technavio https://www.taiwannews.com.tw/en/news/3444398Top Factors Driving the Global Potassium Sulfate Market(14.88765) Top Factors Driving the Global Potassium Sulfate Market | Technavio https://www.taiwannews.com.tw/en/news/3413580Technological Advances to Drive the Global Dental Handpieces Market(14.643564) Technological Advances to Drive the Global Dental Handpieces Market| Technavio https://www.taiwannews.com.tw/en/news/3444389",Showing 20 Results for property marketBusiness Highlights(0.87082547) Business Highlights https://www.taiwannews.com.tw/en/news/3385834Business Highlights(0.87082547) Business Highlights http://www.taiwannews.com.tw/en/news/3095159Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20881,"Showing 10 Results for property marketHow to use the Windows Tags property to manage Office files(0.6) Searching, grouping, and even filtering files is easier if you know how to use the Windows Tags property. https://www.techrepublic.com/blog/microsoft-office/use-windows-tags-property-to-manage-office-files/Business Highlights(0.4) Business Highlights http://www.taiwannews.com.tw/en/news/3095159Business Highlights(0.4) Business Highlights https://www.taiwannews.com.tw/en/news/3385834Real time energy financing and trading news(0.3799276) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.3799276) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879"


In [40]:
search_compare("property market", pipeline: "nlp-search-pipeline-knn-heavy")

Processing embeddings: 100% (1/1)
Done!
Processing embeddings: 100% (1/1)
Done!


0,1,2
BM25,KNN,Hybrid (nlp-search-pipeline-knn-heavy)
"Showing 21 Results for property marketHow to use the Windows Tags property to manage Office files(19.945515) Searching, grouping, and even filtering files is easier if you know how to use the Windows Tags property. https://www.techrepublic.com/blog/microsoft-office/use-windows-tags-property-to-manage-office-files/Global GIS Market in Telecom Sector - Use of GIS and Big Data is an Emerging Trend in the Market(16.333023) Global GIS Market in Telecom Sector - Use of GIS and Big Data is an Emerging Trend in the Market | Technavio https://www.taiwannews.com.tw/en/news/3413697Global Allergy Immunotherapies Market - Industry Analysis and Forecast(14.88765) Global Allergy Immunotherapies Market - Industry Analysis and Forecast | Technavio https://www.taiwannews.com.tw/en/news/3444398Top Factors Driving the Global Potassium Sulfate Market(14.88765) Top Factors Driving the Global Potassium Sulfate Market | Technavio https://www.taiwannews.com.tw/en/news/3413580Technological Advances to Drive the Global Dental Handpieces Market(14.643564) Technological Advances to Drive the Global Dental Handpieces Market| Technavio https://www.taiwannews.com.tw/en/news/3444389",Showing 20 Results for property marketBusiness Highlights(0.87082547) Business Highlights https://www.taiwannews.com.tw/en/news/3385834Business Highlights(0.87082547) Business Highlights http://www.taiwannews.com.tw/en/news/3095159Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879Real time energy financing and trading news(0.8686811) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20881,Showing 10 Results for property marketBusiness Highlights(0.6) Business Highlights http://www.taiwannews.com.tw/en/news/3095159Business Highlights(0.6) Business Highlights https://www.taiwannews.com.tw/en/news/3385834Real time energy financing and trading news(0.5698914) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.5698914) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879Real time energy financing and trading news(0.5698914) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20881


In [41]:
search_compare("crypto scandal")

Processing embeddings: 100% (1/1)
Done!
Processing embeddings: 100% (1/1)
Done!


0,1,2
BM25,KNN,Hybrid (nlp-search-pipeline-equal)
"Showing 2 Results for crypto scandalPutin to athletes: Ignore doping scandals at Olympics(22.885584) Putin to athletes: Ignore doping scandals at Olympics https://www.taiwannews.com.tw/en/news/3354788More than 4,000 students and teachers to be compensated for Taiwan food safety scandal(11.866891) Tainted oil from Chang Guann turned up in food at 22 schools in 2014.A total of 4,048 students and teachers can soon expect to receive between NT$3,000 (US$98) and NT$9,000 each as compensation for the consumption of tainted cooking oil from Chang Guann. https://www.taiwannews.com.tw/en/news/3473912","Showing 20 Results for crypto scandalReal time energy financing and trading news(0.88546175) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.88546175) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879Real time energy financing and trading news(0.88546175) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20881Real time energy financing and trading news(0.88546175) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20254আরিব্বাস, জাল ডাক্তার!(0.88258606) In recent past there has been many reported incidents of fake doctors being arrested from every nuke and corner of Bengal - a funny take on that http://banglalive.com/the-increasing-number-of-fake-doctors-and-their-treatment/",Showing 10 Results for crypto scandalReal time energy financing and trading news(0.5) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20878Real time energy financing and trading news(0.5) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20879Putin to athletes: Ignore doping scandals at Olympics(0.5) Putin to athletes: Ignore doping scandals at Olympics https://www.taiwannews.com.tw/en/news/3354788Real time energy financing and trading news(0.5) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20881Real time energy financing and trading news(0.5) Real time energy financing and trading news http://www.sparkspread.com/login.php?return=%2Fsparkspread.php%3Fsparkspread_concept%3Dstory%26id%3D20254


In [42]:
search_compare("US economic recovery")

Processing embeddings: 100% (1/1)
Done!
Processing embeddings: 100% (1/1)
Done!


0,1,2
BM25,KNN,Hybrid (nlp-search-pipeline-equal)
"Showing 82 Results for US economic recoveryTrump to herald economic progress in State of the Union(23.450245) Trump to herald economic progress in State of the Union https://www.taiwannews.com.tw/en/news/3354102BC-US--Sugar, US(14.562808) BC-US--Sugar, US https://www.taiwannews.com.tw/en/news/3385764BC-US--Gold, US(14.562808) BC-US--Gold, US https://www.taiwannews.com.tw/en/news/3354953BC-US--Copper, US(14.562808) BC-US--Copper, US https://www.taiwannews.com.tw/en/news/3354438BC-US--Cocoa, US(14.562808) BC-US--Cocoa, US https://www.taiwannews.com.tw/en/news/3385769","Showing 20 Results for US economic recoveryTrump to herald economic progress in State of the Union(0.8791541) Trump to herald economic progress in State of the Union https://www.taiwannews.com.tw/en/news/3354102How Fed hike will affect US consumers and overseas economies(0.8613023) How Fed hike will affect US consumers and overseas economies http://www.taiwannews.com.tw/en/news/3188359Text of President Donald Trump's State of the Union address(0.8606802) Text of President Donald Trump's State of the Union address https://www.taiwannews.com.tw/en/news/3354631BC-US--Index, US(0.8571551) BC-US--Index, US https://www.taiwannews.com.tw/en/news/3318227AP NewsAlert(0.85561657) AP NewsAlert http://www.taiwannews.com.tw/en/news/3188148","Showing 10 Results for US economic recoveryTrump to herald economic progress in State of the Union(1.0) Trump to herald economic progress in State of the Union https://www.taiwannews.com.tw/en/news/3354102How Fed hike will affect US consumers and overseas economies(0.3170116) How Fed hike will affect US consumers and overseas economies http://www.taiwannews.com.tw/en/news/3188359Text of President Donald Trump's State of the Union address(0.31063485) Text of President Donald Trump's State of the Union address https://www.taiwannews.com.tw/en/news/3354631BC-US--Index, US(0.27450064) BC-US--Index, US https://www.taiwannews.com.tw/en/news/3318227AP NewsAlert(0.2587302) AP NewsAlert http://www.taiwannews.com.tw/en/news/3188148"
