Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Added file with the full DSL example, used for documentation/website

  • Loading branch information...
commit ad8b38fb3e832817d76f8e5730eb092c4ca925d0 1 parent ae131a5
Karel Minarik karmi authored

Showing 3 changed files with 353 additions and 8 deletions. Show diff stats Hide diff stats

  1. +1 0  .gitignore
  2. +10 8 Rakefile
  3. +342 0 examples/slingshot-dsl.rb
1  .gitignore
@@ -5,3 +5,4 @@ pkg/*
5 5 rdoc/
6 6 coverage/
7 7 scratch/
  8 +examples/*.html
18 Rakefile
@@ -53,14 +53,8 @@ end
53 53
54 54 namespace :web do
55 55
56   - desc "Generate and update website documentation"
57   - task :update do
58   - system "rocco examples/slingshot-dsl.rb"
59   - html = File.read('examples/slingshot-dsl.html').gsub!(/slingshot\-dsl\.rb/, 'slingshot.rb')
60   - File.open('examples/slingshot-dsl.html', 'w') { |f| f.write html }
61   - system "open examples/slingshot-dsl.html"
62   -
63   - # Update the Github website
  56 + desc "Update the Github website"
  57 + task :update => :generate do
64 58 current_branch = `git branch --no-color`.split("\n").select { |line| line =~ /^\* / }.to_s.gsub(/\* (.*)/, '\1')
65 59 (puts "Unable to determine current branch"; exit(1) ) unless current_branch
66 60 system "git stash save && git checkout web"
@@ -69,4 +63,12 @@ namespace :web do
69 63 system "git push origin web:gh-pages -f"
70 64 system "git checkout #{current_branch} && git stash pop"
71 65 end
  66 +
  67 + desc "Generate the Rocco documentation page"
  68 + task :generate do
  69 + system "rocco examples/slingshot-dsl.rb"
  70 + html = File.read('examples/slingshot-dsl.html').gsub!(/slingshot\-dsl\.rb/, 'slingshot.rb')
  71 + File.open('examples/slingshot-dsl.html', 'w') { |f| f.write html }
  72 + system "open examples/slingshot-dsl.html"
  73 + end
72 74 end
342 examples/slingshot-dsl.rb
... ... @@ -0,0 +1,342 @@
  1 +# **Slingshot** is a rich and comfortable Ruby API and DSL for the
  2 +# [_ElasticSearch_](http://www.elasticsearch.org/) search engine/database.
  3 +#
  4 +# _ElasticSearch_ is a scalable, distributed, highly-available,
  5 +# RESTful database communicating by JSON over HTTP, based on [Lucene](http://lucene.apache.org/),
  6 +# written in Java. It manages to be very simple and very powerful at the same time.
  7 +#
  8 +# By following these instructions you should have the search running
  9 +# on a sane operating system in less than 10 minutes.
  10 +
  11 +#### Installation
  12 +
  13 +# Install Slingshot with Rubygems.
  14 +#
  15 +# gem install slingshot-rb
  16 +#
  17 +require 'rubygems'
  18 +require 'slingshot'
  19 +
  20 +#### Prerequisites
  21 +
  22 +# You'll need a working and running _ElasticSearch_ server. Thankfully, that's easy.
  23 +( puts <<-"INSTALL" ; exit(1) ) unless RestClient.get('http://localhost:9200') rescue false
  24 + [!] You don’t appear to have ElasticSearch installed. Please install and launch it with the following commands.
  25 + curl -k -L -o elasticsearch-0.15.0.tar.gz http://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-0.15.0.tar.gz
  26 + tar -zxvf elasticsearch-0.15.0.tar.gz
  27 + ./elasticsearch-0.15.0/bin/elasticsearch -f
  28 +INSTALL
  29 +
  30 +### Simple Usage
  31 +
  32 +#### Storing and indexing documents
  33 +
  34 +# Let's initialize an index named “articles”.
  35 +Slingshot.index 'articles' do
  36 + # To make sure it's fresh, let's delete any existing index with the same name.
  37 + delete
  38 + # And then, let's create it.
  39 + create
  40 +
  41 + # We want to store and index some articles with title and tags. Simple Hashes are OK.
  42 + store :title => 'One', :tags => ['ruby'], :published_on => '2011-01-01'
  43 + store :title => 'Two', :tags => ['ruby', 'python'], :published_on => '2011-01-02'
  44 + store :title => 'Three', :tags => ['java'], :published_on => '2011-01-02'
  45 + store :title => 'Four', :tags => ['ruby', 'php'], :published_on => '2011-01-03'
  46 +
  47 + # We force refreshing the index, so we can query it immediately.
  48 + refresh
  49 +end
  50 +
  51 +# We may want to define a specific [mapping](http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html)
  52 +# for the index.
  53 +
  54 +Slingshot.index 'articles' do
  55 + # To do so, just pass a Hash containing the specified mapping to the `Index#create` method.
  56 + create :mappings => {
  57 + # Specify for which type of documents this mapping should be used (`article` in this case).
  58 + :article => {
  59 + :properties => {
  60 + # Specify the type of the field, whether it should be analyzed, etc.
  61 + :id => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
  62 + # Set the boost or analyzer settings for the field, et cetera. The _ElasticSearch_ guide
  63 + # has [more information](http://elasticsearch.org/guide/reference/mapping/index.html).
  64 + :title => { :type => 'string', :boost => 2.0, :analyzer => 'snowball' },
  65 + :tags => { :type => 'string', :analyzer => 'keyword' },
  66 + :content => { :type => 'string', :analyzer => 'snowball' }
  67 + }
  68 + }
  69 + }
  70 +end
  71 +
  72 +
  73 +
  74 +#### Searching
  75 +
  76 +# With the documents indexed and stored in the _ElasticSearch_ database, we want to search for them.
  77 +#
  78 +# Slingshot exposes the search interface via a simple domain-specific language.
  79 +
  80 +
  81 +##### Simple Query String Searches
  82 +
  83 +# We can do simple searches, like searching for articles containing “One” in their title.
  84 +s = Slingshot.search('articles') do
  85 + query do
  86 + string "title:One"
  87 + end
  88 +end
  89 +
  90 +# The results:
  91 +# * One [tags: ruby]
  92 +s.results.each do |document|
  93 + puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
  94 +end
  95 +
  96 +# Of course, we may write the blocks in shorter notation.
  97 +
  98 +# Let's search for articles whose titles begin with the letter “T”.
  99 +s = Slingshot.search('articles') { query { string "title:T*" } }
  100 +
  101 +# The results:
  102 +# * Two [tags: ruby, python]
  103 +# * Three [tags: java]
  104 +s.results.each do |document|
  105 + puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
  106 +end
  107 +
  108 +# We can use any valid [Lucene query syntax](http://lucene.apache.org/java/3_0_3/queryparsersyntax.html)
  109 +# for the query string queries.
  110 +
  111 +# For debugging, we can display the JSON which is being sent to _ElasticSearch_.
  112 +#
  113 +# {"query":{"query_string":{"query":"title:T*"}}}
  114 +#
  115 +puts "", "Query:", "-"*80
  116 +puts s.to_json
  117 +
  118 +# Or better, we may display a complete `curl` command, so we can execute it in terminal
  119 +# to see the raw output, tweak params and debug any problems.
  120 +#
  121 +# curl -X POST "http://localhost:9200/articles/_search?pretty=true" \
  122 +# -d '{"query":{"query_string":{"query":"title:T*"}}}'
  123 +#
  124 +puts "", "Try the query in Curl:", "-"*80
  125 +puts s.to_curl
  126 +
  127 +
  128 +##### Other Types of Queries
  129 +
  130 +# Of course, we may want to define our queries more expressively, for instance
  131 +# when we're searching for articles with specific _tags_.
  132 +
  133 +# Let's suppose we want to search for articles tagged “ruby” _or_ “python”.
  134 +# That's a great excuse to use a [_terms_](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
  135 +# query.
  136 +s = Slingshot.search('articles') do
  137 + query do
  138 + terms :tags, ['ruby', 'python']
  139 + end
  140 +end
  141 +
  142 +# The search, as expected, returns three articles, all tagged “ruby” — among other tags:
  143 +#
  144 +# * Two [tags: ruby, python]
  145 +# * One [tags: ruby]
  146 +# * Four [tags: ruby, php]
  147 +s.results.each do |document|
  148 + puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
  149 +end
  150 +
  151 +# What if we wanted to search for articles tagged both “ruby” _and_ “python”?
  152 +# That's a great excuse to specify `minimum_match` for the query.
  153 +s = Slingshot.search('articles') do
  154 + query do
  155 + terms :tags, ['ruby', 'python'], :minimum_match => 2
  156 + end
  157 +end
  158 +
  159 +# The search, as expected, returns one article, tagged with _both_ “ruby” and “python”:
  160 +#
  161 +# * Two [tags: ruby, python]
  162 +s.results.each do |document|
  163 + puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
  164 +end
  165 +
  166 +# _ElasticSearch_ allows us to do many more types of queries.
  167 +# Eventually, _Slingshot_ will support all of them.
  168 +# So far, only these are supported:
  169 +#
  170 +# * [term](http://elasticsearch.org/guide/reference/query-dsl/term-query.html)
  171 +# * [terms](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
  172 +
  173 +##### Faceted Search
  174 +
  175 +# _ElasticSearch_ makes it trivial to retrieve complex aggregated data from our index/database,
  176 +# so-called [_facets_](http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Faceted-Search-Solr).
  177 +
  178 +# Let's say we want to display article counts for every tag in the database.
  179 +# For that, we'll use a _terms_ facet.
  180 +
  181 +#
  182 +s = Slingshot.search 'articles' do
  183 + # We will search for articles whose title begins with letter “T”,
  184 + query { string 'title:T*' }
  185 +
  186 + # and retrieve their counts “bucketed” by their `tags`.
  187 + facet 'tags' do
  188 + terms :tags
  189 + end
  190 +end
  191 +
  192 +# As we see, our query has found two articles, and if you recall our articles from above,
  193 +# _Two_ is tagged with “ruby” and “python”, _Three_ is tagged with “java”. So the counts
  194 +# won't surprise us:
  195 +# Found 2 articles: Three, Two
  196 +# Counts based on tags:
  197 +# -------------------------
  198 +# ruby 1
  199 +# python 1
  200 +# java 1
  201 +puts "Found #{s.results.count} articles: #{s.results.map(&:title).join(', ')}"
  202 +puts "Counts based on tags:", "-"*25
  203 +s.results.facets['tags']['terms'].each do |f|
  204 + puts "#{f['term'].ljust(10)} #{f['count']}"
  205 +end
  206 +
  207 +# These counts are based on the scope of our current query (called `main` in _ElasticSearch_).
  208 +# What if we wanted to display aggregated counts by `tags` across the whole database?
  209 +
  210 +#
  211 +s = Slingshot.search 'articles' do
  212 + query { string 'title:T*' }
  213 +
  214 + facet 'global-tags' do
  215 + # That's where the `global` scope for a facet comes in.
  216 + terms :tags, :global => true
  217 + end
  218 +
  219 + # As you can see, we can even combine facets scoped
  220 + # to the current query with global facets.
  221 + facet 'current-tags' do
  222 + terms :tags
  223 + end
  224 +end
  225 +
  226 +# Aggregated results for the current query are the same as previously:
  227 +# Current query facets:
  228 +# -------------------------
  229 +# ruby 1
  230 +# python 1
  231 +# java 1
  232 +puts "Current query facets:", "-"*25
  233 +s.results.facets['current-tags']['terms'].each do |f|
  234 + puts "#{f['term'].ljust(10)} #{f['count']}"
  235 +end
  236 +
  237 +# As we see, aggregated results for the global scope also include
  238 +# tags for articles not matched by the query, such as “java” or “php”:
  239 +# Global facets:
  240 +# -------------------------
  241 +# ruby 3
  242 +# python 1
  243 +# php 1
  244 +# java 1
  245 +puts "Global facets:", "-"*25
  246 +s.results.facets['global-tags']['terms'].each do |f|
  247 + puts "#{f['term'].ljust(10)} #{f['count']}"
  248 +end
  249 +
  250 +# The real power of facets lies in their combination with
  251 +# [filters](http://elasticsearch.karmi.cz/guide/reference/api/search/filter.html),
  252 +# though:
  253 +
  254 +# > When doing things like facet navigation,
  255 +# > sometimes only the hits are needed to be filtered by the chosen facet,
  256 +# > and all the facets should continue to be calculated based on the original query.
  257 +
  258 +
  259 +##### Filtered Search
  260 +
  261 +# So, let's make our search a bit more complex. Let's search for articles whose titles begin
  262 +# with the letter “T”, again, but filter the results, so only the articles tagged “ruby”
  263 +# are returned.
  264 +s = Slingshot.search 'articles' do
  265 +
  266 + # We use the same **query** as before.
  267 + query { string 'title:T*' }
  268 +
  269 + # And add a _terms_ **filter** based on tags.
  270 + filter :terms, :tags => ['ruby']
  271 +
  272 + # And, of course, our facet definition.
  273 + facet('tags') { terms :tags }
  274 +
  275 +end
  276 +
  277 +# We see that only the article _Two_ (tagged “ruby” and “python”) was returned,
  278 +# _not_ the article _Three_ (tagged “java”):
  279 +#
  280 +# * Two [tags: ruby, python]
  281 +s.results.each do |document|
  282 + puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
  283 +end
  284 +
  285 +# However, the count for article _Three_'s tag, “java”, _is_ in fact included in the facets:
  286 +#
  287 +# Counts based on tags:
  288 +# -------------------------
  289 +# ruby 1
  290 +# python 1
  291 +# java 1
  292 +puts "Counts based on tags:", "-"*25
  293 +s.results.facets['tags']['terms'].each do |f|
  294 + puts "#{f['term'].ljust(10)} #{f['count']}"
  295 +end
  296 +
  297 +
  298 +##### Sorting
  299 +
  300 +# By default, the results are sorted according to their relevancy
  301 +# (available as the `_score` property).
  302 +
  303 +# But, what if we want to sort the results based on some other criteria,
  304 +# such as published date, price, etc? We can do that.
  305 +s = Slingshot.search 'articles' do
  306 + # We search for articles tagged “ruby”
  307 + query { string 'tags:ruby' }
  308 +
  309 + # And sort them by their `title`, in descending order.
  310 + sort { title 'desc' }
  311 +end
  312 +
  313 +# The results:
  314 +# * Two
  315 +# * One
  316 +# * Four
  317 +s.results.each do |document|
  318 + puts "* #{ document.title }"
  319 +end
  320 +
  321 +# Of course, it's possible to combine more fields in the sorting definition.
  322 +
  323 +s = Slingshot.search 'articles' do
  324 + # We will just get all articles for this case.
  325 + query { string '*' }
  326 +
  327 + sort do
  328 + # We will sort the results by their `published_on` property in ascending (default) order,
  329 + published_on
  330 + # and by their `title` property, in descending order.
  331 + title 'desc'
  332 + end
  333 +end
  334 +
  335 +# The results:
  336 +# * One (Published on: 2011-01-01)
  337 +# * Two (Published on: 2011-01-02)
  338 +# * Three (Published on: 2011-01-02)
  339 +# * Four (Published on: 2011-01-03)
  340 +s.results.each do |document|
  341 + puts "* #{ document.title.ljust(10) } (Published on: #{ document.published_on })"
  342 +end

0 comments on commit ad8b38f

Please sign in to comment.
Something went wrong with that request. Please try again.