Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Added file with the full DSL example, used for documentation/website

  • Loading branch information...
commit ad8b38fb3e832817d76f8e5730eb092c4ca925d0 1 parent ae131a5
@karmi karmi authored
Showing with 353 additions and 8 deletions.
  1. +1 −0  .gitignore
  2. +10 −8 Rakefile
  3. +342 −0 examples/slingshot-dsl.rb
View
1  .gitignore
@@ -5,3 +5,4 @@ pkg/*
rdoc/
coverage/
scratch/
+examples/*.html
View
18 Rakefile
@@ -53,14 +53,8 @@ end
namespace :web do
- desc "Generate and update website documentation"
- task :update do
- system "rocco examples/slingshot-dsl.rb"
- html = File.read('examples/slingshot-dsl.html').gsub!(/slingshot\-dsl\.rb/, 'slingshot.rb')
- File.open('examples/slingshot-dsl.html', 'w') { |f| f.write html }
- system "open examples/slingshot-dsl.html"
-
- # Update the Github website
+ desc "Update the Github website"
+ task :update => :generate do
current_branch = `git branch --no-color`.split("\n").select { |line| line =~ /^\* / }.to_s.gsub(/\* (.*)/, '\1')
(puts "Unable to determine current branch"; exit(1) ) unless current_branch
system "git stash save && git checkout web"
@@ -69,4 +63,12 @@ namespace :web do
system "git push origin web:gh-pages -f"
system "git checkout #{current_branch} && git stash pop"
end
+
+ desc "Generate the Rocco documentation page"
+ task :generate do
+ system "rocco examples/slingshot-dsl.rb"
+ html = File.read('examples/slingshot-dsl.html').gsub!(/slingshot\-dsl\.rb/, 'slingshot.rb')
+ File.open('examples/slingshot-dsl.html', 'w') { |f| f.write html }
+ system "open examples/slingshot-dsl.html"
+ end
end
View
342 examples/slingshot-dsl.rb
@@ -0,0 +1,342 @@
+# **Slingshot** is a rich and comfortable Ruby API and DSL for the
+# [_ElasticSearch_](http://www.elasticsearch.org/) search engine/database.
+#
+# _ElasticSearch_ is a scalable, distributed, highly-available,
+# RESTful database communicating by JSON over HTTP, based on [Lucene](http://lucene.apache.org/),
+# written in Java. It manages to be very simple and very powerful at the same time.
+#
+# By following these instructions you should have the search running
+# on a sane operating system in less than 10 minutes.
+
+#### Installation
+
+# Install Slingshot with Rubygems.
+#
+# gem install slingshot-rb
+#
+require 'rubygems'
+require 'slingshot'
+
+#### Prerequisites
+
+# You'll need a working and running _ElasticSearch_ server. Thankfully, that's easy.
+#
+# The connection check is wrapped in parentheses together with its inline `rescue`,
+# so a refused connection counts as “not running” and the instructions below are printed.
+( puts <<-"INSTALL" ; exit(1) ) unless (RestClient.get('http://localhost:9200') rescue false)
+ [!] You don’t appear to have ElasticSearch installed. Please install and launch it with the following commands.
+ curl -k -L -o elasticsearch-0.15.0.tar.gz http://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-0.15.0.tar.gz
+ tar -zxvf elasticsearch-0.15.0.tar.gz
+ ./elasticsearch-0.15.0/bin/elasticsearch -f
+INSTALL
+
+### Simple Usage
+
+#### Storing and indexing documents
+
+# Let's initialize an index named “articles”.
+Slingshot.index 'articles' do
+ # To make sure it's fresh, let's delete any existing index with the same name.
+ delete
+ # And then, let's create it.
+ create
+
+ # We want to store and index some articles with title and tags. Simple Hashes are OK.
+ store :title => 'One', :tags => ['ruby'], :published_on => '2011-01-01'
+ store :title => 'Two', :tags => ['ruby', 'python'], :published_on => '2011-01-02'
+ store :title => 'Three', :tags => ['java'], :published_on => '2011-01-02'
+ store :title => 'Four', :tags => ['ruby', 'php'], :published_on => '2011-01-03'
+
+ # We force refreshing the index, so we can query it immediately.
+ refresh
+end
+
+# We may want to define a specific [mapping](http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html)
+# for the index.
+
+Slingshot.index 'articles' do
+ # To do so, just pass a Hash containing the specified mapping to the `Index#create` method.
+ create :mappings => {
+ # Specify for which type of documents this mapping should be used (`article` in this case).
+ :article => {
+ :properties => {
+ # Specify the type of the field, whether it should be analyzed, etc.
+ :id => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
+ # Set the boost or analyzer settings for the field, et cetera. The _ElasticSearch_ guide
+ # has [more information](http://elasticsearch.org/guide/reference/mapping/index.html).
+ :title => { :type => 'string', :boost => 2.0, :analyzer => 'snowball' },
+ :tags => { :type => 'string', :analyzer => 'keyword' },
+ :content => { :type => 'string', :analyzer => 'snowball' }
+ }
+ }
+ }
+end
+
+
+
+#### Searching
+
+# With the documents indexed and stored in the _ElasticSearch_ database, we want to search for them.
+#
+# Slingshot exposes the search interface via a simple domain-specific language.
+
+
+##### Simple Query String Searches
+
+# We can do simple searches, like searching for articles containing “One” in their title.
+s = Slingshot.search('articles') do
+ query do
+ string "title:One"
+ end
+end
+
+# The results:
+# * One [tags: ruby]
+s.results.each do |document|
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
+end
+
+# Of course, we may write the blocks in shorter notation.
+
+# Let's search for articles whose titles begin with letter “T”.
+s = Slingshot.search('articles') { query { string "title:T*" } }
+
+# The results:
+# * Two [tags: ruby, python]
+# * Three [tags: java]
+s.results.each do |document|
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
+end
+
+# We can use any valid [Lucene query syntax](http://lucene.apache.org/java/3_0_3/queryparsersyntax.html)
+# for the query string queries.
+
+# For debugging, we can display the JSON which is being sent to _ElasticSearch_.
+#
+# {"query":{"query_string":{"query":"title:T*"}}}
+#
+puts "", "Query:", "-"*80
+puts s.to_json
+
+# Or better, we may display a complete `curl` command, so we can execute it in terminal
+# to see the raw output, tweak params and debug any problems.
+#
+# curl -X POST "http://localhost:9200/articles/_search?pretty=true" \
+# -d '{"query":{"query_string":{"query":"title:T*"}}}'
+#
+puts "", "Try the query in Curl:", "-"*80
+puts s.to_curl
+
+
+##### Other Types of Queries
+
+# Of course, we may want to define our queries more expressively, for instance
+# when we're searching for articles with specific _tags_.
+
+# Let's suppose we want to search for articles tagged “ruby” _or_ “python”.
+# That's a great excuse to use a [_terms_](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
+# query.
+s = Slingshot.search('articles') do
+ query do
+ terms :tags, ['ruby', 'python']
+ end
+end
+
+# The search, as expected, returns three articles, all tagged “ruby” — among other tags:
+#
+# * Two [tags: ruby, python]
+# * One [tags: ruby]
+# * Four [tags: ruby, php]
+s.results.each do |document|
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
+end
+
+# What if we wanted to search for articles tagged both “ruby” _and_ “python”?
+# That's a great excuse to specify `minimum_match` for the query.
+s = Slingshot.search('articles') do
+ query do
+ terms :tags, ['ruby', 'python'], :minimum_match => 2
+ end
+end
+
+# The search, as expected, returns one article, tagged with _both_ “ruby” and “python”:
+#
+# * Two [tags: ruby, python]
+s.results.each do |document|
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
+end
+
+# _ElasticSearch_ allows us to do many more types of queries.
+# Eventually, _Slingshot_ will support all of them.
+# So far, only these are supported:
+#
+# * [term](http://elasticsearch.org/guide/reference/query-dsl/term-query.html)
+# * [terms](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
+
+##### Faceted Search
+
+# _ElasticSearch_ makes it trivial to retrieve complex aggregated data from our index/database,
+# so-called [_facets_](http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Faceted-Search-Solr).
+
+# Let's say we want to display article counts for every tag in the database.
+# For that, we'll use a _terms_ facet.
+
+#
+s = Slingshot.search 'articles' do
+ # We will search for articles whose title begins with letter “T”,
+ query { string 'title:T*' }
+
+ # and retrieve their counts “bucketed” by their `tags`.
+ facet 'tags' do
+ terms :tags
+ end
+end
+
+# As we see, our query has found two articles, and if you recall our articles from above,
+# _Two_ is tagged with “ruby” and “python”, _Three_ is tagged with “java”. So the counts
+# won't surprise us:
+# Found 2 articles: Three, Two
+# Counts based on tags:
+# -------------------------
+# ruby 1
+# python 1
+# java 1
+puts "Found #{s.results.count} articles: #{s.results.map(&:title).join(', ')}"
+puts "Counts based on tags:", "-"*25
+s.results.facets['tags']['terms'].each do |f|
+ puts "#{f['term'].ljust(10)} #{f['count']}"
+end
+
+# These counts are based on the scope of our current query (called `main` in _ElasticSearch_).
+# What if we wanted to display aggregated counts by `tags` across the whole database?
+
+#
+s = Slingshot.search 'articles' do
+ query { string 'title:T*' }
+
+ facet 'global-tags' do
+ # That's where the `global` scope for a facet comes in.
+ terms :tags, :global => true
+ end
+
+ # As you can see, we can even combine facets scoped
+ # to the current query with global facets.
+ facet 'current-tags' do
+ terms :tags
+ end
+end
+
+# Aggregated results for the current query are the same as previously:
+# Current query facets:
+# -------------------------
+# ruby 1
+# python 1
+# java 1
+puts "Current query facets:", "-"*25
+s.results.facets['current-tags']['terms'].each do |f|
+ puts "#{f['term'].ljust(10)} #{f['count']}"
+end
+
+# As we see, aggregated results for the global scope also include
+# tags for articles not matched by the query, such as “java” or “php”:
+# Global facets:
+# -------------------------
+# ruby 3
+# python 1
+# php 1
+# java 1
+puts "Global facets:", "-"*25
+s.results.facets['global-tags']['terms'].each do |f|
+ puts "#{f['term'].ljust(10)} #{f['count']}"
+end
+
+# The real power of facets lies in their combination with
+# [filters](http://elasticsearch.karmi.cz/guide/reference/api/search/filter.html),
+# though:
+
+# > When doing things like facet navigation,
+# > sometimes only the hits are needed to be filtered by the chosen facet,
+# > and all the facets should continue to be calculated based on the original query.
+
+
+##### Filtered Search
+
+# So, let's make our search a bit more complex. Let's search for articles whose titles begin
+# with letter “T”, again, but filter the results, so only the articles tagged “ruby”
+# are returned.
+s = Slingshot.search 'articles' do
+
+ # We use the same **query** as before.
+ query { string 'title:T*' }
+
+ # And add a _terms_ **filter** based on tags.
+ filter :terms, :tags => ['ruby']
+
+ # And, of course, our facet definition.
+ facet('tags') { terms :tags }
+
+end
+
+# We see that only the article _Two_ (tagged “ruby” and “python”) was returned,
+# _not_ the article _Three_ (tagged “java”):
+#
+# * Two [tags: ruby, python]
+s.results.each do |document|
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
+end
+
+# However, the count for article _Three_'s tag, “java”, _is_ in fact included in the facets:
+#
+# Counts based on tags:
+# -------------------------
+# ruby 1
+# python 1
+# java 1
+puts "Counts based on tags:", "-"*25
+s.results.facets['tags']['terms'].each do |f|
+ puts "#{f['term'].ljust(10)} #{f['count']}"
+end
+
+
+##### Sorting
+
+# By default, the results are sorted according to their relevancy
+# (available as the `_score` property).
+
+# But, what if we want to sort the results based on some other criteria,
+# such as published date, price, etc? We can do that.
+s = Slingshot.search 'articles' do
+ # We search for articles tagged “ruby”
+ query { string 'tags:ruby' }
+
+ # And sort them by their `title`, in descending order.
+ sort { title 'desc' }
+end
+
+# The results:
+# * Two
+# * One
+# * Four
+s.results.each do |document|
+ puts "* #{ document.title }"
+end
+
+# Of course, it's possible to combine more fields in the sorting definition.
+
+s = Slingshot.search 'articles' do
+ # We will just get all articles for this case.
+ query { string '*' }
+
+ sort do
+ # We will sort the results by their `published_on` property in ascending (default) order,
+ published_on
+ # and by their `title` property, in descending order.
+ title 'desc'
+ end
+end
+
+# The results:
+# * One (Published on: 2011-01-01)
+# * Two (Published on: 2011-01-02)
+# * Three (Published on: 2011-01-02)
+# * Four (Published on: 2011-01-03)
+s.results.each do |document|
+ puts "* #{ document.title.ljust(10) } (Published on: #{ document.published_on })"
+end
Please sign in to comment.
Something went wrong with that request. Please try again.