Skip to content
Browse files

Add sample program for facets

  • Loading branch information...
1 parent 3fa555d commit 27a375f250a308392d0fd15bd4553b3b6fdf8bd0 Will Kahn-Greene committed
Showing with 152 additions and 0 deletions.
  1. +8 −0 docs/index.rst
  2. +7 −0 docs/sampleprogram1.rst
  3. +137 −0 docs/samples/sample_facets.py
View
8 docs/index.rst
@@ -44,3 +44,11 @@ Contributor's Guide
dev_documentation
dev_testing
+
+Sample programs
+===============
+
+.. toctree::
+ :maxdepth: 1
+
+ sampleprogram1
View
7 docs/sampleprogram1.rst
@@ -0,0 +1,7 @@
+====================
+ Sample with facets
+====================
+
+.. literalinclude:: samples/sample_facets.py
+ :language: python
+ :linenos:
View
137 docs/samples/sample_facets.py
@@ -0,0 +1,137 @@
+"""
+This is a sample program that uses a pyes ES object to create an
+index, define a mapping, and index some data. Then it uses an
+ElasticUtils S object to show how facets behave.
+"""
+
+from elasticutils import get_es, S
+
+
+HOST = 'localhost:9200'
+INDEX = 'fooindex'
+DOCTYPE = 'testdoc'
+
+
+es = get_es(hosts=HOST, default_indexes=[INDEX])
+
+# This uses pyes ES.delete_index_if_exists.
+es.delete_index_if_exists(INDEX)
+
+# Define the mapping for the doctype 'testdoc'. It has an id field, an
+# analyzed title, and two fields holding lists of tags: 'product' is
+# not analyzed, while 'topics' is left analyzed to contrast them later.
+#
+# Note: The alternative for the tags is to analyze them with the
+# 'keyword' analyzer. Both leaving a field unanalyzed and using the
+# keyword analyzer treat each value as a single term rather than
+# tokenizing it into multiple terms.
+mapping = {
+ DOCTYPE: {
+ 'properties': {
+ 'id': {'type': 'integer'},
+ 'title': {'type': 'string'},
+ 'topics': {'type': 'string'},
+ 'product': {'type': 'string', 'index': 'not_analyzed'},
+ }
+ }
+ }
+
+# This uses pyes ES.create_index.
+es.create_index(INDEX, settings={'mappings': mapping})
+
+
+# This indexes a series of documents, each of which is a Python dict.
+for mem in [
+ {'id': 1,
+ 'title': 'Deleting cookies',
+ 'topics': ['cookies', 'privacy'],
+ 'product': ['Firefox', 'Firefox for mobile']},
+ {'id': 2,
+ 'title': 'What is a cookie?',
+ 'topics': ['cookies', 'privacy', 'basic'],
+ 'product': ['Firefox', 'Firefox for mobile']},
+ {'id': 3,
+ 'title': 'Websites say cookies are blocked - Unblock them',
+ 'topics': ['cookies', 'privacy', 'websites'],
+ 'product': ['Firefox', 'Firefox for mobile', 'Boot2Gecko']},
+ {'id': 4,
+ 'title': 'Awesome Bar',
+ 'topics': ['tips', 'search', 'basic', 'user interface'],
+ 'product': ['Firefox']},
+ {'id': 5,
+ 'title': 'Flash',
+ 'topics': ['flash'],
+ 'product': ['Firefox']},]:
+
+ es.index(mem, INDEX, DOCTYPE, id=mem['id'])
+
+# After indexing, refresh the index so the new documents are searchable.
+es.refresh(INDEX)
+
+# Let's build a basic S with the important things in it.
+basic_s = S().indexes(INDEX).doctypes(DOCTYPE).values_dict()
+
+# Now let's see facet counts for all the products.
+s = basic_s.facet('product')
+
+print s.facet_counts()
+# Pretty-printed output:
+# {u'product': [
+# {u'count': 5, u'term': u'Firefox'},
+# {u'count': 3, u'term': u'Firefox for mobile'},
+# {u'count': 1, u'term': u'Boot2Gecko'}
+# ]}
+
+# Let's do a query for 'cookie' and do a facet count.
+print s.query(title__text='cookie').facet_counts()
+# Pretty-printed output:
+# {u'product': [
+# {u'count': 1, u'term': u'Firefox for mobile'},
+# {u'count': 1, u'term': u'Firefox'}
+# ]}
+
+# The facet_counts are affected by the query (only id=2 matches 'cookie').
+
+# Let's do a filter for 'flash' in the topic.
+print s.filter(topics='flash').facet_counts()
+# Pretty-printed output:
+# {u'product': [
+# {u'count': 5, u'term': u'Firefox'},
+# {u'count': 3, u'term': u'Firefox for mobile'},
+# {u'count': 1, u'term': u'Boot2Gecko'}
+# ]}
+
+# By default, the facet_counts are NOT affected by filters.
+
+# Let's do a filter for 'flash' in the topic, and specify filtered=True.
+print s.facet('product', filtered=True).filter(topics='flash').facet_counts()
+# Pretty-printed output:
+# {u'product': [
+# {u'count': 1, u'term': u'Firefox'}
+# ]}
+
+# We've done a bunch of faceting on a field that is not
+# analyzed. Let's look at what happens when we try to use facets on a
+# field that is analyzed.
+print basic_s.facet('topics').facet_counts()
+# Pretty-printed output:
+# {u'topics': [
+# {u'count': 3, u'term': u'privacy'},
+# {u'count': 3, u'term': u'cookies'},
+# {u'count': 2, u'term': u'basic'},
+# {u'count': 1, u'term': u'websites'},
+# {u'count': 1, u'term': u'user'},
+# {u'count': 1, u'term': u'tips'},
+# {u'count': 1, u'term': u'search'},
+# {u'count': 1, u'term': u'interface'},
+# {u'count': 1, u'term': u'flash'}
+# ]}
+
+# Note how the facet counts show 'user' and 'interface' as two
+# separate terms even though they're a single topic for the document
+# with id=4. When that document is indexed, the topics field is analyzed
+# and the default analyzer tokenizes 'user interface' into two terms.
+#
+# Moral of the story is that you want fields you facet on to be
+# analyzed as keyword fields or not analyzed at all.
+

0 comments on commit 27a375f

Please sign in to comment.
Something went wrong with that request. Please try again.