In [None]:
""" Mapping in Kibana"""

"""
PUT books
{
    "settings" : {
        "number_of_shards" : 2
        , "number_of_replicas": 2,
    
    "analysis": {
      "normalizer": {
         "my_normalizer": {
           "type": "custom",
           "char_filter": [],
           "filter": ["uppercase", "asciifolding"]
         }
       },
      "analyzer": {
        "standard_analyzer": {
          "type": "standard",
          "max_token_length": 5,
          "stopwords": "_english_"
        },
          "simple_analyzer": {
              "type":"simple"
          },
          "whitespace_analyzer" :{
              "type" : "whitespace"
          },
          "stop_analyzer" : {
              "type" : "stop", 
              "stopwords" : [ "the","of","over" ]
          },
          "keyword_analyzer" :{
              "type" : "keyword"
          },
          "pattern_analyzer" : {
              "type" : "pattern",
              "pattern" : "\\w|_",
              "lowercase" : true 
          },
          "fingerprint_analyzer" : {
              "type" : "fingerprint" ,
              "stopwords" : "_english_"
          },
          "cust_analyzer":{
              "type" : "custom" , 
              "tokenizer" : "standard",
              "char_filter" : [
                  "html_strip"
                  ],
              "filter": [ "lowercase", "asciifolding"]
          }
      }
      }
    },
    "mappings" : {
        "bookinfo" : {
            "properties" : {
                "book_id":{
                  "type" : "text"
                },
                "breadcrumbs" : {
                  "type" : "text"
                  },
                "desc" : {
                  "type" : "text"
                  , "similarity": "classic"
                },
                "list_price" : {
                  "type" : "text"
                },
                "title":{
                  "type" : "keyword",
                  "similarity": "boolean"
                  , "boost": 3
                },
                "isbn":{
                  "type" : "text"
                }
                
            }
        }
    }
}

PUT normalize
 {
   "settings": {
     "analysis": {
       "normalizer": {
         "my_normalizer": {
           "type": "custom",
           "char_filter": [],
           "filter": ["uppercase", "asciifolding"]
         }
       }
     }
   },
   "mappings": {
     "type": {
       "properties": {
         "name": {
           "type": "keyword",
           "normalizer": "my_normalizer"
         }
       }
     }
   }
 }
 
  PUT normalize/type/p1
 {
   "name": "BAR"
 }
 PUT normalize/type/p2
 {
   "name": "bAr"
 }
 PUT normalize/type/p3
 {
   "name": "BAR band hai "
 }
 PUT normalize/type/p4
 {
   "name": "bar band hai "
 }
 PUT normalize/type/p5
 {
   "name": "Bazzigar"
 }
 """

In [12]:
import csv
from elasticsearch import Elasticsearch, helpers

In [2]:
es = Elasticsearch()

In [8]:
with open('Desktop/Python/paytm_com-ecommerce_sample.csv', 'rU') as f:
  reader = csv.DictReader(f)
  helpers.bulk(es, reader, index='books', doc_type='bookinfo')




In [12]:
anabody= {
        
  "analyzer": "standard_analyzer",
  "text": " The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."

        }
es.indices.analyze(index="books" ,body= anabody)

{u'tokens': [{u'end_offset': 6,
   u'position': 1,
   u'start_offset': 5,
   u'token': u'2',
   u'type': u'<NUM>'},
  {u'end_offset': 12,
   u'position': 2,
   u'start_offset': 7,
   u'token': u'quick',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 18,
   u'position': 3,
   u'start_offset': 13,
   u'token': u'brown',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 24,
   u'position': 4,
   u'start_offset': 19,
   u'token': u'foxes',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 30,
   u'position': 5,
   u'start_offset': 25,
   u'token': u'jumpe',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 31,
   u'position': 6,
   u'start_offset': 30,
   u'token': u'd',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 36,
   u'position': 7,
   u'start_offset': 32,
   u'token': u'over',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 45,
   u'position': 9,
   u'start_offset': 41,
   u'token': u'lazy',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 51,
   u'position': 10,
   u'start_offset': 46,
   u'token': 

In [13]:
anabody= {
        
  "analyzer": "simple_analyzer",
  "text": " The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."

        }
es.indices.analyze(index="books" ,body= anabody)

{u'tokens': [{u'end_offset': 4,
   u'position': 0,
   u'start_offset': 1,
   u'token': u'the',
   u'type': u'word'},
  {u'end_offset': 12,
   u'position': 1,
   u'start_offset': 7,
   u'token': u'quick',
   u'type': u'word'},
  {u'end_offset': 18,
   u'position': 2,
   u'start_offset': 13,
   u'token': u'brown',
   u'type': u'word'},
  {u'end_offset': 24,
   u'position': 3,
   u'start_offset': 19,
   u'token': u'foxes',
   u'type': u'word'},
  {u'end_offset': 31,
   u'position': 4,
   u'start_offset': 25,
   u'token': u'jumped',
   u'type': u'word'},
  {u'end_offset': 36,
   u'position': 5,
   u'start_offset': 32,
   u'token': u'over',
   u'type': u'word'},
  {u'end_offset': 40,
   u'position': 6,
   u'start_offset': 37,
   u'token': u'the',
   u'type': u'word'},
  {u'end_offset': 45,
   u'position': 7,
   u'start_offset': 41,
   u'token': u'lazy',
   u'type': u'word'},
  {u'end_offset': 49,
   u'position': 8,
   u'start_offset': 46,
   u'token': u'dog',
   u'type': u'word'},
  {u'end_

In [14]:
anabody= {
        
  "analyzer": "whitespace_analyzer",
  "text": " The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."

        }
es.indices.analyze(index="books" ,body= anabody)

{u'tokens': [{u'end_offset': 4,
   u'position': 0,
   u'start_offset': 1,
   u'token': u'The',
   u'type': u'word'},
  {u'end_offset': 6,
   u'position': 1,
   u'start_offset': 5,
   u'token': u'2',
   u'type': u'word'},
  {u'end_offset': 12,
   u'position': 2,
   u'start_offset': 7,
   u'token': u'QUICK',
   u'type': u'word'},
  {u'end_offset': 24,
   u'position': 3,
   u'start_offset': 13,
   u'token': u'Brown-Foxes',
   u'type': u'word'},
  {u'end_offset': 31,
   u'position': 4,
   u'start_offset': 25,
   u'token': u'jumped',
   u'type': u'word'},
  {u'end_offset': 36,
   u'position': 5,
   u'start_offset': 32,
   u'token': u'over',
   u'type': u'word'},
  {u'end_offset': 40,
   u'position': 6,
   u'start_offset': 37,
   u'token': u'the',
   u'type': u'word'},
  {u'end_offset': 45,
   u'position': 7,
   u'start_offset': 41,
   u'token': u'lazy',
   u'type': u'word'},
  {u'end_offset': 51,
   u'position': 8,
   u'start_offset': 46,
   u'token': u"dog's",
   u'type': u'word'},
  {u'en

In [15]:
anabody= {
        
  "analyzer": "stop_analyzer",
  "text": " The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."

        }
es.indices.analyze(index="books" ,body= anabody)

{u'tokens': [{u'end_offset': 12,
   u'position': 1,
   u'start_offset': 7,
   u'token': u'quick',
   u'type': u'word'},
  {u'end_offset': 18,
   u'position': 2,
   u'start_offset': 13,
   u'token': u'brown',
   u'type': u'word'},
  {u'end_offset': 24,
   u'position': 3,
   u'start_offset': 19,
   u'token': u'foxes',
   u'type': u'word'},
  {u'end_offset': 31,
   u'position': 4,
   u'start_offset': 25,
   u'token': u'jumped',
   u'type': u'word'},
  {u'end_offset': 45,
   u'position': 7,
   u'start_offset': 41,
   u'token': u'lazy',
   u'type': u'word'},
  {u'end_offset': 49,
   u'position': 8,
   u'start_offset': 46,
   u'token': u'dog',
   u'type': u'word'},
  {u'end_offset': 51,
   u'position': 9,
   u'start_offset': 50,
   u'token': u's',
   u'type': u'word'},
  {u'end_offset': 56,
   u'position': 10,
   u'start_offset': 52,
   u'token': u'bone',
   u'type': u'word'}]}

In [16]:
anabody= {
        
  "analyzer": "fingerprint_analyzer",
  "text": "Yes yes, Gödel said this sentence is consistent and."
        }
es.indices.analyze(index="books" ,body= anabody)

{u'tokens': [{u'end_offset': 52,
   u'position': 0,
   u'start_offset': 0,
   u'token': u'consistent godel said sentence yes',
   u'type': u'fingerprint'}]}

In [17]:
anabody= {
        
      "analyzer": "cust_analyzer",
    "text": "Is this <b>déjà vu</b>?"
        }
es.indices.analyze(index="books" ,body= anabody)

{u'tokens': [{u'end_offset': 2,
   u'position': 0,
   u'start_offset': 0,
   u'token': u'is',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 7,
   u'position': 1,
   u'start_offset': 3,
   u'token': u'this',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 15,
   u'position': 2,
   u'start_offset': 11,
   u'token': u'deja',
   u'type': u'<ALPHANUM>'},
  {u'end_offset': 22,
   u'position': 3,
   u'start_offset': 16,
   u'token': u'vu',
   u'type': u'<ALPHANUM>'}]}

In [31]:
es.search(index="books",body={
  "query": {
    "bool" : {
      "must" : {
        "term" : { "isbn" : "95d2c4ad11ae01b8fd125a7bea56c123" }
      },
        "filter": {
        "term" : {"book_id" : "57"}
      }
    }
  }
})

{u'_shards': {u'failed': 0, u'skipped': 0, u'successful': 2, u'total': 2},
 u'hits': {u'hits': [{u'_id': u'3s-QLGEBkjedA5ZS4zBV',
    u'_index': u'books',
    u'_score': 6.468961,
    u'_source': {u'book_id': u'57',
     u'breadcrumbs': u'Books | Literature & Fiction | Romance | The Golden Valkyrie',
     u'desc': u'{"Description"=>{"Brand"=>"Random House", "Product Code"=>"9780553591675_44120", "ISBN-13"=>"9780553591675", "ISBN-10"=>"553591673", "Authored By"=>"Iris Johansen", "Publication Year"=>"2008", "Imprint"=>"Bantam", "Binding"=>"Others", "Languages"=>"English"}, "Shipping Details"=>{"Estimated Arrival"=>"23 - 26 days", "Return Policy"=>"Seller will accept returns within  7 days from date of delivery of the item only in the case of damaged, defective or wrong products received. Order once placed , will not be cancelled."}}',
     u'isbn': u'95d2c4ad11ae01b8fd125a7bea56c123',
     u'list_price': u'416',
     u'title': u'The Golden Valkyrie'},
    u'_type': u'bookinfo'},
   {u'_i

In [38]:
es.search(index="books",body={
    "query": {
        "boosting" : {
            "positive" : {
                "term" : {
                    "book_id" : "72"
                }
            },
            "negative" : {
                 "match" : {
                     "list_price" : "395"
                }
            },
            "negative_boost" : 0.2
        }
    }
})

{u'_shards': {u'failed': 0, u'skipped': 0, u'successful': 2, u'total': 2},
 u'hits': {u'hits': [{u'_id': u'xs-QLGEBkjedA5ZS4zBV',
    u'_index': u'books',
    u'_score': 5.1474943,
    u'_source': {u'book_id': u'72',
     u'breadcrumbs': u"Books | Literature & Fiction | Romance | C.J.'s Fate",
     u'desc': u'{"Description"=>{"Product Code"=>"9780553590487_20567", "ISBN-13"=>"9780553590487", "ISBN-10"=>"553590480", "Authored By"=>"Kay Hooper", "Publication Year"=>"2007", "Imprint"=>"Bantam", "Binding"=>"Others", "Languages"=>"English", "ISBN 13"=>"9780553590487"}, "Shipping Details"=>{"Estimated Arrival"=>"8 - 11 days", "Return Policy"=>"Seller will accept returns within a 15 days from date of delivery of the item"}}',
     u'isbn': u'0448410537f096ca8766c1072d1e4419',
     u'list_price': u'358',
     u'title': u"C.J.'s Fate"},
    u'_type': u'bookinfo'},
   {u'_id': u'cc-QLGEBkjedA5ZS4zJW',
    u'_index': u'books',
    u'_score': 5.1474943,
    u'_source': {u'book_id': u'72',
     u'b

In [44]:
es.search(index="books",body={
  "query": {
    "bool" : {
      "must" : {
        "match" : { "title" : "Potter" }
      },
      "should" : {
          "match" : {
              "title" : "Harry" 
          },
          "match" : {
            "list_price" : "1699"
          },
          "match" : {
             "book_id" : "224"
          }
      },
      "minimum_should_match" : 1,
      "boost" : 1.0
    }
  }
})

{u'_shards': {u'failed': 0, u'skipped': 0, u'successful': 2, u'total': 2},
 u'hits': {u'hits': [{u'_id': u'7M-RLGEBkjedA5ZS5jZ_',
    u'_index': u'books',
    u'_score': 9.4219475,
    u'_source': {u'book_id': u'224',
     u'breadcrumbs': u'Books | Literature & Fiction | Famous Series | Harry Potter And The Order Of',
     u'desc': u'{"Description"=>{"Brand"=>"Books", "Product Code"=>"9781408855690_20038", "ISBN-13"=>"9781408855690", "Binding"=>"Paperback", "Languages"=>"English", "Publication Year"=>"2014", "ISBN 13"=>"CMPLX9781408855690"}, "Shipping Details"=>{"Estimated Arrival"=>"9 - 12 days", "Return Policy"=>"Seller will accept returns within a 15 days from date of delivery of the item"}}',
     u'isbn': u'b5ce044821a0372312fa06de818786c6',
     u'list_price': u'699',
     u'title': u'Harry Potter And The Order Of The Phoenix'},
    u'_type': u'bookinfo'},
   {u'_id': u'Ec-QLGEBkjedA5ZS4zFV',
    u'_index': u'books',
    u'_score': 9.371269,
    u'_source': {u'book_id': u'224',
 

In [9]:
normalizedQuery={
  "query": {
    "match": {
      "name": "bar"
    }
  }
}
res = es.search(index="normalize",body=normalizedQuery)
print res

{u'hits': {u'hits': [{u'_score': 0.6931472, u'_type': u'type', u'_id': u'p1', u'_source': {u'name': u'BAR'}, u'_index': u'normalize'}, {u'_score': 0.2876821, u'_type': u'type', u'_id': u'p2', u'_source': {u'name': u'bAr'}, u'_index': u'normalize'}], u'total': 2, u'max_score': 0.6931472}, u'_shards': {u'successful': 5, u'failed': 0, u'skipped': 0, u'total': 5}, u'took': 3, u'timed_out': False}


In [11]:
aggregateQuery={
  "size": 0,
  "aggs": {
    "name_terms": {
      "terms": {
        "field": "name"
      }
    }
  }
}
res = es.search(index="normalize",body=aggregateQuery)
print res

{u'hits': {u'hits': [], u'total': 5, u'max_score': 0.0}, u'_shards': {u'successful': 5, u'failed': 0, u'skipped': 0, u'total': 5}, u'took': 1476, u'aggregations': {u'name_terms': {u'buckets': [{u'key': u'BAR', u'doc_count': 2}, {u'key': u'BAR BAND HAI ', u'doc_count': 2}, {u'key': u'BAZZIGAR', u'doc_count': 1}], u'sum_other_doc_count': 0, u'doc_count_error_upper_bound': 0}}, u'timed_out': False}
