In [3]:
from elasticsearch import Elasticsearch
import elasticsearch_dsl

In [4]:
from datetime import datetime
# Low-level client shared by the raw-API and DSL cells below. No hosts are
# passed, so the library's default connection settings apply.
es = Elasticsearch()

In [8]:
# Sample tweet-like payload that the next cell indexes into "test-index".
doc = dict(
    author='kimchy',
    text='Elasticsearch: cool. bonsai cool.',
    timestamp=datetime.now(),  # naive timestamp taken when the cell runs
)

In [9]:
# Address of the single demo document; shared by the index and get calls.
tweet_ref = dict(index="test-index", doc_type='tweet', id=1)

# Store the sample document under that fixed id and report what happened
# (the recorded run printed 'updated', i.e. id 1 already existed).
res = es.index(body=doc, **tweet_ref)
print(res['result'])

# Fetch the same document back by id and show its stored fields.
res = es.get(**tweet_ref)
print(res['_source'])

# Refresh the index before searching it.
es.indices.refresh(index="test-index")

# Search for every document in the index, then print the hit count followed
# by one line per hit.
match_all = {"query": {"match_all": {}}}
res = es.search(index="test-index", body=match_all)
print("Got %d Hits:" % res['hits']['total']['value'])
for source in (hit["_source"] for hit in res['hits']['hits']):
    print("%(timestamp)s %(author)s: %(text)s" % source)

updated
{'text': 'Elasticsearch: cool. bonsai cool.', 'author': 'kimchy', 'timestamp': '2019-04-21T22:19:59.891847'}
Got 4 Hits:
2019-04-21T21:46:58.764720 borat: de veghe in cacat
2019-04-21T21:46:58.764720 taranu: ce-ti mai place
2019-04-21T21:46:58.764720 americanu: veghe in lan
2019-04-21T22:19:59.891847 kimchy: Elasticsearch: cool. bonsai cool.


In [7]:
# Demo documents for "second_index", built from (author, text) pairs; each
# entry gets its own timestamp taken at the moment it is constructed.
bulk_list = [
    {'author': author, 'text': text, 'timestamp': datetime.now()}
    for author, text in (
        ('kimchy', 'Elasticsearch: cool. bonsai cool.'),
        ('borat', 'de veghe in cacat'),
        ('taranu', 'ce-ti mai place'),
        ('americanu', 'veghe in lan'),
    )
]

# Index the entries into "second_index" one request at a time, using each
# entry's 1-based position in bulk_list as its document id.
for doc_id, entry in enumerate(bulk_list, start=1):
    res = es.index(index="second_index", doc_type='book', id=doc_id, body=entry)
    print(res['result'])

created
created
created
created


In [5]:
# Refresh "second_index" before the query cells below read from it.
es.indices.refresh(index="second_index")

{'_shards': {'failed': 0, 'successful': 1, 'total': 2}}

## using elasticsearch-dsl

In [14]:
# Build a DSL Search object on the 'second_index' index, 'book' document type.
request = elasticsearch_dsl.Search(
    using=es,
    index='second_index',
    doc_type='book',
)

# Only fetch the 'text' and 'author' fields of each document.
request = request.source(['text', 'author'])

# Run the search through the scan interface to get all results rather than
# a single page. scan() hands back an iterator, so materialise it once into
# a list that can be inspected repeatedly.
response = request.scan()
dd = list(response)

## querying

In [37]:
# Base search bound to 'second_index' / 'book'; the query cells below all
# derive their searches from it.
s = elasticsearch_dsl.Search(
    using=es,
    index='second_index',
    doc_type='book',
)

# 1: match query for 'veghe' on the `text` field, written in the shortcut
# (query-name + keyword) form.
s1 = s.query('match', text='veghe')

In [33]:
# Run search #1 (match 'veghe' on `text`); the response is shown as the cell output.
s1.execute()

<Response: [<Hit(second_index/book/4): {'text': 'veghe in lan', 'author': 'americanu', 'timestamp':...}>, <Hit(second_index/book/2): {'text': 'de veghe in cacat', 'author': 'borat', 'timestamp'...}>]>

In [75]:
# 2
from elasticsearch_dsl.query import MultiMatch, Match, Fuzzy

# Query objects can also be built explicitly and handed to Search.query().

# Multi-field query: look for 'americanu' in both `text` and `author`.
# Serialised shape: {"multi_match": {"query": ..., "fields": [...]}}
mm = MultiMatch(query='americanu', fields=['text', 'author'])
s21 = s.query(mm)

# Single-field query on `text`, using the expanded per-field options form.
# Serialised shape: {"match": {"text": {"query": ...}}}
m = Match(text={"query": "veghe"})
s22 = s.query(m)

In [58]:
# Run the multi_match search ('americanu' across `text` and `author`).
s21.execute()

<Response: [<Hit(second_index/book/4): {'text': 'veghe in lan', 'author': 'americanu', 'timestamp':...}>]>

In [59]:
# Run the explicit Match search ('veghe' on `text`).
s22.execute()

<Response: [<Hit(second_index/book/4): {'text': 'veghe in lan', 'author': 'americanu', 'timestamp':...}>, <Hit(second_index/book/2): {'text': 'de veghe in cacat', 'author': 'borat', 'timestamp'...}>]>


## Making fuzzy queries using only the elasticsearch-dsl library

### indexing

In [101]:
from elasticsearch_dsl import Index, Document, Text, Integer, Completion, analyzer, tokenizer, connections

# Skill names to index; each becomes one {'name': ...} record, in this order.
skills = [
    {'name': skill_name}
    for skill_name in (
        'python',
        'c++',
        'pycharm',
        'php',
        'curl',
        'pyglass',
        'pig farm',
    )
]

# Tokenizer registered under the name 'trigram', although it is configured as
# an edge_ngram tokenizer with gram lengths from 1 to 20.
prefix_tokenizer = tokenizer('trigram', 'edge_ngram', min_gram=1, max_gram=20)

# Custom analyzer: the tokenizer above followed by the 'lowercase' filter.
my_analyzer = analyzer(
    'my_analyzer',
    tokenizer=prefix_tokenizer,
    filter=['lowercase'],
)

class SkillDoc(Document):
    """One skill, declared as an elasticsearch-dsl document for the 'skills' index."""

    # Skill name, analysed with the custom `my_analyzer`.
    name = Text(analyzer=my_analyzer)
    # Numeric id kept as a regular field of the document.
    id = Integer()

    class Index:
        # Name of the index this document class is bound to.
        name = 'skills'
    
# Register the default elasticsearch-dsl connection; SkillDoc.init()/save()
# below are called without an explicit `using=` and rely on it.
connections.create_connection(hosts=['localhost:9200'], timeout=20)

# Create the 'skills' index with SkillDoc's mapping and analysis settings
# before writing any document. Without this step the first save() lets
# Elasticsearch auto-create the index with a dynamically inferred mapping,
# and `my_analyzer` is never applied to `name`.
# NOTE(review): if a 'skills' index created without the analyzer already
# exists, delete it first; analysis settings cannot be changed on an open index.
SkillDoc.init()

# Save one document per skill, using the list position both as the `id`
# field and as the Elasticsearch document id (meta id).
for i, skill in enumerate(skills):
    sk = SkillDoc(
        name=skill['name'],
        id=i,
        meta={'id': i},
    )
    sk.save()

In [103]:
# Clean-up: get a handle on the 'skills' index and delete the whole index.
skills_index = Index('skills')
skills_index.delete()

{'acknowledged': True}