#### Setup

In [16]:
!pip install elasticsearch
!pip install joblib

Collecting joblib
[?25l  Downloading https://files.pythonhosted.org/packages/4f/51/870b2ec270fc29c5d89f85353da420606a9cb39fba4747127e7c7d7eb25d/joblib-0.11-py2.py3-none-any.whl (176kB)
[K    100% |████████████████████████████████| 184kB 671kB/s ta 0:00:01
[?25hInstalling collected packages: joblib
Successfully installed joblib-0.11


#### Check if Elaslticsearch is responding

In [8]:
import requests
import pprint as pp

In [4]:
 response = requests.get('http://localhost:9200')

In [9]:
pp.pprint(response.content)

(b'{\n  "name" : "VtgP4eS",\n  "cluster_name" : "elasticsearch",\n  "cluster_u'
 b'uid" : "wGMHjF_-RHOn5-8CgCBx9Q",\n  "version" : {\n    "number" : "6.2.4",'
 b'\n    "build_hash" : "ccec39f",\n    "build_date" : "2018-04-12T20:37:28.4'
 b'97551Z",\n    "build_snapshot" : false,\n    "lucene_version" : "7.2.1",\n '
 b'   "minimum_wire_compatibility_version" : "5.6.0",\n    "minimum_index_co'
 b'mpatibility_version" : "5.0.0"\n  },\n  "tagline" : "You Know, for Search"'
 b'\n}\n')


#### Check using elasticsearch library

In [10]:
from elasticsearch import Elasticsearch

In [11]:
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

#### Index data from the Star Wars API

In [17]:
import json
from joblib import Parallel, delayed

In [39]:
def get_star_wars_character(index: int) -> 'bytes':
    response = requests.get('http://swapi.co/api/people/{}'.format(index))
    return json.loads(response.content)

In [45]:
# Delete index if already present
es.indices.delete(index='star_wars', ignore=[400, 404])

{'acknowledged': True}

In [29]:
# Make sure Elasticsearch is still working
response = requests.get('http://localhost:9200')

i = 0
while response.status_code == 200:
    i += 1
    # Read people from the Star Wars API
    response = requests.get('http://swapi.co/api/people/{}'.format(i))
#     es.index(index='star_wars', doc_type='characters', id=i, body=json.loads(response.content))
    print(type(response.content))

<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>
<class 'bytes'>


In [46]:
contents_list = Parallel(n_jobs=32)(delayed(get_star_wars_character)(i) for i in range(100))

In [47]:
contents_list_filtered = [content for content in contents_list if 'detail' not in content]

In [48]:
contents_list_filtered

[{'birth_year': '19BBY',
  'created': '2014-12-09T13:50:51.644000Z',
  'edited': '2014-12-20T21:17:56.891000Z',
  'eye_color': 'blue',
  'films': ['https://swapi.co/api/films/2/',
   'https://swapi.co/api/films/6/',
   'https://swapi.co/api/films/3/',
   'https://swapi.co/api/films/1/',
   'https://swapi.co/api/films/7/'],
  'gender': 'male',
  'hair_color': 'blond',
  'height': '172',
  'homeworld': 'https://swapi.co/api/planets/1/',
  'mass': '77',
  'name': 'Luke Skywalker',
  'skin_color': 'fair',
  'species': ['https://swapi.co/api/species/1/'],
  'starships': ['https://swapi.co/api/starships/12/',
   'https://swapi.co/api/starships/22/'],
  'url': 'https://swapi.co/api/people/1/',
  'vehicles': ['https://swapi.co/api/vehicles/14/',
   'https://swapi.co/api/vehicles/30/']},
 {'birth_year': '112BBY',
  'created': '2014-12-10T15:10:51.357000Z',
  'edited': '2014-12-20T21:17:50.309000Z',
  'eye_color': 'yellow',
  'films': ['https://swapi.co/api/films/2/',
   'https://swapi.co/api/fi

In [49]:
for i in range (len(contents_list_filtered)):
    es.index(index='star_wars', doc_type='characters', id=i+1, body=contents_list_filtered[i])

#### Search

In [51]:
# Match
es.search(index='star_wars', body={'query': {'match': {'name': 'Darth Vader'}}})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '4',
    '_index': 'star_wars',
    '_score': 4.344671,
    '_source': {'birth_year': '41.9BBY',
     'created': '2014-12-10T15:18:20.704000Z',
     'edited': '2014-12-20T21:17:50.313000Z',
     'eye_color': 'yellow',
     'films': ['https://swapi.co/api/films/2/',
      'https://swapi.co/api/films/6/',
      'https://swapi.co/api/films/3/',
      'https://swapi.co/api/films/1/'],
     'gender': 'male',
     'hair_color': 'none',
     'height': '202',
     'homeworld': 'https://swapi.co/api/planets/1/',
     'mass': '136',
     'name': 'Darth Vader',
     'skin_color': 'white',
     'species': ['https://swapi.co/api/species/1/'],
     'starships': ['https://swapi.co/api/starships/13/'],
     'url': 'https://swapi.co/api/people/4/',
     'vehicles': []},
    '_type': 'characters'},
   {'_id': '43',
    '_index': 'star_wars',
    '_score': 1.9169226,
    '_source': {'birth_year': '54BBY',
   

In [53]:
# Prefix match
es.search(index='star_wars', body={'query': {'prefix': {'name': 'lei'}}})

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '5',
    '_index': 'star_wars',
    '_score': 1.0,
    '_source': {'birth_year': '19BBY',
     'created': '2014-12-10T15:20:09.791000Z',
     'edited': '2014-12-20T21:17:50.315000Z',
     'eye_color': 'brown',
     'films': ['https://swapi.co/api/films/2/',
      'https://swapi.co/api/films/6/',
      'https://swapi.co/api/films/3/',
      'https://swapi.co/api/films/1/',
      'https://swapi.co/api/films/7/'],
     'gender': 'female',
     'hair_color': 'brown',
     'height': '150',
     'homeworld': 'https://swapi.co/api/planets/2/',
     'mass': '49',
     'name': 'Leia Organa',
     'skin_color': 'light',
     'species': ['https://swapi.co/api/species/1/'],
     'starships': [],
     'url': 'https://swapi.co/api/people/5/',
     'vehicles': ['https://swapi.co/api/vehicles/30/']},
    '_type': 'characters'}],
  'max_score': 1.0,
  'total': 1},
 'timed_out': False,
 'took': 1}

In [56]:
# Fuzzy match
es.search(index='star_wars', 
          body={'query': {'fuzzy_like_this_field': {'name': {'like_text': 'jaba', 'max_query_terms': 10}}}})

GET http://localhost:9200/star_wars/_search [status:400 request:0.002s]


RequestError: TransportError(400, 'parsing_exception', 'no [query] registered for [fuzzy_like_this_field]')