In [7]:
import requests, json
#reference link: https://www.elastic.co/blog/a-practical-introduction-to-elasticsearch
#reference to load data: https://www.elastic.co/guide/en/kibana/5.5/tutorial-load-dataset.html
url = "http://localhost:9200/accounts/person/1"
user = "elastic"
password = "password"
data = {
    "name" : "John",
    "lastname" : "Doe",
    "job_description" : "Systems administrator and Linux specialit"
}
json_data = json.dumps(data) #serialize dict to json string
test = requests.post(url, data=json_data, auth=(user, password), headers={"Content-Type": "application/json"})
print(test.text)

{"_index":"accounts","_type":"person","_id":"1","_version":2,"result":"updated","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":1,"_primary_term":1}


In [13]:
#get document from elastic search
url = "http://localhost:9200/accounts/person/1"
test = requests.get(url, auth=(user, password))
print(test.content)

b'{"_index":"accounts","_type":"person","_id":"1","_version":3,"_seq_no":2,"_primary_term":1,"found":true,"_source":{"name":"John","lastname":"Doe","job_description":"Systems administrator and Linux specialist"}}'


In [11]:
#update
url = "http://localhost:9200/accounts/person/1/_update"
data = {
      "doc":{
          "job_description" : "Systems administrator and Linux specialist"
       }
}
json_data = json.dumps(data)
test = requests.post(url, data=json_data, auth=(user, password), headers={"Content-Type": "application/json"})
test.content

b'{"_index":"accounts","_type":"person","_id":"1","_version":3,"result":"updated","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":2,"_primary_term":1}'

### Note about objects in elastic search
- documents (doc) is smallest entity it is just like row in sql database
- kind: is like a table in sql database
- index: is like a database in sql database which is a collection of kinds
- example: index:accounts, type: person, id: doc id

In [15]:
#create doc about person 2
url = "http://localhost:9200/accounts/person/2"
data = {
    "name" : "John",
    "lastname" : "Smith",
    "job_description" : "Systems administrator"
}
json_data = json.dumps(data)
test = requests.post(url, data=json_data, auth=(user, password), headers={"Content-Type": "application/json"})
test.content

b'{"_index":"accounts","_type":"person","_id":"2","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":3,"_primary_term":1}'

In [19]:
#search string john
url = "http://localhost:9200/_search?q=john"
test = requests.get(url, auth=(user, password))
#using json.dumps() to beauify json string
print(json.dumps(test.json(), indent=4, sort_keys=True))

{
    "_shards": {
        "failed": 0,
        "skipped": 0,
        "successful": 57,
        "total": 57
    },
    "hits": {
        "hits": [
            {
                "_id": "1",
                "_index": "accounts",
                "_score": 0.18232156,
                "_source": {
                    "job_description": "Systems administrator and Linux specialist",
                    "lastname": "Doe",
                    "name": "John"
                },
                "_type": "person"
            },
            {
                "_id": "2",
                "_index": "accounts",
                "_score": 0.18232156,
                "_source": {
                    "job_description": "Systems administrator",
                    "lastname": "Smith",
                    "name": "John"
                },
                "_type": "person"
            }
        ],
        "max_score": 0.18232156,
        "total": {
            "relation": "eq",
            "value": 2
        }

In [20]:
#search for string smith
url = "http://localhost:9200/_search?q=smith"
test = requests.get(url, auth=(user, password))
#using json.dumps() to beauify json string
print(json.dumps(test.json(), indent=4, sort_keys=True))

{
    "_shards": {
        "failed": 0,
        "skipped": 0,
        "successful": 57,
        "total": 57
    },
    "hits": {
        "hits": [
            {
                "_id": "2",
                "_index": "accounts",
                "_score": 0.6931471,
                "_source": {
                    "job_description": "Systems administrator",
                    "lastname": "Smith",
                    "name": "John"
                },
                "_type": "person"
            }
        ],
        "max_score": 0.6931471,
        "total": {
            "relation": "eq",
            "value": 1
        }
    },
    "timed_out": false,
    "took": 129
}


In [21]:
url = "http://localhost:9200/_search?q=job_description:john"
test = requests.get(url, auth=(user, password))
#using json.dumps() to beauify json string
print(json.dumps(test.json(), indent=4, sort_keys=True))

{
    "_shards": {
        "failed": 0,
        "skipped": 0,
        "successful": 57,
        "total": 57
    },
    "hits": {
        "hits": [],
        "max_score": null,
        "total": {
            "relation": "eq",
            "value": 0
        }
    },
    "timed_out": false,
    "took": 15
}


In [22]:
#search job_description with string lin
url = "http://localhost:9200/accounts/person/_search?q=job_description:linux"
test = requests.get(url, auth=(user, password))
#using json.dumps() to beauify json string
print(json.dumps(test.json(), indent=4, sort_keys=True))

{
    "_shards": {
        "failed": 0,
        "skipped": 0,
        "successful": 1,
        "total": 1
    },
    "hits": {
        "hits": [
            {
                "_id": "1",
                "_index": "accounts",
                "_score": 0.5897495,
                "_source": {
                    "job_description": "Systems administrator and Linux specialist",
                    "lastname": "Doe",
                    "name": "John"
                },
                "_type": "person"
            }
        ],
        "max_score": 0.5897495,
        "total": {
            "relation": "eq",
            "value": 1
        }
    },
    "timed_out": false,
    "took": 5
}


In [23]:
# delete docs and index


--2023-08-03 13:34:53--  https://download.elastic.co/demos/kibana/gettingstarted/shakespeare.json
Resolving download.elastic.co (download.elastic.co)... 34.120.127.130, 2600:1901:0:1d7::
Connecting to download.elastic.co (download.elastic.co)|34.120.127.130|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 25216068 (24M) [binary/octet-stream]
Saving to: ‘shakespeare.json’


2023-08-03 13:34:59 (4.64 MB/s) - ‘shakespeare.json’ saved [25216068/25216068]



### Buld load data into elasticsearch

In [None]:
#download example data
!wget https://download.elastic.co/demos/kibana/gettingstarted/shakespeare.json


In [33]:
#define mapping for an index
    #mapping: is the process to define how documents and the fields it contained stored and index
    #mapping reference link: https://www.elastic.co/guide/en/elasticsearch/reference/5.5/mapping.html
url = "http://localhost:9200/shakespeare"
# with open("./shakespeare_mapping.json", mode="r") as fp:
#     json_data = json.dumps(json.load(fp))

json_data = json.dumps(data)
test = requests.put(url, data=json.dumps({}), auth=(user, password), headers={"Content-Type": "application/json"})
test.content

b'{"error":{"root_cause":[{"type":"mapper_parsing_exception","reason":"Root mapping definition has unsupported parameters:  [_default_ : {properties={play_name={type=keyword}, speech_number={type=integer}, speaker={type=keyword}, line_id={type=integer}}}]"}],"type":"mapper_parsing_exception","reason":"Failed to parse mapping [_doc]: Root mapping definition has unsupported parameters:  [_default_ : {properties={play_name={type=keyword}, speech_number={type=integer}, speaker={type=keyword}, line_id={type=integer}}}]","caused_by":{"type":"mapper_parsing_exception","reason":"Root mapping definition has unsupported parameters:  [_default_ : {properties={play_name={type=keyword}, speech_number={type=integer}, speaker={type=keyword}, line_id={type=integer}}}]"}},"status":400}'

In [45]:
#get all indeces
url = "http://localhost:9200/_cat/indices/_all"
test = requests.get(url, auth=(user, password))
print(test.content)

b'yellow open chart_chartusagestatisticsaspect_v1                      dodSkMSASKKTXnG5lh9DEw 1 1   0  0   226b   226b\nyellow open datajobindex_v2                                          gvYJxdq9SQSuLzrJ8IVn_w 1 1   0  0   226b   226b\nyellow open dataflowindex_v2                                         RJy9jxgzTgC03OUQ5P2KNw 1 1   0  0   226b   226b\nyellow open mlmodelgroupindex_v2                                     dOJ5rMldQtOAHLqhJrvgIA 1 1   0  0   226b   226b\nyellow open assertionindex_v2                                        vIiOHoVoSz-sLOVcQNjVvg 1 1   0  0   226b   226b\nyellow open dataprocessindex_v2                                      7f1xBrgjQKm31eygbm75Xg 1 1   0  0   226b   226b\nyellow open globalsettingsindex_v2                                   TYhBhsW_RYCT_F_Q9t9Eww 1 1   0  0   226b   226b\nyellow open chartindex_v2                                            t3L_OCyST12cFtyp9u63jw 1 1   0  0   226b   226b\nyellow open tagindex_v2                               