In [6]:
import json
import os
import time

import requests as rq

In [34]:
# Shared settings read by the cells below

# When True, every index flagged with 'refresh' is dropped and recreated before loading
dropIndices = True

# Limited test run: when testMode is on, loading stops after testCount documents per index
testMode, testCount = False, 5

# Elasticsearch host
esHost = "http://es:9200"

# Headers sent with every request that carries a JSON body
headers = {"Content-Type": "application/json"}


# One entry per index: its name, the GeoJSON data source and the mapping definition.
# Set 'refresh' to True on the indices that should be rebuilt.
indexInfo = [
    {
        "refresh": False,
        "indexName": "watersheds",
        "dataSource": "https://github.com/nawrs/nawrs/raw/GeoJSON/JSON/ReferenceLayers/wbdhu2_PR_1hundthDD.geojson",
        "mapping": "https://raw.githubusercontent.com/nawrs/nawrs/GeoJSON/JSON/ReferenceLayers/wbdhu2_PR_1hundthDD-mapping.json",
    },
    {
        "refresh": True,
        "indexName": "triballands",
        "dataSource": "https://raw.githubusercontent.com/nawrs/nawrs/GeoJSON/JSON/ReferenceLayers/tl_2016_us_aiannh_Subset_.001_.geojson",
        "mapping": "https://raw.githubusercontent.com/nawrs/nawrs/GeoJSON/JSON/ReferenceLayers/tl_2016_us_aiannh_Subset_.001-mapping.json",
    },
    {
        "refresh": False,
        "indexName": "usstates",
        "dataSource": "https://github.com/nawrs/nawrs/raw/GeoJSON/JSON/ReferenceLayers/us_state_BS_5hundthDD.geojson",
        "mapping": "https://raw.githubusercontent.com/nawrs/nawrs/GeoJSON/JSON/ReferenceLayers/us_state_BS_5hundthDD-mapping.json",
    },
]

def writelog(logfileName,message):
    """Append one timestamped line to a log file.

    The line has the form ``YYYYMMDD-HHMMSS: <message>`` followed by a newline.

    Parameters
    ----------
    logfileName : str
        Path of the log file. The file is opened in append mode; any missing
        parent directory is created first so a fresh checkout without the
        log folder does not fail.
    message : str
        Text to record after the timestamp.
    """
    # A bare file name has no directory part; only create one when it exists.
    logDir = os.path.dirname(logfileName)
    if logDir:
        os.makedirs(logDir, exist_ok=True)

    timestr = time.strftime("%Y%m%d-%H%M%S")
    # Explicit UTF-8 so non-ASCII text in a message cannot fail on platforms
    # whose default encoding is narrower.
    with open(logfileName, "a", encoding="utf-8") as f:
        f.write(timestr + ": " + message + "\n")

In [35]:
# Check that the Elasticsearch host is responding before any index is touched.
# A timeout is passed because requests waits indefinitely by default, which
# would hang this cell if the host is unreachable or never answers.
reqString = esHost
r = rq.get(reqString, timeout=10)
print(r.status_code)
print(r.json())

200
{'tagline': 'You Know, for Search', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'NRcv8IVgRiGUjDbbZX95sw', 'name': 'edBz89I', 'version': {'lucene_version': '6.3.0', 'build_date': '2016-12-06T12:36:15.409Z', 'number': '5.1.1', 'build_hash': '5395e21', 'build_snapshot': False}}


In [36]:
# Rebuild the flagged indices: delete each one, then recreate it with its mapping

if not dropIndices:
    print("Indices were retained")
else:
    for index in indexInfo:
        # only the indices marked for refresh are touched
        if not index['refresh']:
            continue

        requestURL = "/".join([esHost, index['indexName']])
        # The mapping is downloaded before the delete, so a failed download
        # stops the cell while the existing index is still in place.
        # The mapping document is keyed by the index name.
        indexMapping = rq.get(index['mapping']).json()[index['indexName']]

        print("Deleting index: "+requestURL)
        r = rq.delete(requestURL)
        print("\tStatus Code: "+str(r.status_code))

        # recreate the index, supplying the mapping as the request body
        print("Adding index and mapping: "+requestURL)
        r = rq.put(requestURL, headers=headers, data=json.dumps(indexMapping))
        print("\tStatus Code: "+str(r.status_code)+": "+str(r.json()))

Deleting index: http://es:9200/triballands
	Status Code: 200
Adding index and mapping: http://es:9200/triballands
	Status Code: 200: {'shards_acknowledged': True, 'acknowledged': True}


In [37]:
# Load each GeoJSON source flagged for refresh and index its features,
# one Elasticsearch document per feature.
timestr = time.strftime("%Y%m%d-%H%M%S")
logfileName = "logs/rebuildIndices_"+timestr+".txt"

for sourceFile in indexInfo:
    if sourceFile['refresh']:
        r = rq.get(sourceFile['dataSource'])
        workingGeo = r.json()
        # collection-level values that are copied into every document
        docType = workingGeo['type']
        docCRS = workingGeo['crs']

        # i is both the progress counter and the document id
        for i, feature in enumerate(workingGeo['features'], start=1):
            # an empty entry separates the features in the log file
            logMessage = ''
            writelog(logfileName,logMessage)
            logMessage = "Processing: ("+str(i)+") "+sourceFile['indexName']
            writelog(logfileName,logMessage)
            print(logMessage)

            docFeatureType = feature['type']
            docFeatureProperties = feature['properties']
            docFeatureGeometry = feature['geometry']

            # Build the document as a dict instead of formatting a JSON string
            # and parsing it back: there is no parse step left that can fail
            # and leave docJSON undefined or holding the previous feature.
            docJSON = {
                "type": docType,
                "crs": docCRS,
                "features": {
                    "type": docFeatureType,
                    "properties": docFeatureProperties,
                    "geometry": docFeatureGeometry
                }
            }
            # serialized once; used as the request body and in error reports
            builtDoc = json.dumps(docJSON)

            # put the document into the index
            requestURL = esHost+"/"+sourceFile['indexName']+"/geojson/"+str(i)
            logMessage = "Adding Document: "+requestURL
            writelog(logfileName,logMessage)
            print(logMessage)
            r = rq.post(requestURL, data=builtDoc, headers=headers)

            # Parse the response once. A body that is not JSON is kept as raw
            # text so the result can still be logged instead of raising.
            try:
                responseBody = r.json()
            except ValueError:
                responseBody = r.text

            # A response without a 'result' entry is reported as an error
            # together with the document that was sent.
            try:
                resultText = str(responseBody['result'])
            except (KeyError, TypeError):
                resultText = "ERROR\n\n==========================" + builtDoc + "\n"

            logMessage = "\tResult: "+str(r.status_code)+": "+resultText + "\n" + str(responseBody)
            writelog(logfileName,logMessage)
            print(logMessage)
            print()

            # in test mode stop once testCount documents have been sent
            if testMode and i >= testCount:
                break
    else:
        print("skipping: "+sourceFile['indexName'])



skipping: watersheds
Processing: (1) triballands
Adding Document: http://es:9200/triballands/geojson/1
	Result: 201: created
{'_id': '1', 'created': True, '_type': 'geojson', '_shards': {'failed': 0, 'successful': 1, 'total': 2}, '_version': 1, 'result': 'created', '_index': 'triballands'}

Processing: (2) triballands
Adding Document: http://es:9200/triballands/geojson/2
	Result: 201: created
{'_id': '2', 'created': True, '_type': 'geojson', '_shards': {'failed': 0, 'successful': 1, 'total': 2}, '_version': 1, 'result': 'created', '_index': 'triballands'}

Processing: (3) triballands
Adding Document: http://es:9200/triballands/geojson/3
	Result: 201: created
{'_id': '3', 'created': True, '_type': 'geojson', '_shards': {'failed': 0, 'successful': 1, 'total': 2}, '_version': 1, 'result': 'created', '_index': 'triballands'}

Processing: (4) triballands
Adding Document: http://es:9200/triballands/geojson/4
	Result: 201: created
{'_id': '4', 'created': True, '_type': 'geojson', '_shards': {