# ElasticSearch Upload
---
- Connect to Bonsai ES client
- Create cluster / shards if non existant 
- Add alias to cluser 
- Connect to Firestore
- Extract SkiBoards from Firestore
- Format / Validate SkiBoard objects
- Upload data to ElasticSearch

In [3]:
# https://docs.bonsai.io/article/102-python
import io, sys, os, base64, re, logging
import json
import datetime

from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.helpers import streaming_bulk

import firebase_admin
from firebase_admin import credentials, firestore

ImportError: cannot import name 'ElasticsearchWarning' from 'elasticsearch.exceptions' (/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/elasticsearch/exceptions.py)

## Connect to Firestore

In [21]:
# Inititalise Firebase connection
try:
    firebase_cred = credentials.Certificate(sys.path[0] + '/../application/config/firebase_service_account_key.json')
    firebase_admin.initialize_app(firebase_cred)
except:
    firebase_admin.get_app()

In [22]:
db = firestore.client()

## Collect all SkiBoards from Firestore

In [23]:
docs = db.collection('SkiBoards').stream()
skiboards = []

for doc in docs:
    doc_id = doc.id
    doc = doc.to_dict()

    skiboard = {
        'id': doc_id
    }
    print('Extracting... {} {} ({})'.format(doc['brand'], doc['model'], doc['year']))
    for key in doc:
        skiboard[key] = doc[key]
        
    sizes = []
    collection_docs = db.collection('SkiBoards').document(doc_id).collection('Sizes').get()
    for collection in collection_docs:
        size = {
            'size': collection.id
        }
        collection = collection.to_dict()
        for detail in collection:
            size[detail] = collection[detail]
                
        sizes.append(size)
        
    skiboard['sizes'] = sizes
    skiboards.append(skiboard)
    
    print(skiboard)

Extracting... Burton HometownHeroSplit (2023)
{'id': '0Rn7K21ipoWG6ZsJYWwc', 'slug': 'burton-hometownherosplit-2023', 'year': 2023, 'category': 'Snowboard', 'model': 'HometownHeroSplit', 'updated': DatetimeWithNanoseconds(2023, 5, 5, 1, 23, 45, 599581, tzinfo=datetime.timezone.utc), 'created': DatetimeWithNanoseconds(2023, 5, 5, 1, 23, 45, 599534, tzinfo=datetime.timezone.utc), 'author': '7aLK5kOucZVIJ2EiXZlj', 'brand': 'Burton', 'name': 'Burton HometownHeroSplit 2023', 'sizes': []}
Extracting... YES BasicWomens (2023)
{'id': 'Cg2ACYTvkdYoPXJDinDe', 'slug': 'yes-basicwomens-2023', 'year': 2023, 'asym': False, 'profile': 'Hybrid Camber', 'category': 'Snowboard', 'model': 'BasicWomens', 'updated': DatetimeWithNanoseconds(2023, 4, 25, 20, 21, 56, 471555, tzinfo=datetime.timezone.utc), 'created': DatetimeWithNanoseconds(2023, 4, 25, 20, 21, 30, 814415, tzinfo=datetime.timezone.utc), 'author': '7aLK5kOucZVIJ2EiXZlj', 'brand': 'YES', 'flex': '5', 'name': 'YES BasicWomens 2023', 'sizes': [{'s

In [24]:
print('Extracted {} SkiBoards\n'.format(len(skiboards)))
print(skiboards[0])

Extracted 4 SkiBoards

{'id': '0Rn7K21ipoWG6ZsJYWwc', 'slug': 'burton-hometownherosplit-2023', 'year': 2023, 'category': 'Snowboard', 'model': 'HometownHeroSplit', 'updated': DatetimeWithNanoseconds(2023, 5, 5, 1, 23, 45, 599581, tzinfo=datetime.timezone.utc), 'created': DatetimeWithNanoseconds(2023, 5, 5, 1, 23, 45, 599534, tzinfo=datetime.timezone.utc), 'author': '7aLK5kOucZVIJ2EiXZlj', 'brand': 'Burton', 'name': 'Burton HometownHeroSplit 2023', 'sizes': []}


## Connect to ElasticSearch

In [25]:
f = open(sys.path[0] + '/../application/config/bonsai_config.json')

In [26]:
es_config = json.load(f)

In [27]:
es_endpoint = es_config['url']
es_user = es_config['key']
es_password = es_config['secret']

In [28]:
es_client = Elasticsearch([es_endpoint], basic_auth=(es_user, es_password))

In [29]:
new_index_name = "skiboards-{}".format(datetime.date.today())
alias = "skiboards"
print("New Index: " + new_index_name + " AKA: " + alias)

New Index: skiboards-2023-10-05 AKA: skiboards


In [30]:
idx_manager = IndicesClient(es_client)

In [31]:
# Create new index
try:
    idx_manager.create(index=new_index_name)
except RequestError:
    print("Error initializing index manager")

# Add alias to new index
try:
    idx_manager.put_alias(new_index_name, alias)
except:
    print("Error adding alias to cluster")

## Upload Firestore data to ElasticSearch

In [32]:
def generate_update_docs():    
    docs = db.collection('SkiBoards').stream()
    skiboards = []

    for doc in docs:
        doc_id = doc.id
        doc = doc.to_dict()

        skiboard = {
            'id': doc_id
        }
        print('Extracting... {} {} ({})'.format(doc['brand'], doc['model'], doc['year']))
        for key in doc:
            skiboard[key] = doc[key]

        sizes = []
        collection_docs = db.collection('SkiBoards').document(doc_id).collection('Sizes').get()
        for collection in collection_docs:
            size = {
                'size': collection.id
            }
            collection = collection.to_dict()
            for detail in collection:
                size[detail] = collection[detail]

            sizes.append(size)

        skiboard['sizes'] = sizes
        document = {
            '_type': 'document',
            '_id': doc_id,
            '_source': skiboard
        }
        yield document
        

In [33]:
successes = 0
print("Processing update...")
print("New Index: " + new_index_name)
for ok, action in streaming_bulk(client=es_client, index=new_index_name, actions=generate_update_docs()):
    successes += ok

Processing update...
New Index: skiboards-2023-10-05
Extracting... Burton HometownHeroSplit (2023)
Extracting... YES BasicWomens (2023)
Extracting... Burton Custom (2022)
Extracting... Capita DOA (2022)




## Search

In [34]:
query = "burton custom"

In [35]:
search_body = {
        "query": {
            "multi_match": {
                "query": query,
                "type": "bool_prefix",
                "fields": [
                    "brand",
                    "model",
                    "year"
                ]
            }
        }
    }
res = es_client.search(index=new_index_name, body=search_body)

RequestError: RequestError(400, 'search_phase_execution_exception', 'failed to create query: For input string: "burton custom"')

## Delete Index

In [150]:
try:
    idx_manager.delete(index=new_index_name)
except:
    print("No index found with alias: {}".format(new_index_name))