# ElasticSearch Upload
---
- Connect to Bonsai ES client
- Create cluster / shards if nonexistent
- Add alias to cluster
- Connect to Firestore
- Extract SkiBoards from Firestore
- Format / Validate SkiBoard objects
- Upload data to ElasticSearch

In [78]:
# https://docs.bonsai.io/article/102-python
import io, sys, os, base64, re, logging
import json
import datetime
from urllib.parse import urlsplit

from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.exceptions import RequestError
from elasticsearch.helpers import streaming_bulk

import firebase_admin
from firebase_admin import credentials, firestore

## Connect to Firestore

In [45]:
# Initialise the Firebase connection.
# Reuse the default app when this cell is re-run in the same kernel, and only
# create it when it does not exist yet. Errors raised while loading the
# service-account key (missing or invalid file) are no longer swallowed.
try:
    firebase_admin.get_app()
except ValueError:
    # get_app() raises ValueError when no default app has been initialised.
    firebase_cred = credentials.Certificate(sys.path[0] + '/../application/config/firebase_service_account_key.json')
    firebase_admin.initialize_app(firebase_cred)

In [46]:
# Firestore client used for every read of the 'SkiBoards' collection below.
db = firestore.client()

## Collect all SkiBoards from Firestore

In [69]:
# Flatten every 'SkiBoards' document, together with its 'Sizes'
# subcollection, into one plain dict per board.
docs = db.collection('SkiBoards').stream()
skiboards = []

for board_snapshot in docs:
    board_id = board_snapshot.id
    board_fields = board_snapshot.to_dict()

    print('Extracting... {} {} ({})'.format(board_fields['brand'], board_fields['model'], board_fields['year']))

    # 'id' is set first; the stored fields are copied in after it.
    skiboard = {'id': board_id, **board_fields}

    # One dict per size document, keyed by the size document id under 'size'.
    size_snapshots = db.collection('SkiBoards').document(board_id).collection('Sizes').get()
    skiboard['sizes'] = [
        {'size': size_snapshot.id, **size_snapshot.to_dict()}
        for size_snapshot in size_snapshots
    ]

    skiboards.append(skiboard)

Extracting... Capita DOA (2023)
Extracting... Burton Custom (2022)


In [75]:
# Sanity check: report how many boards were pulled and show the first one.
board_count = len(skiboards)
print(f'Extracted {board_count} SkiBoards\n')
print(skiboards[0])

Extracted 2 SkiBoards

{'id': '9hGKnaJY92cLF0PQByn6', 'year': 2023, 'asym': False, 'profile': 'Hybrid Camber', 'category': 'Snowboard', 'model': 'DOA', 'updated': DatetimeWithNanoseconds(2023, 3, 31, 4, 44, 56, 646571, tzinfo=datetime.timezone.utc), 'created': DatetimeWithNanoseconds(2023, 3, 31, 4, 44, 56, 646531, tzinfo=datetime.timezone.utc), 'brand': 'Capita', 'flex': '5.5', 'sizes': [{'size': '148', 'effective_edge': '1176', 'stance_width': '24.2', 'setback': '0', 'nose_width': '287', 'tail_width': '287', 'sidecut': '7.6', 'waist_width': '244'}, {'size': '150', 'effective_edge': '1191', 'stance_width': '24.2', 'setback': '0', 'nose_width': '290', 'tail_width': '290', 'sidecut': '7.7', 'waist_width': '246'}, {'size': '152', 'effective_edge': '1207', 'stance_width': '25.2', 'setback': '0', 'nose_width': '292', 'tail_width': '292', 'sidecut': '7.8', 'waist_width': '248'}, {'size': '153W', 'effective_edge': '1185', 'stance_width': '25.2', 'setback': '0', 'nose_width': '299', 'tail_wid

## Connect to ElasticSearch

In [16]:
# Read the cluster URL from the environment instead of hard-coding it: the
# URL embeds the username and password and must never be committed.
# Expected form: https://<user>:<password>@<host>:443
es_endpoint = os.environ["BONSAI_URL"]

# Derive the basic-auth pair from the URL so the secret lives in one place.
_es_url_parts = urlsplit(es_endpoint)
es_user = _es_url_parts.username
es_password = _es_url_parts.password

In [17]:
# Elasticsearch client for the configured endpoint, authenticated with basic auth.
es_client = Elasticsearch([es_endpoint], basic_auth=(es_user, es_password))

In [18]:
# Each run targets an index stamped with today's date; the alias name is fixed.
alias = "skiboards"
new_index_name = f"skiboards-{datetime.date.today()}"
print(f"New Index: {new_index_name} AKA: {alias}")

New Index: skiboards-2023-04-01 AKA: skiboards


In [19]:
# Index-management wrapper around es_client; used below to create the index and its alias.
idx_manager = IndicesClient(es_client)

In [20]:
# Create new index.
# A RequestError (HTTP 400) is what the cluster returns when the index already
# exists, e.g. when this cell is re-run on the same day; report it and carry on.
try:
    idx_manager.create(index=new_index_name)
except RequestError as err:
    print("Error initializing index manager: {}".format(err))

# Add alias to new index.
# Arguments are passed by keyword: the 8.x client rejects positional API
# arguments, which previously made this call fail silently.
# NOTE(review): nothing here removes the alias from earlier dated indices, so
# it may end up pointing at several indices - confirm that is intended.
try:
    idx_manager.put_alias(index=new_index_name, name=alias)
except Exception as err:
    # Best effort, as before, but surface the reason instead of hiding it.
    print("Error adding alias to cluster: {}".format(err))

## Upload Firestore data to ElasticSearch

In [76]:
def generate_update_docs(firestore_client=None):
    """Yield one Elasticsearch bulk action per document in the 'SkiBoards' collection.

    Each yielded dict holds the Firestore document id (under both ``'_id'``
    and ``'id'``), every field stored on the document, and a ``'sizes'`` list
    built from the document's 'Sizes' subcollection (one dict per size
    document, with the size document id under ``'size'``).

    ``'_id'`` is consumed by the bulk helper as action metadata: it becomes
    the Elasticsearch document id rather than an indexed field, so re-running
    the upload against the same index overwrites boards instead of
    duplicating them.

    Args:
        firestore_client: Object exposing the Firestore ``collection()`` API.
            Defaults to the notebook-level ``db`` client.

    Yields:
        dict: One action per board, suitable for ``streaming_bulk``.
    """
    client = db if firestore_client is None else firestore_client
    boards = client.collection('SkiBoards')

    for board_doc in boards.stream():
        board_id = board_doc.id
        fields = board_doc.to_dict()

        # Progress output only; .get() keeps a board with a missing field
        # from aborting the whole upload.
        print('Extracting... {} {} ({})'.format(fields.get('brand'), fields.get('model'), fields.get('year')))

        sizes = [
            {'size': size_doc.id, **size_doc.to_dict()}
            for size_doc in boards.document(board_id).collection('Sizes').get()
        ]

        yield {'_id': board_id, 'id': board_id, **fields, 'sizes': sizes}
        

In [79]:
# Stream the boards into the new index, counting the actions that succeeded.
print("Processing update...")
print("New Index: " + new_index_name)

successes = 0
bulk_results = streaming_bulk(client=es_client, index=new_index_name, actions=generate_update_docs())
for ok, response in bulk_results:
    # ok is a bool, so adding it counts one per successful action.
    successes += ok

Processing update...
New Index: skiboards-2023-04-01
Extracting... Capita DOA (2023)
Extracting... Burton Custom (2022)
