# ElasticSearch Upload
---
- Connect to Bonsai ES client
- Create cluster / shards if non existant 
- Add alias to cluser 
- Connect to Firestore
- Extract SkiBoards from Firestore
- Format / Validate SkiBoard objects
- Upload data to ElasticSearch

In [54]:
# https://docs.bonsai.io/article/102-python
import io, sys, os, base64, re, logging
import json
import datetime
import pymysql

from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.helpers import streaming_bulk

import firebase_admin
from firebase_admin import credentials, firestore

# SkiBoard
---

In [53]:
import os.path
import logging
import json
import pytz
from datetime import datetime
from operator import itemgetter
from difflib import SequenceMatcher

# Application Imports
# --------------------------------------------------
#from application.core import setupdb

# Infrastructure Imports
# --------------------------------------------------
from flask import session
from firebase_admin import firestore
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient



__author__ = 'liamkenny'

unit_names = {
    'size':             ['size', 'length'],
    'nose_width':       ['nose width', 'tip width'],
    'waist_width':      ['waist width'],
    'tail_width':       ['tail width'],
    'sidecut':          ['sidecut', 'sidecut radius', 'turning radius'],
    'effective_edge':   ['effective edge', 'running length'],
    'setback':          ['stance setback'],
    'stance width':     ['stance width', 'stance range'],
    'profile':          ['bend', 'profile'],
    'flex':             ['flex', 'stiffness'],
    'asym':             ['asym', 'asymetric']
}

profile_types = {
    'full_camber': 'url_for_img',
    'hybrid_camber': 'url_for_img',
    'directional_camber': 'url_for_img',
    'flat': 'url_for_img',
    'directional_flat': 'url_for_img',
    'hybrid_rocker': 'url_for_img',
    'full_rocker': 'url_for_img'
}

param_names = ['size', 'nose_width', 'waist_width', 'tail_width', 'sidecut', 'effective_edge', 'setback', 'stance_width']

# Connect ElasticSearch credentials
f = open(sys.path[0] + '/../application/config/bonsai_config.json')
es_config = json.load(f)

# --------------------------------------------------
# Match Param                        F U N C T I O N
# --------------------------------------------------
def match_param(param):

    # Calculate best similarity score for each unit name
    match_scores = {}
    for unit in unit_names:
        match_scores[unit] = 0
        for option in unit_names[unit]:
            similarity = SequenceMatcher(None, param.replace('_', ' '), option).ratio()
            #logging.info("param: {} - comparedto: {} - score: {}".format(param, option, similarity))
            if similarity > match_scores[unit]:
                match_scores[unit] = similarity
            
    matched = max(match_scores, key=match_scores.get)
    confidence = match_scores[max(match_scores)]
    #logging.info("Parameter Matching for: {} - best match: {}\n{}".format(param, matched, match_scores))

    return matched, confidence

# ==================================================
# S K I B O A R D                          C L A S S
# ==================================================
class SkiBoard():

    # If a SkiBoard has an ID of 0 it has not been saved in the database
    def __init__(self, skiboard_id, brand, model, year, name, slug, category, family=None, description=None, stiffness=None, shape=None, flex_profile=None, camber_profile=None, camber_details=[], core=None, core_profiling=None, fibreglass=None, laminates=[], resin=None, base=None, edge_tech=None, topsheet=None, sidewall=None, inserts=None, asym=False, weight=0, womens=False, youth=False, url=None):
        self.id = skiboard_id
        self.brand = brand
        self.model = model
        self.year = year
        self.name = name
        self.slug = slug
        self.category = category
        self.description = description
        self.stiffness = stiffness
        self.shape = shape
        self.family = family
        self.flex_profile = flex_profile
        self.camber_profile = camber_profile
        self.camber_details = camber_details
        self.core = core
        self.core_profiling = core_profiling
        self.fibreglass = fibreglass
        self.laminates = laminates
        self.resin = resin
        self.base = base
        self.edge_tech = edge_tech
        self.topsheet = topsheet
        self.sidewall = sidewall
        self.inserts = inserts
        self.asym = asym
        self.weight = weight
        self.womens = womens
        self.youth = youth
        self.url = url

    # --------------------------------------------------
    # Is Duplicate                       F U N C T I O N
    # --------------------------------------------------
    def is_duplicate(self):
        db = setupdb()
        cursor = db.cursor()

        # Search for SkiBoards with 
        try:
            logging.info("Checking for Duplicate SkiBoards: {} {} ({})".format(self.brand, self.model, self.year))
            sql = """SELECT skiboard_id FROM SkiBoards WHERE brand = '{}' AND model = '{}' AND year = '{}'""".format(self.brand, self.model, self.year)
            cursor.execute(sql)
            result = cursor.fetchone()
            logging.info("Duplicate Found: {}".format(result))
        except Exception as e:
            logging.error(e)

        if result:
            return True

        return False

    # --------------------------------------------------
    # Get Item                           F U N C T I O N
    # --------------------------------------------------
    @classmethod
    def get(cls, id=None, brand=None, model=None, year=None, slug=None):
        
        db = setupdb()
        cursor = db.cursor()

        if id:
            try:
                logging.info("Getting SkiBoard from ID: {}".format(id))
                sql = f"""SELECT * FROM SkiBoards WHERE skiboard_id = '{id}'"""
                cursor.execute(sql)
                result = cursor.fetchone()
                logging.info("Result: {}".format(result))
            except Exception as e:
                logging.error(e)
                return None
            
        elif brand and model and year:
            try:
                logging.info("Getting SkiBoard by B-M-Y: {} {} ({})".format(brand, model, year))
                sql = f"""SELECT * FROM SkiBoards WHERE brand = '{brand}' AND model = '{model}' AND year = '{year}'"""
                cursor.execute(sql)
                result = cursor.fetchone()
                logging.info("Result: {}".format(result))
            except Exception as e:
                logging.error(e)
                return None
        elif slug:
            try:
                sql = f"""SELECT * FROM SkiBoards WHERE slug = '{slug}'"""
                cursor.execute(sql)
                result = cursor.fetchone()
            except Exception as e:
                logging.error(f"Could not get SkiBoard from Slug:\n{e}")
                return None
            
        if not result:
            return None
        
        # Map DB Result to User Object
        skiboard = SkiBoard(
            skiboard_id=result[0], 
            url=result[1], 
            brand=result[2], 
            model=result[3], 
            year=result[4], 
            name=result[5],
            slug=result[6],
            category=result[7],
            family=result[8],
            description=result[9],
            stiffness=result[10],
            flex_profile=result[11],
            camber_profile=result[12],
            camber_details=result[13],
            core=result[14],
            core_profiling=result[15],
            fibreglass=result[16],
            laminates=result[17],
            resin=result[18],
            base=result[19],
            edge_tech=result[20],
            topsheet=result[21],
            sidewall=result[22],
            inserts=result[23],
            asym=result[24],
            weight=result[25],
            womens=result[26],
            youth=result[27]
        )
        
        return skiboard


    # --------------------------------------------------
    # Save SkiBoard                      F U N C T I O N
    # --------------------------------------------------
    def save(self):
        db = setupdb()
        cursor = db.cursor()

        try:
            if self.id:
                print("Updating SkiBoard...")
                sql = f"""REPLACE INTO SkiBoards (skiboard_id, url, brand, model, year, name, slug, category, family, description, stiffness, flex_profile, camber_profile, camber_details, core, laminates, base, sidewall, weight, youth, updated) 
                values(
                '{str(self.id)}'
                '{str(self.url)}', 
                '{str(self.brand)}',  
                '{str(self.model)}', 
                '{str(self.year)}',
                '{str(self.brand)} {str(self.model)} {str(self.year)}',
                '{str(self.brand).lower()}-{str(self.model).lower()}-{str(self.year)}',
                '{str(self.category)}', 
                '{str(self.family)}', 
                '{str(self.description)}', 
                {float(self.stiffness)}, 
                '{str(self.flex_profile)}', 
                '{str(self.camber_profile)}', 
                '{'~'.join(str(i) for i in self.camber_details)}', 
                '{str(self.core)}', 
                '{'~'.join(str(i) for i in self.laminates)}', 
                '{str(self.base)}', 
                '{str(self.sidewall)}', 
                {float(self.weight)}, 
                {bool(self.youth)}, 
                '{datetime.now(pytz.timezone('Canada/Pacific')).strftime("%Y/%m/%d %H:%M:%S")}' )"""
                
            else:
                print("Creating SkiBoard...")
                sql = f"""INSERT INTO SkiBoards (url, brand, model, year, name, slug, category, family, description, stiffness, flex_profile, camber_profile, camber_details, core, laminates, base, sidewall, weight, youth, created, updated) 
                values(
                '{str(self.url)}', 
                '{str(self.brand)}', 
                '{str(self.model)}', 
                '{str(self.year)}', 
                '{str(self.brand)} {str(self.model)} {str(self.year)}',
                '{str(self.brand).lower()}-{str(self.model).lower()}-{str(self.year)}',
                '{str(self.category)}', 
                '{str(self.family)}', 
                '{str(self.description)}', 
                {float(self.stiffness)}, 
                '{str(self.flex_profile)}', 
                '{str(self.camber_profile)}', 
                '{'~'.join(str(i) for i in self.camber_details)}', 
                '{str(self.core)}', 
                '{'~'.join(str(i) for i in self.laminates)}', 
                '{str(self.base)}', 
                '{str(self.sidewall)}', 
                {float(self.weight)}, 
                {int(bool(self.youth))}, 
                '{datetime.now(pytz.timezone('Canada/Pacific')).strftime("%Y/%m/%d %H:%M:%S")}',
                '{datetime.now(pytz.timezone('Canada/Pacific')).strftime("%Y/%m/%d %H:%M:%S")}')"""
                
            
            print(f"About to execute SQL: {sql}")
            cursor.execute(sql)
            db.commit()
             
        except Exception as e:
            logging.error("Could not create new SkiBoard:\n{}".format(e))
            print("Could not create new SkiBoard: {}".format(e))
            return False

        logging.info("Saved SkiBoard:\nBrand: {}\nModel: {} Year: ({})".format(self.brand, self.model, self.year))

        # ToDo...
        # Update ElasticSearch
        '''
        successes = 0
        logging.info("Uploading SkiBoard to ElasticSearch")
        es.update(
            id=self.id,
            index='SkiBoards',
            document=self.__dict__
        )   
        '''
        

        return True
    
    # --------------------------------------------------
    # Search Database                    F U N C T I O N
    # -------------------------------------------------- 
    @classmethod
    def search_db(cls, query_string):
        
        db = setupdb()
        cursor = db.cursor()

        try:
            #logging.info("Searching for SkiBoard: {}".format(query_string))
            print(f"Searching for SkiBoard: {query_string}")
            sql = f"SELECT * FROM SkiBoards WHERE MATCH(name) AGAINST('{query_string}' IN NATURAL LANGUAGE MODE)"
            # sql = f"SELECT * FROM SkiBoards WHERE MATCH(name) AGAINST('{query_string}' WITH QUERY EXPANSION)"
            cursor.execute(sql)
            response = cursor.fetchall()
            logging.info("Response: {}".format(response))
            print(f"Response: {response}")
        except Exception as e:
            logging.error(e)
            return None
    
        results = []
        for r in response:
            logging.info(f"Extracting skiboard from result: \n{r}")
            # Map DB Result to User Object
            result = SkiBoard(
                skiboard_id=r[0], 
                url=r[1], 
                brand=r[2], 
                model=r[3], 
                year=r[4], 
                name=r[5],
                slug=r[6],
                category=r[7],
                family=r[8],
                description=r[9],
                stiffness=r[10],
                flex_profile=r[11],
                camber_profile=r[12],
                camber_details=r[13],
                core=r[14],
                core_profiling=r[15],
                fibreglass=r[16],
                laminates=r[17],
                resin=r[18],
                base=r[19],
                edge_tech=r[20],
                topsheet=r[21],
                sidewall=r[22],
                inserts=r[23],
                asym=r[24],
                weight=r[25],
                womens=r[26],
                youth=r[27]
            )

            logging.info(f"Results: \n{results.__dict__}")
            results.append(result)

        return results
    


    # --------------------------------------------------
    # Update ElasticSearch               F U N C T I O N
    # --------------------------------------------------
    def update_es(id, skiboard, es_index='skiboards'):

        if not skiboard:
            return False

        # Connect to ElasticSearch
        es_client = Elasticsearch([es_config['url']], basic_auth=(es_config['key'], es_config['secret']))
        idx_manager = IndicesClient(es_client)
        active_index = list(idx_manager.get(es_index).keys())[0]

        # Create or Update document
        if not es_client.exists(index=es_index, id=id):
            return es_client.index(index=active_index, id=id, body=skiboard.__dict__)

        return es_client.update(index=active_index, id=id, document=skiboard)
       
    
    # --------------------------------------------------
    # Search ElasticSearch               F U N C T I O N
    # --------------------------------------------------
    @classmethod
    def search_es(query, es_index='skiboards'):
         # Connect to ElasticSearch
        es_client = Elasticsearch([es_config['url']], basic_auth=(es_config['key'], es_config['secret']))
        idx_manager = IndicesClient(es_client)
        active_index = list(idx_manager.get(es_index).keys())[0]

        search_body = {
            "query": {
                "multi_match": {
                    "query": query,
                    "type": "bool_prefix",
                    "fields": [
                        "name",
                        "brand",
                        "model"
                    ]
                }
            }
        }
        
        logging.info("Querying elasticsearch index {}: \n{}".format(active_index, search_body))
        resp = es_client.search(index=active_index, body=search_body)
        logging.info("ElasticSearch response: {}".format(resp))
        res = []
        try:
            for hit in resp['hits']['hits']:
                skiboard = SkiBoard(
                    skiboard_id = hit['_source']['skiboard_id'], 
                    url = hit['_source']['url'], 
                    brand = hit['_source']['brand'], 
                    model = hit['_source']['model'], 
                    year = hit['_source']['year'], 
                    name = hit['_source']['name'],
                    slug = hit['_source']['slug'],
                    category = hit['_source']['category'],
                    family = hit['_source']['family'],
                    description = hit['_source']['description'],
                    stiffness = hit['_source']['stiffness'],
                    flex_profile = hit['_source']['flex_profile'],
                    camber_profile = hit['_source']['camber_profile'],
                    camber_details = hit['_source']['camber_details'],
                    core = hit['_source']['core'],
                    core_profiling = hit['_source']['core_profile'],
                    fibreglass = hit['_source']['fibreglass'],
                    laminates = hit['_source']['laminates'],
                    resin = hit['_source']['resin'],
                    base = hit['_source']['base'],
                    edge_tech = hit['_source']['edge_tech'],
                    topsheet = hit['_source']['topsheet'],
                    sidewall = hit['_source']['sidewall'],
                    inserts = hit['_source']['inserts'],
                    asym = hit['_source']['asym'],
                    weight = hit['_source']['weight'],
                    womens = hit['_source']['womens'],
                    youth = hit['_source']['youth']
                )

                res.append(skiboard)
        except Exception as e:
            logging.error("Error extracting hits from ElasticSearch: {}".format(e))
            
        logging.info("ElasticSearch:\nQuery: {}\nResponse: {}".format(query, res))

        return res

    # --------------------------------------------------
    # Calculate Comparisons              F U N C T I O N
    # --------------------------------------------------
    def calc_comparisons():
        total_comparisons = 0
        logging.info("Session: {}".format(session))
        if 'compare' in session and session['compare']:
            for key in session['compare']:
                total_comparisons += len(session['compare'][key])

        return "[ {} ]".format(total_comparisons)


    # --------------------------------------------------
    # Extract Params from Text           F U N C T I O N
    # --------------------------------------------------
    def extract_params_from_text(raw_input):
        # Initialise empty dictionaries
        sizes = 0
        params = {}
        param_units = {}

        logging.info("Extracting params from raw input")

        # Itterate throguh each row
        for row in raw_input.split('\n'):
            split_row = row.split(" ")

            # Extract name from row
            row_name = " ".join(split_row[:-1])

            logging.info("Split row: {}".format(split_row))
            logging.info("Row name: {}".format(row_name))

            # Extract unit from row
            if '(' in str(row):
                units = str(row)[row.find('(') +1:row.find(')')]
            else:
                units = None

            # Extract values from row
            values = split_row[-1].split('\t')[1:]

            # Remove rogue characters
            for x, v in enumerate(values):
                values[x] = v.replace('\r', '')

            # Create dict for params and units
            params[row_name.replace(' ', '_').lower()] = values
            param_units[row_name.replace(' ', '_').lower()] = units

            sizes += 1
            
        return params, param_units, sizes


    # --------------------------------------------------
    # Format Params                      F U N C T I O N
    # --------------------------------------------------
    def format_params(unformatted, units):
        formatted_data = {}
        formatted_units = {}
        data_confidence = {}

        # Populate formatted dictionaries with given data
        for key in unformatted:
            matched, confidence = match_param(key)
            if matched not in data_confidence or (matched in data_confidence and confidence > data_confidence[matched]):
                #logging.info("Updating param matching...\nKey: {} - Confidence: {}\nDict: {}".format(matched, confidence, data_confidence))
                data_confidence[matched] = confidence
                formatted_data[matched] = unformatted[key]
                formatted_units[matched] = units[key]

        return formatted_data, formatted_units


    # --------------------------------------------------
    # Describe                           F U N C T I O N
    # --------------------------------------------------
    def describe():
        return {'profile_types': profile_types, 'unit_names': unit_names, 'param_names': param_names}


## Connect to DB

In [43]:
# Database Setup
# --------------------------------------------------
def setupdb():
    f = open('../application/config/localdb_config.json')
    dbconfig = json.loads(f.read())
    db = pymysql.connect(host=dbconfig['localhost'], user=dbconfig['username'], password=dbconfig['password'], database=dbconfig['database'])
    f.close()
    return db

In [44]:
db = setupdb()
cursor = db.cursor()

In [45]:
sql = "SELECT * FROM SkiBoards"
cursor.execute(sql)
results = cursor.fetchall()

In [46]:
skiboards = []

for r in results:
    sql = f"SELECT * FROM Sizes WHERE skiboard_id = {r[0]}"
    cursor.execute(sql)
    size_results = cursor.fetchall()
    sizes = []
    
    # Extract info from each size of a single skiboard
    for s in size_results:
        size = {
            'size': s[1],
            'nose_width': s[2],
            'waist_width': s[3],
            'tail_width': s[4],
            'sidecut': s[5],
            'setback': s[6],
            'factory_mounting_point': s[7],
            'freestyle_mounting_point': s[8],
            'effective_edge': s[9],
            'created': s[10],
            'updated': s[11]
        }
        sizes.append(size)
        
    skiboard = {
        'skiboard_id': r[0],
        'url': r[1],
        'brand': r[2],
        'model': r[3],
        'year': r[4],
        'name': r[5],
        'slug': r[6],
        'category': r[7],
        'family': r[8],
        'description': r[9],
        'stiffness': r[10],
        'shape': r[11],
        'flex_profile': r[12],
        'camber_profile': r[13],
        'camber_details': r[14],
        'core': r[15],
        'core_profiling': r[16],
        'fibreglass': r[17],
        'laminates': r[18],
        'resin': r[19],
        'base': r[20],
        'edge_tech': r[21],
        'topsheet': r[22],
        'sidewall': r[23],
        'inserts': r[24],
        'asym': r[25],
        'weight': r[26],
        'womens': r[27],
        'youth': r[28],
        'created': r[29],
        'updated': r[30],
        'sizes': sizes
    }
    skiboards.append(skiboard)

In [47]:
print('Extracted {} SkiBoards\n'.format(len(skiboards)))
print(skiboards[0])

Extracted 79 SkiBoards

{'skiboard_id': 1, 'url': '', 'brand': 'Burton', 'model': 'Custom', 'year': '2020', 'name': 'Burton Custom 2020', 'slug': 'burton-custom-2020', 'category': 'Snowboard', 'family': '', 'description': 'There are many imitators, but the Burton Custom Snowboard is the original beast of many burdens. Accept no substitute, the Custom has been evolving towards versatile perfection since 1996. Maybe you have seen a few of them out there, blasting down groomers, popping high into the air effortlessly, and still out there shredding the resort when the brights come on. Poppy, fast, responsive, and powerful, there are no signs that the legend of the Burton Custom Snowboard will ever slow down.', 'stiffness': 6.0, 'shape': 'Directional Twin', 'flex_profile': 'Twin', 'camber_profile': 'Full Camber', 'camber_details': None, 'core': 'Super Fly II™ 700G Core', 'core_profiling': None, 'fibreglass': None, 'laminates': '45° Carbon Highlights', 'resin': 'Super Sap® Epoxy', 'base': 'S

## Connect to ElasticSearch

In [25]:
f = open(sys.path[0] + '/../application/config/bonsai_config.json')

In [26]:
es_config = json.load(f)

In [27]:
es_endpoint = es_config['url']
es_user = es_config['key']
es_password = es_config['secret']

In [28]:
es_client = Elasticsearch([es_endpoint], basic_auth=(es_user, es_password))

In [61]:
index = list(es_client.indices.get_alias(index="*"))[0]
print(index)

skiboards-2024-02-21


In [29]:
new_index_name = "skiboards-{}".format(datetime.date.today())
alias = "skiboards"
print("New Index: " + new_index_name + " AKA: " + alias)

New Index: skiboards-2024-02-21 AKA: skiboards


In [30]:
idx_manager = IndicesClient(es_client)

In [31]:
# Create new index
try:
    idx_manager.create(index=new_index_name)
except RequestError:
    print("Error initializing index manager")

# Add alias to new index
try:
    idx_manager.put_alias(new_index_name, alias)
except:
    print("Error adding alias to cluster")

## Upload Firestore data to ElasticSearch

In [34]:
def generate_update_docs(skiboards):    
    for doc in skiboards:
        yield doc
        

In [35]:
successes = 0
print("Processing update...")
print("New Index: " + new_index_name)
for ok, action in streaming_bulk(client=es_client, index=new_index_name, actions=generate_update_docs(skiboards)):
    successes += ok

Processing update...
New Index: skiboards-2024-02-21


## Search

In [36]:
query = "burton custom"

In [37]:
search_body = {
        "query": {
            "multi_match": {
                "query": query,
                "type": "bool_prefix",
                "fields": [
                    "brand",
                    "model",
                    "year"
                ]
            }
        }
    }
res = es_client.search(index=new_index_name, body=search_body)

In [38]:
print(res)

{'took': 360, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 45, 'relation': 'eq'}, 'max_score': 1.5643144, 'hits': [{'_index': 'skiboards-2024-02-21', '_type': '_doc', '_id': 'WomUz40BU8zon6yuVocV', '_score': 1.5643144, '_source': {'skiboard_id': 1, 'url': '', 'brand': 'Burton', 'model': 'Custom', 'year': '2020', 'name': 'Burton Custom 2020', 'slug': 'burton-custom-2020', 'category': 'Snowboard', 'family': '', 'description': 'There are many imitators, but the Burton Custom Snowboard is the original beast of many burdens. Accept no substitute, the Custom has been evolving towards versatile perfection since 1996. Maybe you have seen a few of them out there, blasting down groomers, popping high into the air effortlessly, and still out there shredding the resort when the brights come on. Poppy, fast, responsive, and powerful, there are no signs that the legend of the Burton Custom Snowboard will ever slow down.', 'stiff

In [49]:
SkiBoard.search_es('burton')

NotFoundError: NotFoundError(404, 'index_not_found_exception', 'no such index [burton]', burton, index_or_alias)

## Delete Index

In [150]:
try:
    idx_manager.delete(index=new_index_name)
except:
    print("No index found with alias: {}".format(new_index_name))