## Similar SkiBoard Finder
---
Find Skis or Snowboards that are similar to another in a specified size
- provide skiboard + size
- request X similar skiboards
- return results as list

In [450]:
import json
import os, re, math
import statistics
import pymysql
import numpy as np
import pandas as pd
from sklearn.preprocessing import MaxAbsScaler
from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import euclidean

In [437]:
# Database Setup
# --------------------------------------------------
def setupdb():
    f = open('../app/config/localdb_config.json')
    dbconfig = json.loads(f.read())
    db = pymysql.connect(host=dbconfig['localhost'], user=dbconfig['username'], password=dbconfig['password'], database=dbconfig['database'])
    f.close()
    return db

### Collect Skiboards from DB
---

In [438]:
def get_skiboards():
    db = setupdb()
    cursor = db.cursor()

    sql = 'SELECT * FROM Sizes LEFT JOIN SkiBoards on SkiBoards.skiboard_id = Sizes.skiboard_id'

    # Search for SkiBoards with 
    try:
        cursor.execute(sql)
        response = cursor.fetchall()
        skiboards = []

        
        for r in response:
            #print(f'{r}\n\n')
            skiboards.append({
                'id': r[0],
                'name': f'{r[15]} {r[16]}',
                'year': r[17],
                'type': r[20],
                'size': r[1],
                'stiffness': r[23],
                'camber_profile': r[26],
                'shape': r[24],
                'nose_width': r[2],
                'waist_width': r[3],
                'tail_width': r[4],
                'sidecut': r[5],
                'effective_edge': r[9]
            })
    except Exception as e:
        print(f'ERROR: {e}')
    
    return pd.DataFrame.from_records(skiboards)

### Manage Sidecut Abnormalities
---

In [439]:
def condense_sidecuts(skiboards):
    for idx, row in skiboards.iterrows():
        
        try:
            # extract list of numbers from 'sidecut' value
            radius_list = re.findall(r'\d+\.?\d*', str(row['sidecut']))
    
            # convert from str to float val
            radius_list = [float(val) for val in radius_list]
    
            # calc the median radius and weight the average calc with this value
            median_radius = statistics.median(radius_list)
            radius_list.append(median_radius)
    
            # calc a weighted average of the sidecut
            radius_av = round(statistics.mean(radius_list), 1)
    
        except Exception as e:
            print(f'ERROR: {e}')
            print(f'{row}\n')
    
        
        # update the value of sidecut with the weighted average
        skiboards.loc[idx, 'sidecut'] = radius_av

    return skiboards

### Calculate Taper 
---

In [479]:
def calc_taper(skiboards):
    for idx, row in skiboards.iterrows():
        if row['nose_width'] and row['tail_width']:
            skiboards.loc[idx, 'taper'] = float(row['nose_width']) - float(row['tail_width'])
            '''
            if skiboards.loc[idx, 'taper'] >= 10:
                print(f"SkiBoard: {skiboards.loc[idx, 'name']}")
                print(f"Nose Width: {skiboards.loc[idx, 'nose_width']}\nTail Width: {skiboards.loc[idx, 'tail_width']}\nTaper: {skiboards.loc[idx, 'taper']}\n\n\n")
           '''
    return skiboards

### Weigting of normalized values
---

In [486]:
def weighted_normalization(skiboards):
    data_cols = ['nose_width', 'waist_width', 'tail_width', 'sidecut', 'effective_edge', 'taper']
    
    weights = {
        'nose_width': 1,
        'waist_width': 1,
        'tail_wdith': 1,
        'sidecut': 1,
        'effective_edge': 1,
        'taper': 1,
    }

    '''
    A = a*np.sqrt(w)
    B = b*np.sqrt(w)
    norm = scipy.spatial.distance.euclidean(A, B)
    '''

    # normalize dataset
    scaler = MaxAbsScaler()
    norm_data = scaler.fit_transform(skiboards[weights.keys()])

    for i, row in enumerate(norm_data):
        for key, weight in weights.items():
            print(norm_data[i])
            norm_data[i] = norm_data[i]
                

    return norm_data

## Find Similar SkiBoards
---

In [487]:
def find_similar(my_skiboard, skiboards):
    # define data / label cols
    
    label_cols = ['id', 'name', 'year']

    skiboards = condense_sidecuts(skiboards)
    skiboards = calc_taper(skiboards)

    # Find the full set index of the comparitor SkiBoard
    my_skiboard_idx = skiboards[(skiboards['id'] == my_skiboard['id'].values[0]) & (skiboards['size'] == my_skiboard['size'].values[0])].index[0]

    # normalize dataset
    scaler = MaxAbsScaler()
    norm_data = scaler.fit_transform(skiboards[data_cols])
    #norm_data = weighted_normalization(norm_data)

    # Normalize Camber dominance of Skiboards
    my_camber = str(my_skiboard.camber_profile).lower()
    if my_camber:
        if 'flat' in my_camber:
            my_camber = 'flat'
        elif 'rocker' in my_camber:
            my_camber = 'rocker'
        else:
            my_camber = 'camber'

    # Normalize shape profile of SkiBoards
    my_shape = str(my_skiboard.shape).lower()
    if my_shape:
        if 'twin' in my_shape:
            my_shape = 'twinish' # includes directional twin or twintip etc
        else:
            my_shape = 'directional'

    # Calculate similarity scores for each SkiBoard
    for i, s in skiboards.iterrows():
        distance = euclidean(norm_data[my_skiboard_idx], norm_data[i])
        skiboards.loc[i, 'similarity'] = distance

        # Adjust for stiffness
        if (s.stiffness.values[0] < my_skiboard.stiffness - 1.5) or (s.stiffness.values[0] > my_skiboard.stiffness + 1.5):
            skiboards.loc[i, 'similarity'] = skiboards.loc[i, 'similarity'] * 0.9
        
        # Adjust for camber profile
        if my_camber not in str(s.camber_profile).lower():
            skiboards.loc[i, 'similarity'] = skiboards.loc[i, 'similarity'] * 0.9

        # Adjust for shape
        if 'twin' in str(s.shape).lower():
            this_shape = 'twinish'
        else:
            this_shape = 'directional'

        if my_shape != this_shape:
                skiboards.loc[i, 'similarity'] = skiboards.loc[i, 'similarity'] * 0.9

        # consider taper instead of nose / tail width
        
        


    return skiboards  

## Find Versions
---

In [488]:
def find_versions(my_skiboard, skiboards, show_original=False, only_newest=False, remove_newest=False):

    # collect all versions of the same skibaord
    versions = skiboards.loc[skiboards['name'] == my_skiboard['name'].values[0]]
    versions = versions.sort_values(['year', 'size'], ascending=[False, True])

    # filter results to include / exclude newest model
    if only_newest:
        newest_id = versions.iloc[0].id
        versions = versions[versions['id'] == newest_id]
    elif remove_newest:
        newest_id = versions.iloc[0].id
        versions = versions[versions['id'] != newest_id]

    # if the selected skiboard is the newest, an empty dataframe will be returned
    if not show_original:
        versions = versions[versions['id'] != my_skiboard.id.values[0]]
        
    return versions

## Filter SkiBoard Type
---

In [489]:
def filter_by_type(my_skiboard, skiboards):
    my_type = my_skiboard['type'].values[0]
    
    return skiboards[skiboards['type'] == my_type]

# Main
---

In [490]:
skiboards = get_skiboards()

In [491]:
skiboards.head()

Unnamed: 0,id,name,year,type,size,stiffness,camber_profile,shape,nose_width,waist_width,tail_width,sidecut,effective_edge
0,1,Burton Custom,2020,Snowboard,150,6.0,Full Camber,Directional Twin,288.1,248.0,288.1,7.4,1135.0
1,1,Burton Custom,2020,Snowboard,154,6.0,Full Camber,Directional Twin,291.5,250.0,291.5,7.6,1175.0
2,1,Burton Custom,2020,Snowboard,154W,6.0,Full Camber,Directional Twin,299.5,258.0,299.5,7.6,1175.0
3,1,Burton Custom,2020,Snowboard,156,6.0,Full Camber,Directional Twin,294.3,252.0,294.3,7.8,1195.0
4,1,Burton Custom,2020,Snowboard,158,6.0,Full Camber,Directional Twin,297.0,254.0,297.0,7.9,1215.0


In [492]:
name = 'Burton Custom'
year = 2020
size = '154W'

my_skiboard = skiboards.loc[(skiboards['name'] == name) & (skiboards['year'] == str(year)) & (skiboards['size'] == size)]
print(my_skiboard.id.values[0])

1


In [493]:
skiboards = find_similar(my_skiboard, skiboards)
versions = find_versions(my_skiboard, skiboards, remove_newest=True, show_original=True)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

Remove exact matches of skiboard from results list

In [447]:
skiboards = skiboards[~skiboards['id'].isin(versions['id'].tolist())]

In [448]:
filter_by_type(my_skiboard, skiboards)

Unnamed: 0,id,name,year,type,size,stiffness,camber_profile,shape,nose_width,waist_width,tail_width,sidecut,effective_edge,similarity
0,1,Burton Custom,2020,Snowboard,150,6.0,Full Camber,Directional Twin,288.1,248.0,288.1,7.4,1135.0,0.066392
1,1,Burton Custom,2020,Snowboard,154,6.0,Full Camber,Directional Twin,291.5,250.0,291.5,7.6,1175.0,0.043396
2,1,Burton Custom,2020,Snowboard,154W,6.0,Full Camber,Directional Twin,299.5,258.0,299.5,7.6,1175.0,0.000000
3,1,Burton Custom,2020,Snowboard,156,6.0,Full Camber,Directional Twin,294.3,252.0,294.3,7.8,1195.0,0.035225
4,1,Burton Custom,2020,Snowboard,158,6.0,Full Camber,Directional Twin,297.0,254.0,297.0,7.9,1215.0,0.037558
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
816,98,Ride Manic,2020,Snowboard,163,3.0,Directional Hybrid Camber,Twin,305.0,257.0,305.0,8.3,1241.0,0.066659
817,99,Ride Heartbreaker,2020,Snowboard,139,3.0,Hybrid Camber,Twin,274.0,234.0,274.0,8.7,1076.0,0.165561
818,99,Ride Heartbreaker,2020,Snowboard,143,3.0,Hybrid Camber,Twin,277.0,236.0,277.0,8.8,1101.0,0.149844
819,99,Ride Heartbreaker,2020,Snowboard,147,3.0,Hybrid Camber,Twin,281.0,238.0,281.0,8.9,1127.0,0.133710


In [449]:
skiboards = skiboards.sort_values(['similarity'], ascending=True)
skiboards.head()

Unnamed: 0,id,name,year,type,size,stiffness,camber_profile,shape,nose_width,waist_width,tail_width,sidecut,effective_edge,similarity
574,45,Burton Custom (Flying V),2020,Snowboard,154w,6.0,Hybrid Rocker,Directional Twin,299.5,258.0,299.5,7.6,1175.0,0.0
2,1,Burton Custom,2020,Snowboard,154W,6.0,Full Camber,Directional Twin,299.5,258.0,299.5,7.6,1175.0,0.0
202,143,Nitro Cinema,2020,Snowboard,159,5.0,Hybrid Rocker,Directional,302.0,256.0,302.0,7.8,1170.0,0.017653
207,144,Nitro Prime Overlay,2020,Snowboard,158,5.0,Flat,Directional,302.0,256.0,302.0,7.8,1170.0,0.017653
208,144,Nitro Prime Overlay,2020,Snowboard,159,5.0,Flat,Directional,302.0,256.0,302.0,7.8,1170.0,0.017653


# TO DO...
---
- add this file to branch and deploy to master

## Improvements
---
- Factor in stiffness rating
- Consider camber profile? Do we simplify this to camber / rocker dominent
- Scale variables by significance

- Remove duplicate results for all other skiboards
- Weight variables based on significacnce during similarity calc

In [396]:
mystr = "abcb"

In [397]:
def recurring_checker(my_str):
    for idx, c in enumerate(mystr):
        if c in list(mystr)[idx:]:
            return c
    return None

In [398]:
print(recurring_checker(mystr))

a


In [480]:
weights = {
        'nose_width': 1,
        'waist_width': 1,
        'tail_wdith': 1,
        'sidecut': 1,
        'effective_edge': 1,
        'taper': 1,
    }

In [483]:
for w in weights.keys():
    print(w)

nose_width
waist_width
tail_wdith
sidecut
effective_edge
taper
