In [15]:
import pickle
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

In [16]:
pkl_file = open('../data/df.pkl', 'rb')
df = pickle.load(pkl_file)
pkl_file.close() 

In [17]:
features = [
            'top_elev_(ft)', 
            'bottom_elev_(ft)', 
            'vert_rise_(ft)', 
            'slope_length_(ft)', 
            'avg_width_(ft)', 
            'slope_area_(acres)', 
            'avg_grade_(%)', 
            'max_grade_(%)', 
            'groomed']

X = df[features].values

In [18]:
def cos_sim_recommendations(trail_name, resort_name, X, n=5, resort=None):
    index = df.index[(df['trail_name'] == trail_name) & (df['resort'] == resort_name)][0]
    trail = X[index].reshape(1,-1)
    cs = cosine_similarity(trail, X)
    rec_index = np.argsort(cs)[0][::-1][1:]
    ordered_df = df.loc[rec_index]
    if resort:
        ordered_df = ordered_df[ordered_df['resort'] == resort]
    rec_df = ordered_df.head(n)
    orig_row = df.loc[[index]].rename(lambda x: 'original')
    total = pd.concat((orig_row,rec_df))
    return total

In [19]:
def eucl_dist_recommendations_resort(trail_name, resort_name, X, n=5, resort=None):
    index = df.index[(df['trail_name'] == trail_name) & (df['resort'] == resort_name)][0]
    trail = X[index].reshape(1,-1)
    ed = euclidean_distances(trail, X)
    rec_index = np.argsort(ed)[0][::-1][1:]
    ordered_df = df.loc[rec_index]
    if resort:
        ordered_df = ordered_df[ordered_df['resort'] == resort]
    rec_df = ordered_df.head(n)
    orig_row = df.loc[[index]].rename(lambda x: 'original')
    total = pd.concat((orig_row,rec_df))
    return total

In [20]:
ss = StandardScaler()
X = ss.fit_transform(X)

In [21]:
cos_sim_recommendations('Sorensen Park','Winter Park',X,n=5)

Unnamed: 0,trail_name,top_elev_(ft),bottom_elev_(ft),vert_rise_(ft),slope_length_(ft),avg_width_(ft),slope_area_(acres),avg_grade_(%),max_grade_(%),ability_level,resort,location,groomed,colors,ability_nums,color_nums
original,Sorensen Park,9150.0,9090.0,60.0,1067.43,114.0,4.07,6.0,8.0,Beginner,Winter Park,CO,1,green,1,1
932,Shoo Fly,9580.0,9450.0,130.0,1258.66,112.0,5.88,10.0,14.0,Beginner,Winter Park,CO,1,green,1,1
314,Little Hawk TRV,9369.0,9348.0,21.0,821.0,67.0,1.3,3.0,8.0,Beginner,Eldora,CO,1,green,1,1
852,Bobcat,9490.0,9400.0,90.0,822.96,137.0,1.53,11.0,9.0,Novice,Winter Park,CO,1,green,2,1
308,Fox Tail,9511.0,9456.0,56.0,749.0,61.0,1.0,7.0,12.0,Novice,Eldora,CO,1,green,2,1
317,Sunkid Slope,9361.0,9354.0,8.0,157.0,67.0,0.2,6.0,6.0,Beginner,Eldora,CO,1,blue,1,2


In [22]:
eucl_dist_recommendations_resort('Sorensen Park','Winter Park',X,n=5,resort=None)

Unnamed: 0,trail_name,top_elev_(ft),bottom_elev_(ft),vert_rise_(ft),slope_length_(ft),avg_width_(ft),slope_area_(acres),avg_grade_(%),max_grade_(%),ability_level,resort,location,groomed,colors,ability_nums,color_nums
original,Sorensen Park,9150.0,9090.0,60.0,1067.43,114.0,4.07,6.0,8.0,Beginner,Winter Park,CO,1,green,1,1
1021,Cinch,10901.0,8108.0,2793.0,30653.0,38.0,26.5,9.0,13.0,Novice,Beaver Creek,CO,1,green,2,1
1007,Alphabet Chutes,11700.0,11350.0,350.0,694.98,2117.26,33.78,58.0,236.0,Expert,Winter Park,CO,0,bb,6,4
583,Inner MongoliaBowl,11495.0,10226.0,1269.1,3877.6,2187.0,194.6,33.0,58.0,Advanced,Vail,CO,0,black,5,3
1018,Stone Creek Chutes,10900.0,9420.0,1480.0,7946.0,987.0,180.0,32.0,85.0,Expert,Beaver Creek,CO,0,bb,6,4
1001,Belle Fourche,11400.0,10740.0,660.0,2101.48,2491.12,120.18,33.0,51.0,Expert,Winter Park,CO,0,black,6,3


In [23]:
'''least similar'''
cs = cosine_similarity(X[0].reshape(1,-1), X)[0]
css = list(enumerate(cs))
srtd = sorted(css, key=lambda x: x[1])[::-1]
srtd[-5:]

[(333, -0.9410708817093416),
 (308, -0.9413197094749598),
 (932, -0.9466683118520665),
 (314, -0.9581932537348332),
 (848, -0.9661414996247434)]

In [24]:
df.iloc[[0,673,821,848,314,403]]

Unnamed: 0,trail_name,top_elev_(ft),bottom_elev_(ft),vert_rise_(ft),slope_length_(ft),avg_width_(ft),slope_area_(acres),avg_grade_(%),max_grade_(%),ability_level,resort,location,groomed,colors,ability_nums,color_nums
0,Over the Rainbow,11924.0,10858.0,1066.0,2404.0,319.0,17.6,50.0,69.0,Expert,Loveland,CO,0,bb,6,4
673,Bubba’s Shortcut Upper,10335.0,10238.0,97.0,1200.0,118.0,3.3,8.0,11.0,Novice,Crested Butte,CO,1,green,2,1
821,School Yard,6799.0,6732.0,66.0,980.0,129.0,2.9,7.0,9.0,Beginner,Diamond Peak,NV,1,green,1,1
848,Sorensen Park,9150.0,9090.0,60.0,1067.43,114.0,4.07,6.0,8.0,Beginner,Winter Park,CO,1,green,1,1
314,Little Hawk TRV,9369.0,9348.0,21.0,821.0,67.0,1.3,3.0,8.0,Beginner,Eldora,CO,1,green,1,1
403,Lakeview to Weasel,7538.0,7483.0,55.0,1282.0,106.0,3.1,4.0,18.0,Novice,Alpine Meadows,CA,1,blue,2,2


In [25]:
color = ['green','blue']

In [26]:
def cos_sim_recs(index, n=5, resort=None, color=None):
    trail = X[index].reshape(1,-1)
    cs = cosine_similarity(trail, X)
    rec_index = np.argsort(cs)[0][::-1][1:]
    ordered_df = df.loc[rec_index]
    if resort:
        ordered_df = ordered_df[ordered_df['resort'] == resort]
    if color:
        ordered_df = ordered_df[ordered_df['colors'].isin(color)]
    rec_df = ordered_df.head(n)
    rec_df = rec_df.reset_index(drop=True)
    rec_df.index = rec_df.index+1
    orig_row = df.loc[[index]].rename(lambda x: 'original')
    total = pd.concat((orig_row,rec_df))
    return total

In [27]:
cos_sim_recs(901,n=10,color=color)

Unnamed: 0,trail_name,top_elev_(ft),bottom_elev_(ft),vert_rise_(ft),slope_length_(ft),avg_width_(ft),slope_area_(acres),avg_grade_(%),max_grade_(%),ability_level,resort,location,groomed,colors,ability_nums,color_nums
original,Whistle Stop Lower,10800.0,10640.0,160.0,1876.64,79.0,4.74,9.0,13.0,Novice,Winter Park,CO,1,green,2,1
1,Upper White Rabbit,10800.0,10600.0,200.0,1902.59,124.0,4.52,11.0,16.0,Novice,Winter Park,CO,1,blue,2,2
2,Minor Matter,10677.0,10479.0,198.0,1723.0,100.0,4.0,12.0,18.0,Novice,Copper,CO,1,green,2,1
3,Gunbarrel,10670.0,10450.0,220.0,1991.16,50.0,4.0,11.0,12.0,Low Intermediate,Winter Park,CO,1,green,3,1
4,Tango Road,11121.0,10999.0,123.0,1297.0,97.0,2.9,10.0,16.0,Novice,Loveland,CO,1,green,2,1
5,Silver Queen Road,11242.0,11074.0,168.0,2050.0,36.0,1.7,8.0,16.0,Low Intermediate,Crested Butte,CO,1,blue,3,2
6,I-Way,10735.0,10638.0,97.0,1063.0,64.0,1.6,9.0,19.0,Novice,Copper,CO,1,green,2,1
7,Chisholm,11130.0,11003.0,127.0,1488.0,41.0,1.4,9.0,18.0,Novice,Arapahoe Basin,CO,1,green,2,1
8,Tin Pants Catwalk,11023.0,10971.0,51.3,681.6,38.0,0.6,8.0,10.0,Beginner,Vail,CO,1,green,1,1
9,Peanut,10320.0,10165.0,156.0,1833.0,35.0,1.5,9.0,15.0,Novice,Crested Butte,CO,1,green,2,1
