In [27]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
import warnings
warnings.filterwarnings('ignore')
#MIGHT HAVE TO ENCODE THE BINNING OF THE LENGTHS SO THIS CAN BE A 
#PARAMETER FOR A COLD START

In [28]:
US_trails = pd.read_csv('../data/US_trails_engineered.csv')

In [29]:
US_trails.columns

Index(['name', 'location', 'difficulty', 'length', 'ascent', 'descent',
       'stars', 'latitude', 'longitude', 'summary', 'url',
       'difficulty_encoded', 'category', 'type_Featured Ride', 'type_Trail',
       'city/town', 'state', 'length_range'],
      dtype='object')

In [30]:
US_trails.shape

(20691, 18)

In [31]:
US_trails.dtypes

name                   object
location               object
difficulty             object
length                float64
ascent                  int64
descent                 int64
stars                 float64
latitude              float64
longitude             float64
summary                object
url                    object
difficulty_encoded      int64
category               object
type_Featured Ride      int64
type_Trail              int64
city/town              object
state                  object
length_range           object
dtype: object

In [32]:
#I don't need the latitude and longitude columns since I'll be filtering on those
columns_to_scale = list(US_trails.select_dtypes(exclude=['object']).columns.difference(['latitude', 'longitude']))
columns_to_scale

['ascent',
 'descent',
 'difficulty_encoded',
 'length',
 'stars',
 'type_Featured Ride',
 'type_Trail']

In [33]:
#this scales each column individually, BUT REALLY WHAT IS IT DOING?
for col in columns_to_scale:
    US_trails[col + "_scaled"] = preprocessing.scale(US_trails[col])

In [34]:
'''
#THIS DOES THE SAME AS ABOVE, EXCEPT THAT IT REWRITES ALL THE QUANTITATIVE COLUMNS WITH THEIR SCALED VALUES
scaler = preprocessing.StandardScaler()
scaler.fit(US_trails[quantitative_columns])
US_trails[quantitative_columns] = scaler.transform(US_trails[quantitative_columns])
US_trails.head()
'''

'\n#THIS DOES THE SAME AS ABOVE, EXCEPT THAT IT REWRITES ALL THE QUANTITATIVE COLUMNS WITH THEIR SCALED VALUES\nscaler = preprocessing.StandardScaler()\nscaler.fit(US_trails[quantitative_columns])\nUS_trails[quantitative_columns] = scaler.transform(US_trails[quantitative_columns])\nUS_trails.head()\n'

In [35]:
US_trails.columns

Index(['name', 'location', 'difficulty', 'length', 'ascent', 'descent',
       'stars', 'latitude', 'longitude', 'summary', 'url',
       'difficulty_encoded', 'category', 'type_Featured Ride', 'type_Trail',
       'city/town', 'state', 'length_range', 'ascent_scaled', 'descent_scaled',
       'difficulty_encoded_scaled', 'length_scaled', 'stars_scaled',
       'type_Featured Ride_scaled', 'type_Trail_scaled'],
      dtype='object')

In [36]:
scaled_cols = [col for col in US_trails.columns if 'scaled' in col]
scaled_cols
#US_trails_scaled = US_trails.filter(regex='scaled')
#US_trails_scaled.head()

['ascent_scaled',
 'descent_scaled',
 'difficulty_encoded_scaled',
 'length_scaled',
 'stars_scaled',
 'type_Featured Ride_scaled',
 'type_Trail_scaled']

In [37]:
Big_X = US_trails[scaled_cols].values
Big_X

array([[ 2.948716  , -2.79569706,  2.40444594, ...,  1.47345507,
         2.6075075 , -2.6075075 ],
       [-0.09257181,  0.12246069,  1.6195909 , ...,  0.44678903,
         2.6075075 , -2.6075075 ],
       [-0.1825315 ,  0.20147031, -0.7349742 , ..., -0.57987702,
         2.6075075 , -2.6075075 ],
       ..., 
       [ 0.86338537, -0.22996377, -0.7349742 , ..., -0.57987702,
        -0.38350801,  0.38350801],
       [-0.12617121, -0.05635054, -0.7349742 , ..., -0.57987702,
        -0.38350801,  0.38350801],
       [ 2.32333359, -1.441098  ,  0.04988084, ..., -0.57987702,
        -0.38350801,  0.38350801]])

In [38]:
#this function will take in a state you ridden in, a trail you rode on, and the state you want to ride in and recommend
def cos_sim_recommendations(trail_name, state_name, scaled_features, n = 5, desired_state = None, desired_location= None):
    index = US_trails.index[(US_trails['name'] == trail_name) & (US_trails['state']==state_name)][0]
    #index is the index of the given trail in the main df based on it's name and state
    trail = scaled_features[index].reshape(1,-1)
    #trail is the scaled features of the given trail reshaped into a 1 row array
    cs = cosine_similarity(trail, scaled_features)
    #cs is the array of cosine similarity measurements of the trail compared to the scaled_features of the other trails
    rec_index = np.argsort(cs)[0][::-1][1:]
    #rec_index is an array of the indices of the cosine similarity measurements ordered by greatest to least, leaving out the given trail
    ordered_df = US_trails.loc[rec_index]
    #ordered_df gives the trails of US_trails ordered by the trails most similar to least similar
    if desired_state:
        ordered_df = ordered_df[ordered_df['state']== desired_state]
        #if a desired state to ride in is given, the dataframe is returned with trails only in that state
    if desired_location:
        ordered_df = ordered_df[ordered_df['location']== desired_location]
        #if a desired location to ride in is given, the dataframe is returned with trails only in that location
    rec_df = ordered_df.head(n)
    #rec_df is a dataframe with the number of desired trails to be recommended
    orig_row = US_trails.loc[[index]].rename(lambda x: 'original')
    #orig_row renames the index of the original trail given as 'original'
    total = pd.concat([orig_row, rec_df])
    #this combines the original row labeled as original row and the n recommended trails that are most similar to the original
    return total
    

In [39]:
cos_sim_recommendations("Mount Falcon and Lair O' the Bear Loop", 'Colorado', Big_X, n = 5, desired_state = 'Colorado', desired_location = 'Durango')

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,Mount Falcon and Lair O' the Bear Loop,"Morrison, Colorado",Blue/Black,19.3,2655,-2660,3.6,39.6468,-105.1968,A physically challenging but technically moder...,...,Morrison,Colorado,15-20,2.3927,-2.26758,0.834736,1.781343,0.036123,2.607508,-2.607508


In [40]:
cos_sim_recommendations("Mount Falcon and Lair O' the Bear Loop", 'Colorado', Big_X, n = 5, desired_state = 'Colorado', desired_location = 'Salida')

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,Mount Falcon and Lair O' the Bear Loop,"Morrison, Colorado",Blue/Black,19.3,2655,-2660,3.6,39.6468,-105.1968,A physically challenging but technically moder...,...,Morrison,Colorado,15-20,2.3927,-2.26758,0.834736,1.781343,0.036123,2.607508,-2.607508


In [41]:
#this function will take in a state you ridden in, a trail you rode on, and the state you want to ride in and recommend
def euclidean_dist_recommendations(trail_name, state_name, scaled_features, n = 5, desired_state = None, desired_location= None):
    index = US_trails.index[(US_trails['name'] == trail_name) & (US_trails['state']==state_name)][0]
    #index is the index of the given trail in the main df based on it's name and state
    trail = scaled_features[index].reshape(1,-1)
    #trail is the scaled features of the given trail reshaped into a 1 row array
    cs = euclidean_distances(trail, scaled_features)
    #cs is the array of euclidean distance measurements of the trail compared to the scaled_features of the other trails
    rec_index = np.argsort(cs)[0][::-1][1:]
    #rec_index is an array of the indices of the euclidean distance measurements ordered by greatest to least, leaving out the given trail
    ordered_df = US_trails.loc[rec_index]
    #ordered_df gives the trails of US_trails ordered by the trails most similar to least similar
    if desired_state:
        ordered_df = ordered_df[ordered_df['state']== desired_state]
        #if a desired state to ride in is given, the dataframe is returned with trails only in that state
    if desired_location:
        ordered_df = ordered_df[ordered_df['location']== desired_location]
        #if a desired location to ride in is given, the dataframe is returned with trails only in that location
    rec_df = ordered_df.head(n)
    #rec_df is a dataframe with the number of desired trails to be recommended
    orig_row = US_trails.loc[[index]].rename(lambda x: 'original')
    #orig_row renames the index of the original trail given as 'original'
    total = pd.concat([orig_row, rec_df])
    #this combines the original row labeled as original row and the n recommended trails that are most similar to the original
    return total

In [42]:
#THIS IS SOOOO MUCH WORSE!!!!! WHY????
euclidean_dist_recommendations("Mount Falcon and Lair O' the Bear Loop", 'Colorado', Big_X, n = 5, desired_state = 'Colorado', desired_location = 'Salida')

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,Mount Falcon and Lair O' the Bear Loop,"Morrison, Colorado",Blue/Black,19.3,2655,-2660,3.6,39.6468,-105.1968,A physically challenging but technically moder...,...,Morrison,Colorado,15-20,2.3927,-2.26758,0.834736,1.781343,0.036123,2.607508,-2.607508


In [43]:
cos_sim_recommendations("Mount Falcon and Lair O' the Bear Loop", 'Colorado', Big_X, n = 5, desired_state = 'Utah', desired_location = 'Moab')

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,Mount Falcon and Lair O' the Bear Loop,"Morrison, Colorado",Blue/Black,19.3,2655,-2660,3.6,39.6468,-105.1968,A physically challenging but technically moder...,...,Morrison,Colorado,15-20,2.3927,-2.26758,0.834736,1.781343,0.036123,2.607508,-2.607508


In [44]:
cos_sim_recommendations("Mount Falcon and Lair O' the Bear Loop", 'Colorado', Big_X, n = 5, desired_state = 'Virginia')

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,Mount Falcon and Lair O' the Bear Loop,"Morrison, Colorado",Blue/Black,19.3,2655,-2660,3.6,39.6468,-105.1968,A physically challenging but technically moder...,...,Morrison,Colorado,15-20,2.3927,-2.26758,0.834736,1.781343,0.036123,2.607508,-2.607508
8663,Braley's Pond & Confederate Breastworks Loop,"Weyers Cave, Virginia",Blue/Black,18.0,2753,-2758,4.0,38.2866,-79.3009,The first 1/3 of the ride is fireroad and pave...,...,Weyers Cave,Virginia,15-20,2.498918,-2.369461,0.834736,1.629015,0.446789,2.607508,-2.607508
7893,Dry Run/Peter's Ridge Trail Loop,"Covington, Virginia",Blue/Black,14.6,2796,-2789,3.5,37.803,-79.9352,Backcountry ride with a mix of gravel/Jeep roa...,...,Covington,Virginia,10-15,2.545523,-2.401689,0.834736,1.23062,-0.066544,2.607508,-2.607508
7053,Potts Mountain Jeep Trail Loop,"New Castle, Virginia",Blue/Black,18.1,2475,-2472,3.0,37.6051,-80.0758,A scenic route that circumnavigates Barbours C...,...,New Castle,Virginia,15-20,2.197607,-2.072135,0.834736,1.640733,-0.579877,2.607508,-2.607508
5900,Rush Creek Loop,"Chilhowie, Virginia",Blue/Black,13.7,2461,-2451,4.0,36.6552,-81.6902,A variety of trails in the Rush Creek area of ...,...,Chilhowie,Virginia,10-15,2.182433,-2.050304,0.834736,1.125163,0.446789,2.607508,-2.607508
7894,Blue Suck Falls to Fore Mountain (South),"Clifton Forge, Virginia",Blue,16.5,2538,-2542,3.5,37.913,-79.7974,A direct ride out of Douthat State Park to the...,...,Clifton Forge,Virginia,15-20,2.265889,-2.144907,0.049881,1.453253,-0.066544,2.607508,-2.607508


In [45]:
cos_sim_recommendations("The Whole Enchilada", 'Utah', Big_X, n = 15)

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,The Whole Enchilada,"Moab, Utah",Black,34.1,1283,-7794,4.8,38.4855,-109.232,"One of the world's great rides, with 8000 ft o...",...,Moab,Utah,30+,0.905656,-7.604888,1.619591,3.515533,1.268122,2.607508,-2.607508
4551,Cannell Trail - IMBA EPIC,"Kernville, California",Black,25.5,1718,-8030,4.6,35.9905,-118.3657,This epic Southern California jewel is a true ...,...,Kernville,California,25-30,1.377131,-7.850233,1.619591,2.507828,1.062789,2.607508,-2.607508
2933,Bike For Bender Hell Ride,"Big Bear City, California",Black,35.9,2349,-7055,4.8,34.1707,-116.8299,A point-to-point adventure from South Fork Cam...,...,Big Bear City,California,30+,2.061041,-6.836623,1.619591,3.726448,1.268122,2.607508,-2.607508
8132,Never Eat Soggy Waffels,"Junction, Utah",Black,22.3,1778,-6709,4.0,38.379,-112.4002,Shuttled ride from mud lake down to Beaver lin...,...,Junction,Utah,20-25,1.442162,-6.476921,1.619591,2.132868,0.446789,2.607508,-2.607508
10089,Big Boulder from Packer Saddle,"Downieville, California",Black,21.8,1644,-5813,4.3,39.6145,-120.6666,You still have about 2000ft of climbing on you...,...,Downieville,California,20-25,1.296926,-5.54544,1.619591,2.07428,0.754789,2.607508,-2.607508
1332,CDO,"Catalina, Arizona",Black,21.1,1105,-6939,3.6,32.4407,-110.7861,An amazing trail traversing from the top of Mt...,...,Catalina,Arizona,20-25,0.71273,-6.716029,1.619591,1.992258,0.036123,2.607508,-2.607508
8361,Monarch Crest - IMBA EPIC,"Whitepine, Colorado",Blue/Black,36.2,2035,-5890,4.7,38.4965,-106.3254,One of Colorado's top 5 epic rides - classic h...,...,Whitepine,Colorado,30+,1.720712,-5.625489,0.834736,3.7616,1.165455,2.607508,-2.607508
1088,Lemmon Drop,"Tanque Verde, Arizona",Black,18.8,1928,-6457,4.1,32.398,-110.6893,"A long, mostly downhill tour of Mt. Lemmon.",...,Tanque Verde,Arizona,15-20,1.60474,-6.214942,1.619591,1.722755,0.549456,2.607508,-2.607508
15995,Paulina Peak to Horse Butte,"La Pine, Oregon",Blue/Black,38.7,1878,-5842,4.0,43.6895,-121.2545,An amazing tour of the Newberry Crater with a ...,...,La Pine,Oregon,30+,1.550547,-5.575588,0.834736,4.054538,0.446789,2.607508,-2.607508
12,Haleakala Shuttle,"Kēōkea, Hawaii",Black,17.8,186,-7043,5.0,20.7086,-156.254,An epic descent from the rim of the Haleakala ...,...,Kēōkea,Hawaii,15-20,-0.28333,-6.824148,1.619591,1.60558,1.473455,2.607508,-2.607508


In [46]:
cos_sim_recommendations("Porcupine Rim", 'Utah', Big_X, n = 15, desired_state = 'Colorado', desired_location = 'Crested Butte')

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,Porcupine Rim,"Moab, Utah",Black,14.7,1195,-2962,4.7,38.5819,-109.4164,The world famous Porcupine Rim ride; a bone ja...,...,Moab,Utah,10-15,0.810277,-2.581539,1.619591,1.242338,1.165455,2.607508,-2.607508


In [47]:
cos_sim_recommendations("Porcupine Rim", 'Utah', Big_X, n = 15, desired_state = 'South Carolina')

Unnamed: 0,name,location,difficulty,length,ascent,descent,stars,latitude,longitude,summary,...,city/town,state,length_range,ascent_scaled,descent_scaled,difficulty_encoded_scaled,length_scaled,stars_scaled,type_Featured Ride_scaled,type_Trail_scaled
original,Porcupine Rim,"Moab, Utah",Black,14.7,1195,-2962,4.7,38.5819,-109.4164,The world famous Porcupine Rim ride; a bone ja...,...,Moab,Utah,10-15,0.810277,-2.581539,1.619591,1.242338,1.165455,2.607508,-2.607508
4340,Paris Mountain State Park,"Wade Hampton, South Carolina",Blue/Black,11.5,1534,-1534,4.5,34.9265,-82.3691,A fun ride that showcases the great terrain an...,...,Wade Hampton,South Carolina,10-15,1.177702,-1.09699,0.834736,0.867378,0.960122,2.607508,-2.607508
3772,Tiger Rag Loop,"Clemson, South Carolina",Blue/Black,9.6,960,-954,4.3,34.7471,-82.8663,A fast loop that is harder than it seems,...,Clemson,South Carolina,5-10,0.555571,-0.494022,0.834736,0.644746,0.754789,2.607508,-2.607508
3774,20 Miles of Northern Issaqueena,"Clemson, South Carolina",Blue,19.6,2113,-2114,3.8,34.7383,-82.8414,"The faster side of Issaqueena, connecting a nu...",...,Clemson,South Carolina,15-20,1.805252,-1.699958,0.049881,1.816495,0.241456,2.607508,-2.607508
3775,The Bottom End,"Clemson, South Carolina",Blue,21.9,2281,-2283,3.7,34.7169,-82.8394,This ride includes all trails in the lower end...,...,Clemson,South Carolina,20-25,1.98734,-1.875651,0.049881,2.085998,0.138789,2.607508,-2.607508
3773,Tour of Issaqueena,"Clemson, South Carolina",Blue,10.3,1131,-1130,4.0,34.7384,-82.8411,This covers most of the popular trails in Issa...,...,Clemson,South Carolina,10-15,0.74091,-0.676992,0.049881,0.726768,0.446789,2.607508,-2.607508
3776,Fant's Grove Perimeter Loop,"Pendleton, South Carolina",Blue,20.3,1985,-1985,3.2,34.6353,-82.8129,A ride around the perimeter of Fant's Grove us...,...,Pendleton,South Carolina,20-25,1.666519,-1.56585,0.049881,1.898518,-0.374544,2.607508,-2.607508
2522,FATS: Brown Wave & Skinny,"Murphys Estates, South Carolina",Blue,12.2,927,-925,4.5,33.6082,-82.0799,Super fast loops of Brown Wave and Skinny,...,Murphys Estates,South Carolina,10-15,0.519804,-0.463874,0.049881,0.949401,0.960122,2.607508,-2.607508
3777,NICA RIDE,"Clemson, South Carolina",Blue,16.1,1912,-1916,3.0,34.7388,-82.8404,Upstate Shredders!,...,Clemson,South Carolina,15-20,1.587398,-1.494118,0.049881,1.406383,-0.579877,2.607508,-2.607508
1951,Marrington Trails,"Goose Creek, South Carolina",Green/Blue,17.3,2014,-2020,3.9,32.9633,-79.9565,If you feel the need for speed this is the place!,...,Goose Creek,South Carolina,15-20,1.697951,-1.602236,-0.734974,1.546993,0.344122,2.607508,-2.607508
