In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pandas_profiling
import seaborn as sns
import missingno as msno
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
pwd

'C:\\Users\\Sam Cannon\\Desktop\\Python\\Climbing'

In [67]:
data = pd.read_json('nv-routes.jsonlines', lines = True)

In [56]:
data.head()

Unnamed: 0,description,fa,grade,location,metadata,protection,route_name,safety,type
0,"[In the late 1970's, my old friend and mentor,...",Wade Mills (solo),"{'YDS': '5.8', 'French': '5b', 'Ewbanks': '16'...",[Route ascends the huge main NE face of Wheeler.],"{'left_right_seq': '1', 'parent_lnglat': [-114...","[See route description and ""Hiking and Climbin...",Stella by Starlight,,"{'trad': True, 'aid': True, 'alpine': True}"
1,[P1) Unnamed splitter crack starts with a bold...,"P2+3 Sean McLane, Jack Hill, and Forest Alther...","{'YDS': '5.12-', 'French': '7a+', 'Ewbanks': '...",,"{'left_right_seq': '1', 'parent_lnglat': [-117...",[Double rack plus 8 draws for P3],Sword of Wotan,R,"{'trad': True, 'alpine': True}"
2,[Starts on an easy slab to the first bolt. Wor...,Bill Ohran (2007),"{'YDS': '5.12c', 'French': '7b+', 'Ewbanks': '...",[Starts on the right side of the Digimon secto...,"{'left_right_seq': '6', 'parent_lnglat': [-114...",[9 bolts + chains],Wrath of Sores,,{'sport': True}
3,[Techy climbing on great pink limestone.],Robbie Colbert (2007),"{'YDS': '5.12b', 'French': '7b', 'Ewbanks': '2...",[First route to the right of the Scrum Felcher...,"{'left_right_seq': '1', 'parent_lnglat': [-114...",[10 bolts + chains],Waste Not Want Not,,{'sport': True}
4,[A couple bolts going up some slab which works...,Tyler Phillips,"{'YDS': '5.10c', 'French': '6b', 'Ewbanks': '2...",,"{'left_right_seq': '0', 'parent_lnglat': [-114...",[5 bolts to anchor],Scrum Felcher,,{'sport': True}


In [107]:
#filtering out climbs and their descriptions only 
climbs = data[['route_name','description']]

In [108]:
# I have to transform the column of descriptions from a list into objects
climbs['description'] = climbs['description'].str[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [109]:
#drop al nas
climbs.dropna(inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [110]:
climbs.head()

Unnamed: 0,route_name,description
0,Stella by Starlight,"In the late 1970's, my old friend and mentor, ..."
1,Sword of Wotan,P1) Unnamed splitter crack starts with a bold ...
2,Wrath of Sores,Starts on an easy slab to the first bolt. Work...
3,Waste Not Want Not,Techy climbing on great pink limestone.
4,Scrum Felcher,A couple bolts going up some slab which works ...


In [46]:
#reset the index
climbs.reset_index(drop = True, inplace = True)

In [47]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(climbs['description'])

In [53]:
#calculating cosine similarity
from sklearn.metrics.pairwise import linear_kernel
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)
results = {}
for idx, row in climbs.iterrows():
    similar_indices = cosine_similarities[idx].argsort()[:-100:-1] 
    similar_items = [(cosine_similarities[idx][i], climbs['route_name'][i]) for i in similar_indices] 
    results[row['route_name']] = similar_items[1:]

In [100]:
#creating function to make dictionary of recommendations and then put them into a table
def climb(route_name):  
    return climbs.loc[climbs['route_name'] == route_name]['route_name'].tolist()[0].split(' - ')[0] 
 
def recommend(route_name, num):
    print("Recommending " + str(num) + " climbs similar to " + climb(route_name) + "...")   
    print("-------")    
    recs = results[route_name][:num]   
    for rec in recs: 
        print("Recommended: " + climb(rec[1]) + " (score:" +      str(rec[0]) + ")")

## Making recommendations

In [101]:
recommend('Stella by Starlight', 5)

Recommending 5 climbs similar to Stella by Starlight...
-------
Recommended: Crab Walk (score:0.08919462971516928)
Recommended: Who Uses a Slingshot Anyway (score:0.07956405117409433)
Recommended: Echo Falls (score:0.07227526966667361)
Recommended: mike's route (score:0.054380001697220876)
Recommended: Mary Jane Falls (score:0.04972506001689877)


In [111]:
data[data['route_name'] == "Mary Jane Falls"]

Unnamed: 0,description,fa,grade,location,metadata,protection,route_name,safety,type
2980,[Steep and sustained ice climbing the whole wa...,unknown,{},,"{'left_right_seq': '0', 'parent_lnglat': [-115...",[All screws. 16cm and up if you have them. Cou...,Mary Jane Falls,,{'ice': True}


In [112]:
recommend('Wrath of Sores', 10)

Recommending 10 climbs similar to Wrath of Sores...
-------
Recommended: Right Slsb (score:0.10647158407667948)
Recommended: Everywhere (score:0.1037838712863007)
Recommended: Chick Magnet (score:0.07917759896189935)
Recommended: Agriculture (score:0.07891010794174401)
Recommended: Camus Campus (score:0.07334406204735057)
Recommended: Crispy Critters (score:0.07083729854581253)
Recommended: Stone Age Traverse (score:0.06999207886865239)
Recommended: Der Oonterkloggen Direct (score:0.06954257605967479)
Recommended: Der Oonterkloggen Traverse (score:0.0685130923368692)
Recommended: Don't Scare the Bat! (score:0.06814578188103751)


In [116]:
data[data['route_name'] == 'Chick Magnet']

Unnamed: 0,description,fa,grade,location,metadata,protection,route_name,safety,type
2213,[Easier slab working up to a very small roof. ...,Dan Snyder,"{'YDS': '5.10a', 'French': '6a', 'Ewbanks': '1...",,"{'left_right_seq': '2', 'parent_lnglat': [-114...",[9 bolts to chains],Chick Magnet,,{'sport': True}


In [114]:
data[data['route_name'] == 'Wrath of Sores']

Unnamed: 0,description,fa,grade,location,metadata,protection,route_name,safety,type
2,[Starts on an easy slab to the first bolt. Wor...,Bill Ohran (2007),"{'YDS': '5.12c', 'French': '7b+', 'Ewbanks': '...",[Starts on the right side of the Digimon secto...,"{'left_right_seq': '6', 'parent_lnglat': [-114...",[9 bolts + chains],Wrath of Sores,,{'sport': True}
