In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
import time
import os

In [2]:
# Creates a list of strings that match the files in the directory
nums = [str(num) for num in range(1, 9)]
data = ["route-finder_" + i + ".csv" for i in nums]

In [3]:
# Creates a list of dataframes, one for each .csv file in the directory
route_finder_list = [pd.read_csv(climb) for climb in data]

In [4]:
# Stacks all of the dataframes to make one big dataframe
climbs = pd.concat(route_finder_list).reset_index()

In [5]:
# Write out the Combined version of the data so that I have a csv with all of the data in it 
#climbs.to_csv("utah_climbs.csv", index = False)

In [6]:
# This Code creates two boolean factors for whether the climb is rated PG13 or R
pg13 = climbs.Rating.str.extract("(PG13)")
R = climbs.Rating.str.extract("(R)")
climbs["PG13"] = pg13 == "PG13"
climbs["R"] = R == "R"

index                                                           143
Route                                                     Altar Boy
Location          PA's Mother > Rock Canyon > Southern Wasatch >...
URL               https://www.mountainproject.com/route/10680596...
Avg Stars                                                       2.0
Your Stars                                                       -1
Route Type                                              Trad, Sport
Rating                                                 5.10b/c PG13
Pitches                                                           1
Length                                                        100.0
Area Latitude                                               40.2656
Area Longitude                                            -111.6204
PG13                                                           True
R                                                             False
Name: 918, dtype: object

In [7]:
# Remove PG13 and R from the "Rating" column
climbs["Rating"] = climbs.Rating.str.replace("(PG13|R)", "", regex = True)


In [8]:
# This Code Separates out the "Location" column into "State", "Region", "Location", "Crag", and "Wall"
locs = climbs.Location.apply(lambda x : x[::-1]).str.extract("([A-Za-z1-9& ]*)>*([A-Za-z1-9& ]*)>*([A-Za-z1-9 &]*)>*([A-Za-z1-9& ]*)>*([A-Za-z1-9& ]*)")
locs["State"] = locs[0].apply(lambda x : x[::-1]).apply(lambda x : x.strip())
locs["Region"] = locs[1].apply(lambda x : x[::-1]).apply(lambda x : x.strip())
locs["Location"] = locs[2].apply(lambda x : x[::-1]).apply(lambda x : x.strip())
locs["Crag"] = locs[3].apply(lambda x : x[::-1]).apply(lambda x : x.strip())
locs["Wall"] = locs[4].apply(lambda x : x[::-1]).apply(lambda x : x.strip())

In [33]:
# This Adds the new variables into the dataframe and drops "index" and "Your Stars"
climbs.drop(["Location"], axis=1, inplace=True)
climbs[["State", "Region", "Location", "Crag", "Wall"]] = locs.drop([0, 1, 2, 3, 4], axis=1).replace("", np.NaN)
climbs.drop(["Your Stars"], axis=1, inplace=True)
climbs.drop(["index"], axis = 1, inplace = True)

In [48]:
rating_dict = {"5.6" : 1,
               ""}

array(['5.13c', '5.14b', '5.13d', '5.14+', '5.13c/d', '5.14a', '5.14c',
       '5.14d', '5.14a/b', '5.14-', '5.13+', '5.14b/c', '5.15d', '5.9-',
       '5.9 ', '5.9', '5.9 A0 ', '5.9+', '5.9- ', '5.9+ X', '5.9+ ',
       '5.9 A1', '5.9 M5', '5.10a', '5.10a/b', '5.10b ', '5.10b', '5.10 ',
       '5.10', '5.10 A0 ', '5.10b/c', '5.10-', '5.10b/c ', '5.10a ',
       '5.10- ', '5.10c', '5.10+', '5.10+ ', '5.10c/d', '5.10d',
       '5.10c/d ', '5.10d ', '5.10c ', '5.11', '5.11 ', '5.11- C0',
       '5.11a/b', '5.11b', '5.11a', '5.11a ', '5.11-', '5.11b ',
       '5.11b/c', '5.11b/c ', '5.11a/b ', '5.11b/c M7', '5.11b A0',
       '5.11a A0', '5.11- ', '5.12a', '5.12-', '5.11c', '5.11+ ', '5.11d',
       '5.11+', '5.12a/b', '5.12a ', '5.12- ', '5.12- V4', '5.11c/d',
       '5.11d ', '5.11c ', '5.11c X', '5.11+ M8-', '5.12- X', '5.12a/b ',
       '5.12a A0', '5.12b', '5.12+', '5.12', '5.12b ', '5.12c', '5.12d',
       '5.12b/c', '5.13a', '5.13a/b', '5.12c ', '5.12c/d', '5.13b',
       '5.13-', 

In [9]:
url = "https://www.mountainproject.com/gyms/utah"

In [10]:
r = requests.get(url)

In [11]:
soup = BeautifulSoup(r.text)

In [12]:
links = soup.find_all('a')

def hasGym(link):
    try:
        if "gym/" in link["href"]:
            return True
        else:
            return False
    except:
        return False

gyms = [link.text for link in links if hasGym(link)]
gyms

['Add Missing Gym',
 'AlpenFit - Indoor Training for Outdoo…',
 'Contact Climbing',
 'Dimple Dell',
 'Double-Diamond Climbing Wall',
 'Elevation Rock Gym',
 'Fit Stop Rock Climbing Gym',
 'iROCK Utah',
 'Lehi Legacy Center',
 'Momentum Indoor Climbing Gym',
 'Momentum Lehi',
 'Momentum Millcreek',
 'Salt Lake Bouldering Project',
 'SUU Climbing Gym',
 'Taylorsville Rec Center',
 'Technical Training Center',
 'The Front Climbing Club',
 'The Front Climbing Club',
 'The Front Climbing Club - South Main',
 'The Project',
 'The Quarry',
 'The Scratch Pad',
 'The Summit',
 'Uintah Recreation Community Center',
 'Ultimate Sports Arena',
 'Washington City Community Center',
 'West Valley Family Fitness Center cli…']