In [2]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
import time
import os

In [3]:
# Creates a list of strings that match the files in the directory
nums = [str(num) for num in range(1, 9)]
data = ["route-finder_" + i + ".csv" for i in nums]

In [4]:
# Creates a list of dataframes, one for each .csv file in the directory
route_finder_list = [pd.read_csv(climb) for climb in data]

In [5]:
# Stacks all of the dataframes to make one big dataframe
climbs = pd.concat(route_finder_list).reset_index()

In [6]:
# Write out the Combined version of the data so that I have a csv with all of the data in it 
#climbs.to_csv("utah_climbs.csv", index = False)

In [7]:
# This Code creates two boolean factors for whether the climb is rated PG13 or R
pg13 = climbs.Rating.str.extract("(PG13)")
R = climbs.Rating.str.extract("(R)")
climbs["PG13"] = pg13 == "PG13"
climbs["R"] = R == "R"
climbs.iloc[918]

index                                                           143
Route                                                     Altar Boy
Location          PA's Mother > Rock Canyon > Southern Wasatch >...
URL               https://www.mountainproject.com/route/10680596...
Avg Stars                                                       2.0
Your Stars                                                       -1
Route Type                                              Trad, Sport
Rating                                                 5.10b/c PG13
Pitches                                                           1
Length                                                        100.0
Area Latitude                                               40.2656
Area Longitude                                            -111.6204
PG13                                                           True
R                                                             False
Name: 918, dtype: object

In [8]:
# Remove PG13 and R from the "Rating" column
climbs["Rating"] = climbs.Rating.str.replace("(PG13|R)", "", regex = True)


In [9]:
locs = climbs.Location.apply(lambda x : x[::-1]).str.extract(r"([A-Za-z1-9 ]+)>([A-Za-z1-9 ]+)>([A-Za-z1-9 ]+)>([A-Za-z1-9 ]+)>([A-Za-z1-9 ]+)")

# for i in range(4):
#     locs[i] = locs[i].apply(lambda x : x[::-1])
locs["State"] = locs[0]
locs["Region"] = locs[1]
locs["Location"] = locs[2]
locs["Crag"] = locs[3]
locs["Wall"] = locs[4]
locs.State   #.apply(lambda x : x[::-1])

0       hatU 
1       hatU 
2       hatU 
3       hatU 
4       hatU 
        ...  
6102      NaN
6103      NaN
6104      NaN
6105      NaN
6106      NaN
Name: State, Length: 6107, dtype: object

In [10]:
climbs["Location"].str.extract("(> Utah$)").isna().sum()

0    0
dtype: int64

In [11]:
url = "https://www.mountainproject.com/gyms/utah"

In [12]:
r = requests.get(url)

In [13]:
soup = BeautifulSoup(r.text)

In [14]:
links = soup.find_all('a')

def hasGym(link):
    try:
        if "gym/" in link["href"]:
            return True
        else:
            return False
    except:
        return False

gyms = [link.text for link in links if hasGym(link)]
gyms

['Add Missing Gym',
 'AlpenFit - Indoor Training for Outdoo…',
 'Contact Climbing',
 'Dimple Dell',
 'Double-Diamond Climbing Wall',
 'Elevation Rock Gym',
 'Fit Stop Rock Climbing Gym',
 'iROCK Utah',
 'Lehi Legacy Center',
 'Momentum Indoor Climbing Gym',
 'Momentum Lehi',
 'Momentum Millcreek',
 'Salt Lake Bouldering Project',
 'SUU Climbing Gym',
 'Taylorsville Rec Center',
 'Technical Training Center',
 'The Front Climbing Club',
 'The Front Climbing Club',
 'The Front Climbing Club - South Main',
 'The Project',
 'The Quarry',
 'The Scratch Pad',
 'The Summit',
 'Uintah Recreation Community Center',
 'Ultimate Sports Arena',
 'Washington City Community Center',
 'West Valley Family Fitness Center cli…']

In [15]:
# Add in Weather Dataframe
weather = pd.read_csv('Utah_Weather_Stations.csv')
weather.head()

Unnamed: 0,X,Y,OBJECTID,STATION_NA,STN_ID_,SECONDARY_,STN_AUTHOR,NETWORK_,LOCATION_O,TYPE_OF_OB,...,SOIL_TEMP,FUEL_TEMP,FUEL_MOIST,PRESSURE,SL_PRESSUR,ALTIMETER,WEATHER_CO,VISIBILITY,UVB,ELEVATION
0,-12579190.0,4439105.0,1,CANNAN,42115T04,,NWS,COOP,http://climate.usu.edu,ONCE DAILY,...,,,,,,,,,,5000.0
1,-12610420.0,4439189.0,2,FORT PEARCE /ST GEORGE,FPWU1,DD09129A,NOAA/NWS,HADS,http://amazon.nws.noaa.gov/hads/charts/UT.html,HOURLY,...,,,,,,,,,,
2,-12624240.0,4439830.0,3,VIRGIN RIVER/ST GEORGE 10SW,VRSU1,,1668A424,HADS,http://amazon.nws.noaa.gov/hads/charts/UT.html,HOURLY,...,,,,,,,,,,
3,-12526870.0,4440500.0,4,KANAB MUNICIPAL AIRPORT,KKNB,99999,National Weather Service,NWS/FAA,http://www.faa.gov/asos/,HOURLY,...,,,,PR,SLP,A,WC,VIS,,4863.912
4,-12575350.0,4441154.0,5,CW2921 Hildale,C2921,9999999,APRSWXNET/Citizen Weather Observer Program,APRSWXNET/CWOP,http://www.wxqa.com/states/UT.html,HOURLY,...,,,,PR,SLP,A,,,,5155.832


In [16]:
# Find disctinct counties (hopefully will match up with the regions from climbing)
unique_counties = weather['COUNTY'].unique()
print(unique_counties)

['WASHINGTON' 'KANE' 'SAN JUAN' 'IRON' 'GARFIELD' 'WEBER' 'DAGGETT'
 'GARFILED' 'PIUTE' 'WAYNE' 'BEAVER' 'JUAB' 'GRAND' 'SEVIER' 'MILLARD'
 'UTAH' 'DUCHESNE' 'TOOELE' 'UINTAH' 'WASATCH' 'EMERY' 'CARBON' 'SANPETE'
 'CACHE' 'SALT LAKE' 'SUMMIT' 'DAVIS' 'BOX ELDER' 'MORGAN' 'RICH']


In [17]:
for county in unique_counties:
    mask = climbs['Location'].str.contains(county)
    # Print rows where the county is found in the location column
    print(climbs[mask])

Empty DataFrame
Columns: [index, Route, Location, URL, Avg Stars, Your Stars, Route Type, Rating, Pitches, Length, Area Latitude, Area Longitude, PG13, R]
Index: []
Empty DataFrame
Columns: [index, Route, Location, URL, Avg Stars, Your Stars, Route Type, Rating, Pitches, Length, Area Latitude, Area Longitude, PG13, R]
Index: []
Empty DataFrame
Columns: [index, Route, Location, URL, Avg Stars, Your Stars, Route Type, Rating, Pitches, Length, Area Latitude, Area Longitude, PG13, R]
Index: []
Empty DataFrame
Columns: [index, Route, Location, URL, Avg Stars, Your Stars, Route Type, Rating, Pitches, Length, Area Latitude, Area Longitude, PG13, R]
Index: []
Empty DataFrame
Columns: [index, Route, Location, URL, Avg Stars, Your Stars, Route Type, Rating, Pitches, Length, Area Latitude, Area Longitude, PG13, R]
Index: []
Empty DataFrame
Columns: [index, Route, Location, URL, Avg Stars, Your Stars, Route Type, Rating, Pitches, Length, Area Latitude, Area Longitude, PG13, R]
Index: []
Empty Data

In [18]:
weather.head()

Unnamed: 0,X,Y,OBJECTID,STATION_NA,STN_ID_,SECONDARY_,STN_AUTHOR,NETWORK_,LOCATION_O,TYPE_OF_OB,...,SOIL_TEMP,FUEL_TEMP,FUEL_MOIST,PRESSURE,SL_PRESSUR,ALTIMETER,WEATHER_CO,VISIBILITY,UVB,ELEVATION
0,-12579190.0,4439105.0,1,CANNAN,42115T04,,NWS,COOP,http://climate.usu.edu,ONCE DAILY,...,,,,,,,,,,5000.0
1,-12610420.0,4439189.0,2,FORT PEARCE /ST GEORGE,FPWU1,DD09129A,NOAA/NWS,HADS,http://amazon.nws.noaa.gov/hads/charts/UT.html,HOURLY,...,,,,,,,,,,
2,-12624240.0,4439830.0,3,VIRGIN RIVER/ST GEORGE 10SW,VRSU1,,1668A424,HADS,http://amazon.nws.noaa.gov/hads/charts/UT.html,HOURLY,...,,,,,,,,,,
3,-12526870.0,4440500.0,4,KANAB MUNICIPAL AIRPORT,KKNB,99999,National Weather Service,NWS/FAA,http://www.faa.gov/asos/,HOURLY,...,,,,PR,SLP,A,WC,VIS,,4863.912
4,-12575350.0,4441154.0,5,CW2921 Hildale,C2921,9999999,APRSWXNET/Citizen Weather Observer Program,APRSWXNET/CWOP,http://www.wxqa.com/states/UT.html,HOURLY,...,,,,PR,SLP,A,,,,5155.832


In [21]:
climbs['Latitude'] = climbs['Area Latitude']
climbs['Longitude'] = climbs['Area Longitude']
climbs.head()

Unnamed: 0,index,Route,Location,URL,Avg Stars,Your Stars,Route Type,Rating,Pitches,Length,Area Latitude,Area Longitude,PG13,R,Latitude,Longitude
0,0,Tiki Man,The Wicked Crag > Mill Creek > La Sal Mountain...,https://www.mountainproject.com/route/10571855...,4.0,-1,Sport,5.13c,1,60.0,38.49691,-109.31021,False,False,38.49691,-109.31021
1,1,The Bleeding,The Wicked Crag > Mill Creek > La Sal Mountain...,https://www.mountainproject.com/route/10813817...,4.0,-1,Sport,5.14b,1,60.0,38.49691,-109.31021,False,False,38.49691,-109.31021
2,2,Doubloon,The Wicked Crag > Mill Creek > La Sal Mountain...,https://www.mountainproject.com/route/11953024...,3.0,-1,Sport,5.14b,1,90.0,38.49691,-109.31021,False,False,38.49691,-109.31021
3,3,Prosthetics,The Sunny Side > Mill Creek > La Sal Mountains...,https://www.mountainproject.com/route/11288012...,4.0,-1,Sport,5.13d,1,90.0,38.49649,-109.31053,False,False,38.49649,-109.31053
4,4,Donut Project,The Sunny Side > Mill Creek > La Sal Mountains...,https://www.mountainproject.com/route/11607613...,3.0,-1,Sport,5.14+,1,30.0,38.49649,-109.31053,False,False,38.49649,-109.31053


In [22]:
import pandas as pd
from scipy.spatial import distance

# Function to find nearest point in data2 for each point in data1
def find_nearest(row, weather):
    distances = weather.apply(lambda r: distance.euclidean((row['Latitude'], row['Longitude']), (r['Latitude'], r['Longitude'])), axis=1)
    return weather.loc[distances.idxmin()]

# Apply the function to each row in data1
result = climbs.apply(lambda row: find_nearest(row, weather), axis=1)

# Reset index to avoid any issues
result.reset_index(drop=True, inplace=True)

# Combine the result with data1
combined_data = pd.concat([climbs, result], axis=1)

print(combined_data)


KeyError: 'Latitude'