In [1]:
# Import packages we will be using
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re

In [53]:
# Initializing variables
# Seconds to wait for a state listing page before giving up; without a timeout
# requests.get can block forever on a stalled connection.
STATE_PAGE_TIMEOUT = 30

# Empty frame with the columns the scraped gym table uses.
climbing_gyms = pd.DataFrame(columns=['gym_name', 'address', 'phone_number'])
states = [
    'alabama', 'alaska', 'arizona', 'arkansas', 'california', 'colorado', 'connecticut',
    'delaware', 'florida', 'georgia', 'hawaii', 'idaho', 'illinois', 'indiana', 'iowa',
    'kansas', 'kentucky', 'louisiana', 'maine', 'maryland', 'massachusetts', 'michigan',
    'minnesota', 'mississippi', 'missouri', 'montana', 'nebraska', 'nevada', 'new-hampshire',
    'new-jersey', 'new-mexico', 'new-york', 'north-carolina', 'north-dakota', 'ohio',
    'oklahoma', 'oregon', 'pennsylvania', 'rhode-island', 'south-carolina', 'south-dakota',
    'tennessee', 'texas', 'utah', 'vermont', 'virginia', 'washington', 'west-virginia',
    'wisconsin', 'wyoming'
]
# Getting a list of the URLs to each gym page from MountainProject using each State category.
state_urls = ['https://www.mountainproject.com/gyms/' + state for state in states]

climbing_gym_urls = []
for url in state_urls:
    try:
        response = requests.get(url, timeout=STATE_PAGE_TIMEOUT)
        # Treat 4xx/5xx answers as failures instead of parsing an error page
        # that would quietly contribute zero gyms.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Report the failure and move on so one bad state page does not
        # discard the links already collected from the others.
        print('Skipping state page', url, '-', exc)
        continue

    state_soup = BeautifulSoup(response.text, 'html.parser')
    for td in state_soup.find_all('td', {'class': 'text-truncate'}):
        # Only keep cells that actually contain a link with an href; indexing
        # the result of find() directly fails on cells without one.
        link = td.find('a', href=True)
        if link is not None:
            climbing_gym_urls.append(link['href'])

In [65]:
# Seconds to wait for a single gym page before giving up; without a timeout
# requests.get can block forever on a stalled connection.
GYM_PAGE_TIMEOUT = 30
# Column layout of the resulting table, also used when no gym was scraped.
GYM_COLUMNS = ['gym_name', 'address', 'phone_number']


def _parse_gym_page(html):
    """Extract the name, address and phone number from one gym page.

    Parameters
    ----------
    html : str
        Raw HTML of a gym page.

    Returns
    -------
    dict
        Keys 'gym_name', 'address' and 'phone_number'. Any field that cannot
        be located in the page is the string 'N/A'.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Look the heading up once instead of searching the document twice.
    heading = soup.find('h1')
    gym_name = heading.get_text(strip=True) if heading is not None else 'N/A'

    phone_number = 'N/A'
    address = 'N/A'
    gym_info_div = soup.find('div', class_='gym-info')
    if gym_info_div is not None:
        # NOTE(review): the phone number is read from the second nested <div>
        # and the address from the link inside the third one; this presumably
        # mirrors the page layout at the time of writing -- confirm against
        # the live markup.
        divs = gym_info_div.find_all('div')
        if len(divs) > 1:
            phone_number = divs[1].get_text(strip=True)
        if len(divs) > 2:
            address_link = divs[2].find('a')
            if address_link is not None:
                address = address_link.get_text(strip=True)

    return {'gym_name': gym_name, 'address': address, 'phone_number': phone_number}


gym_data_list = []
# Getting the gym name, address and phone number (if available) for every gym URL.
for url in climbing_gym_urls:
    try:
        response = requests.get(url, timeout=GYM_PAGE_TIMEOUT)
        # Do not scrape error pages into the table as if they were gyms.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Report and skip: one unreachable page should not throw away the
        # results already gathered for all the other gyms.
        print('Skipping gym page', url, '-', exc)
        continue

    gym_data_list.append(_parse_gym_page(response.text))

# Convert list of dictionaries to DataFrame in one step; passing the columns
# keeps the expected layout even when nothing could be scraped.
climbing_gyms = pd.DataFrame(gym_data_list, columns=GYM_COLUMNS)

In [76]:
# Write the gym table to an Excel workbook in the current working directory,
# leaving out the DataFrame's row index.
climbing_gyms.to_excel(excel_writer='climbing_gyms.xlsx', index=False)