In [1]:
# import reqs
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

### Extract the Data from onthesnow

In [11]:
def scrape_ski_resorts(url):
    """Scrape the snow-report page at ``url`` into a single-column DataFrame.

    The page is fetched with a 10-second timeout. Every ``<span>`` whose class
    attribute is ``"h4 styles_h4__1nbGO"`` contributes one row holding its
    stripped text (or ``"No Name"`` when the span has no text at all).

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    pandas.DataFrame
        One column, ``'Resort Name'``, with one row per matching span in
        document order. If the request raises a ``requests`` exception or the
        server answers with a status other than 200, a message is printed and
        an *empty* DataFrame with the same column is returned, so callers
        always receive a DataFrame rather than ``None``.
    """
    columns = ['Resort Name']

    try:
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return pd.DataFrame(columns=columns)

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage, status code: {response.status_code}")
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, 'html.parser')
    resorts = soup.find_all("span", class_="h4 styles_h4__1nbGO")

    # Each value is wrapped in its own list so the frame has exactly one column.
    resorts_list = [
        [resort.text.strip() if resort.text else "No Name"]
        for resort in resorts
    ]
    return pd.DataFrame(resorts_list, columns=columns)

# URL of the snow-report page to scrape; the result is kept in
# `daily_ski_report` so later cells can reuse it without another request.
ski_resort_url = "https://www.onthesnow.com/skireport"
daily_ski_report = scrape_ski_resorts(ski_resort_url)

### Transform the Data into a Readable Format

In [31]:
def reformat_ski(df, column_names = None):
    """Reshape the flat scraped column into one row per resort and tidy the text.

    The first column of ``df`` is read as a flat sequence in which every
    ``len(columns)`` consecutive values describe one resort. Those values are
    laid out as a row, then a few text clean-ups are applied to the columns
    that exist in the result:

    * ``'72 Hour Snowfall'`` and ``'Open lifts'``: every ``-`` is removed.
    * ``'Trails open'``: everything from the first ``/<digits>`` onward is dropped.
    * ``'Resort Name'``: the words ``Ski Area``, ``Resort`` and ``Mountain``
      are removed and surrounding whitespace is stripped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column holds the flat scraped values. Not modified.
    column_names : sequence of str, optional
        Names for the output columns; their count also sets how many values
        make up one row. ``None`` (or an empty sequence) selects the default
        five columns: Resort Name, 72 Hour Snowfall, Base Depth, Trails open,
        Open lifts.

    Returns
    -------
    pandas.DataFrame
        One row per group of values. A trailing incomplete group is padded
        with missing values instead of raising.
    """
    default_columns = ['Resort Name', '72 Hour Snowfall', 'Base Depth', 'Trails open', 'Open lifts']
    # A falsy column_names (None or empty) keeps the historical five-column layout.
    columns = list(column_names) if column_names else default_columns
    width = len(columns)

    # Chunk a plain list so grouping is positional regardless of df's index.
    values = df.iloc[:, 0].tolist()
    rows = [values[start:start + width] for start in range(0, len(values), width)]
    if rows and len(rows[-1]) < width:
        # Pad a short final group so a lone partial row cannot break construction.
        rows[-1] = rows[-1] + [None] * (width - len(rows[-1]))
    reshaped_df = pd.DataFrame(rows, columns=columns)

    # (column, pattern to delete, pattern is a regex) -- applied in order.
    cleanups = [
        ('72 Hour Snowfall', "-", False),
        ('Open lifts', "-", False),
        ('Trails open', r'\/\d+.*', True),
        ('Resort Name', r'Ski Area', True),
        ('Resort Name', r'Resort', True),
        ('Resort Name', r'Mountain', True),
    ]
    for column, pattern, is_regex in cleanups:
        # Custom column_names may omit some of the known columns.
        if column in reshaped_df.columns:
            reshaped_df[column] = reshaped_df[column].str.replace(pattern, '', regex=is_regex)

    if 'Resort Name' in reshaped_df.columns:
        # Removing a trailing word such as "Resort" leaves a dangling space.
        reshaped_df['Resort Name'] = reshaped_df['Resort Name'].str.strip()
    return reshaped_df
    
# Reshape the scraped single-column frame into one row per resort, then
# display it as the cell output.
ski_report = reformat_ski(daily_ski_report)
ski_report

Unnamed: 0,Resort Name,72 Hour Snowfall,Base Depth,Trails open,Open lifts
0,Lee Canyon,"30""","68""Variable Conditions",28,5/5
1,Snowbird,"30""",--,132,12/12
2,Alyeska,"29""","49-108""Packed Powder",60,7/7
3,Arizona Snowbowl,"22""","75""Powder",48,8/8
4,Cervinia - Breuil,"22""","0-77""-",--,
5,Alta,"21""","138""",80,5/5
6,Pomerelle,"18""","62""Packed Powder",32,3/3
7,Solitude,"17""",--,69,8/8
8,Park City,"17""","87""",340,41/44
9,Sunrise Park,"16""","72-78""Powder",50,8/8


### Match Data Frame with Locations for plotting on map

In [36]:
# Path is relative to the notebook's working directory.
file_path = 'resort_info.csv'
# Load the CSV into a DataFrame; match_locations() below joins on its
# 'Resort Name' column.
ski_locations = pd.read_csv(file_path)

In [48]:
def match_locations(df1, df2, limit=20):
    """Inner-join two frames on ``'Resort Name'`` and keep the first rows.

    Names are stripped of surrounding whitespace on copies of the inputs
    before merging, so neither ``df1`` nor ``df2`` is modified.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames that each contain a string ``'Resort Name'`` column.
    limit : int or None, default 20
        Maximum number of merged rows to return; ``None`` returns all of them.

    Returns
    -------
    pandas.DataFrame
        Rows whose resort name appears in both frames, carrying the columns
        of both, truncated to ``limit`` rows.
    """
    key = 'Resort Name'
    # assign() returns a new frame, leaving the caller's data untouched.
    left = df1.assign(**{key: df1[key].str.strip()})
    right = df2.assign(**{key: df2[key].str.strip()})

    merged_daily = pd.merge(left, right, on=key, how='inner')
    if limit is None:
        return merged_daily
    return merged_daily.head(limit)

# Join the reformatted report with the resort details; the merge is an inner
# join, so only resort names present in both frames appear in the result.
matched = match_locations(ski_report, ski_locations)
matched

Unnamed: 0,Resort Name,72 Hour Snowfall,Base Depth,Trails open,Open lifts,state,summit,base,vertical,lifts,runs,acres,green_percent,green_acres,blue_percent,blue_acres,black_percent,black_acres,lat,lon
0,Snowbird,"30""",--,132,12/12,Utah,11000.0,7600.0,3400.0,12.0,89.0,2500,0.27,675.0,0.38,950.0,0.35,875.0,40.58,-111.65
1,Alyeska,"29""","49-108""Packed Powder",60,7/7,Alaska,3939.0,250.0,2500.0,9.0,73.0,1500,0.11,165.0,0.52,780.0,0.37,555.0,60.97,-149.11
2,Alta,"21""","138""",80,5/5,Utah,10550.0,8530.0,2020.0,11.0,116.0,2200,0.25,550.0,0.4,880.0,0.35,770.0,40.6,-111.64
3,Solitude,"17""",--,69,8/8,Utah,10035.0,7988.0,2047.0,7.0,65.0,1200,0.2,240.0,0.5,600.0,0.3,360.0,40.63,-111.59
4,Park City,"17""","87""",340,41/44,Utah,10000.0,6800.0,3200.0,41.0,344.0,7300,0.09,657.0,0.51,3723.0,0.4,2920.0,40.68,-111.48
5,Sunrise Park,"16""","72-78""Powder",50,8/8,Arizona,1100.0,9200.0,1800.0,8.0,65.0,800,0.4,320.0,0.4,320.0,0.2,160.0,34.02,-109.53
6,Wolf Creek,"16""","84-89""Powder",144,10/11,Colorado,11904.0,10300.0,1604.0,7.0,77.0,1600,0.2,320.0,0.45,720.0,0.35,560.0,37.4,-106.6
7,Purgatory,"15""","61-65""Powder",107,10/11,Colorado,10822.0,8739.0,2083.0,10.0,85.0,1200,0.23,276.0,0.51,612.0,0.26,312.0,39.46,-106.08
8,Ski Apache,"14""","20""Variable Conditions",51,6/11,New Mexico,12000.0,10100.0,1900.0,11.0,55.0,750,0.2,150.0,0.6,450.0,0.2,150.0,33.4,-105.78
9,Brian Head,"14""","69""Packed Powder",72,8/8,Utah,10929.0,9600.0,1329.0,9.0,71.0,650,0.3,195.0,0.35,227.5,0.35,227.5,37.69,-112.86
