In [25]:
# import reqs
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

### Extract the Data from onthesnow

In [80]:
def scrape_ski_resorts(url):
    """Scrapes through url for ski information and returns a DataFrame"""
    try:
        response = requests.get(url, timeout=10)
        resorts_list = []
        
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            resorts = soup.find_all("span", class_="h4 styles_h4__1nbGO")
            
            for resort in resorts:
                resort_name = resort.text.strip() if resort.text else "No Name"
                resorts_list.append([resort_name])
            resorts_df = pd.DataFrame(resorts_list, columns=['Resort Name'])
            
            return resorts_df
        else:
            print(f"Failed to retrieve the webpage, status code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")

# Replace with the actual URL of the page you want to scrape
ski_resort_url = "https://www.onthesnow.com/skireport"
scrape_ski_resorts(ski_resort_url)

Unnamed: 0,Resort Name
0,Mountain High
1,"42""-"
2,"26-38""Powder"
3,34/5958% Open
4,6/14-
...,...
120,Brian Head Resort
121,"19""-"
122,"59""Powder"
123,72/7298% Open


### Transform the Data into Readable

In [95]:
df = ski_report

def reformat_ski(df, column_names = None):
    """Reformats the ski data into a semi-readable format"""
    reshaped_data = []
    data_column = df.iloc[:, 0]
    for start in range(0, len(data_column), 5):
        group = data_column[start:start + 5].tolist()
        reshaped_data.append(group)
    reshaped_df = pd.DataFrame(reshaped_data, columns=['Resort Name', '72 hour Snowfall', 'Base Depth', 'Trails open', 'Open lifts'])
    reshaped_df['72 hour Snowfall'] = reshaped_df['72 hour Snowfall'].str.replace("-", "", regex=False)
    reshaped_df['72 hour Snowfall'] = reshaped_df['72 hour Snowfall'].str.replace("", "", regex=False)
    reshaped_df['Open lifts'] = reshaped_df['Open lifts'].str.replace("-", "", regex=False)

    return reshaped_df
    
ski_report = reformat_ski(daily_ski_report)
ski_report

Unnamed: 0,Resort Name,72 hour Snowfall,Base Depth,Trails open,Open lifts
0,Mountain High,"42""","26-38""Powder",34/5958% Open,6/14
1,Mammoth Mountain,"41""","82-115""Powder",157/17391% Open,19/25
2,Tahoe Donner,"35""","20-30""Powder",17/17100% Open,3/5
3,Lee Canyon,"33""","30""-",--,
4,Northstar California,"32""","8-78""Powder",84/10084% Open,12/20
5,Ski China Peak,"32""","40-52""Powder",0/52-,
6,June Mountain,"31""","36""Powder",32/4276% Open,2/7
7,Mt. Shasta Ski Park,"31""","94""Packed Powder",34/3874% Open,4/6
8,Jackson Hole,"30""","30-85""Powder",118/13184% Open,13/13
9,Mt. Rose - Ski Tahoe,"30""","40-60""Powder",49/6783% Open,7/8


### Load into dataframe for analysis of perfect ski conditions

In [89]:
# if meets certain criteria then it will return the row

def perfect_ski(df):
    rows = []
    for row in rows:
        if df['72 hour Snowfall'] >= ['2"']:
            return row

new_df = perfect_ski(ski_report)
print(new_df)

None
