## Import Libraries

In [4]:
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd

## Collect Data From ZRankings.com

In [5]:
# Listing pages on zrankings.com are numbered from 1; the loop below visits
# pages 1 through LAST_PAGE inclusive (8 pages in total).
LAST_PAGE = 8
BASE_URL = 'https://www.zrankings.com'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30
# What a missing tag raises: indexing past the end of a find_all() result
# (IndexError) or reading an attribute of a find() that returned None
# (AttributeError).
MISSING_TAG_ERRORS = (IndexError, AttributeError)


def _set_if_present(record, key, extract):
    """Store ``extract()`` under ``key``; leave ``record`` unchanged if a tag is missing.

    Every field is attempted on its own, so one absent tag does not prevent
    the fields after it from being collected.
    """
    try:
        record[key] = extract()
    except MISSING_TAG_ERRORS:
        pass


result = []

for page in range(1, LAST_PAGE + 1):
    # Request one page of the snow ranking table from zrankings.com
    listing_response = requests.get(
        BASE_URL + '/ski-resorts/snow?_=1615734995765&page=' + str(page),
        timeout=REQUEST_TIMEOUT,
    )
    # Fail with a clear HTTP error instead of an AttributeError on a missing table
    listing_response.raise_for_status()
    listing = BeautifulSoup(listing_response.text, 'html.parser')

    # Div tag where all data we want exists
    snowTable = listing.find('div', {'class': 'tableshow table-wrap'})
    rows = snowTable.find_all('tr')

    # Iterate over each resort row (rows[0] is skipped) and strip text from the tags
    for row in rows[1:]:
        td = row.find_all('td')
        # The last two characters of the name cell are stored as the state code
        resortName = td[1].text.strip().replace("\n", "")
        # First number is stored as base elevation, second as top elevation
        elevations = re.findall(r'\d+', td[6].text.strip())

        snowData = {}
        snowData['Zone'] = td[2].text.strip()
        snowData['Resort Name'] = resortName[:-2]
        snowData['State'] = resortName[-2:]
        snowData['Base Elevation (ft)'] = elevations[0]
        snowData['Top Elevation (ft)'] = elevations[1]
        snowData['Snowfall Score'] = td[11].text.strip()
        snowData['% of North-facing Terrain'] = td[12].text.strip().replace('%', '')
        snowData['% of South-facing Terrain'] = td[15].text.strip().replace('%', '')
        snowData['% of East-facing Terrain'] = td[13].text.strip().replace('%', '')
        snowData['% of West-facing Terrain'] = td[14].text.strip().replace('%', '')
        snowData['Total Snow Score With Preservation'] = td[16].text.strip()

        # Follow the resort's own link to collect more information on that resort.
        # Separate names are used so the listing page objects are not overwritten.
        moreLink = td[17].a["href"]
        detail_response = requests.get(BASE_URL + moreLink, timeout=REQUEST_TIMEOUT)
        detail = BeautifulSoup(detail_response.text, 'html.parser')

        # Fields below are optional: a tag that does not exist on the resort
        # page simply leaves that key out of the record.
        for score in detail.find_all('div', {'class': 'overall-compare'}):
            div1 = score.find_all('div')
            _set_if_present(snowData, 'Overall Ranking',
                            lambda: re.sub(r"[\n\t\s]*", "", div1[3].find('h4').text))
            _set_if_present(snowData, 'Regional Ranking',
                            lambda: div1[6].find('h4').text)
            _set_if_present(snowData, 'State Ranking',
                            lambda: div1[8].find('h4').text)

        for stuff in detail.find_all('div', {'class': 'snow-stuff'}):
            div2 = stuff.find_all('div')
            # [:-1] drops the final character of the heading text
            _set_if_present(snowData, 'True Snow Per Year (inches)',
                            lambda: div2[1]('h3')[0].text[:-1])
            _set_if_present(snowData, 'Snow Quality Rank',
                            lambda: div2[3]('h2')[0].text)
            _set_if_present(snowData, 'Dump Potential Rank',
                            lambda: div2[6]('h2')[0].text)
            _set_if_present(snowData, '% of Days With More Than 6" of Snow',
                            lambda: div2[11]('span')[0].text.replace('%', ''))
            _set_if_present(snowData, '% of Months With More Than 90" of Snow',
                            lambda: div2[15]('span')[0].text.replace('%', ''))
            _set_if_present(snowData, '% of Months With Less Than 30" of Snow',
                            lambda: div2[19]('span')[0].text.replace('%', ''))

        for stats in detail.find_all('div', {'class': 'side-stats-2 clearfix'}):
            span = stats.find_all('span')
            snowData['Acreage'] = span[0].text.replace("acres", "").replace(',', '')
            snowData['Total Runs'] = span[1].text
            snowData['Longest Run (ft)'] = span[2].text.replace('ft', '').replace(',', '')
            snowData['Lifts'] = span[3].text
            snowData['Uphill Max (ppl/hr)'] = span[4].text.replace('ppl/hr', '').replace(',', '')
            snowData['Terrain Parks'] = span[5].text
            # span[6] is not used; the halfpipe count is read from span[7]
            snowData['Halfpipes'] = span[7].text

        result.append(snowData)
    # Progress marker: one line per completed listing page
    print(page)

1
2
3
4
5
6
7
8


## Load the Scraped Data Into a pandas DataFrame

In [6]:
# Raise the column display limit to 35 so the wide preview below is not truncated.
pd.options.display.max_columns = 35

# One row per resort record collected in `result`.
snowData = pd.DataFrame(result)
snowData.head(5)

Unnamed: 0,"% of Days With More Than 6"" of Snow",% of East-facing Terrain,"% of Months With Less Than 30"" of Snow","% of Months With More Than 90"" of Snow",% of North-facing Terrain,% of South-facing Terrain,% of West-facing Terrain,Acreage,Base Elevation (ft),Dump Potential Rank,Halfpipes,Lifts,Longest Run (ft),Overall Ranking,Regional Ranking,Resort Name,Snow Quality Rank,Snowfall Score,State,State Ranking,Terrain Parks,Top Elevation (ft),Total Runs,Total Snow Score With Preservation,True Snow Per Year (inches),Uphill Max (ppl/hr),Zone
0,20.2,17,1.3,49.3,53,1,29,2200,8530,2,0,11,16800,4,4,Alta Ski Area,1,96.1,UT,2,0,10550,50,100.0,517,11248,Rockies
1,18.0,16,3.4,37.0,50,13,21,2500,7760,5,0,11,13200,3,3,Snowbird,2,90.0,UT,1,1,11000,85,97.5,497,17400,Rockies
2,19.3,15,4.3,44.2,43,10,32,1050,8750,3,2,6,15840,20,16,Brighton Ski Resort,3,92.4,UT,5,4,10500,66,95.2,504,10100,Rockies
3,20.0,75,3.0,45.0,20,5,0,925,3000,4,0,3,2800,51,6,Powder King Mountain Resort,4,92.3,BC,6,0,5500,37,93.3,492,2600,Canadian-Rockies
4,18.9,0,3.5,40.4,20,20,60,3000,7408,6,0,5,14675,19,15,Grand Targhee,5,88.9,WY,2,1,10000,72,91.9,466,7200,Rockies


## Convert to CSV

In [20]:
# Write the table to disk; the row index is kept as the first (unnamed) column.
snowData.to_csv(path_or_buf='zRankings.csv', index=True)