# Hakuba Analysis

Collect the data on this page: https://www.hakubavalley.com/en/ski_resort_info_en/

The goal is to view the data as charts so the resorts are easier to compare. The data is also adjusted slightly:
- Added a combined entry for Goryu and Hakuba 47 (the individual entries are kept)


In [1]:
import requests
import parsel
import pandas as pd
import plotly.express as px

In [2]:
# Download the Hakuba Valley resort-info page.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops the notebook here on an HTTP error instead of
# letting the cells below parse an error page.
response = requests.get('https://www.hakubavalley.com/en/ski_resort_info_en/', timeout=30)
response.raise_for_status()
response.status_code

200

In [3]:
# Wrap the downloaded HTML in a parsel Selector so it can be queried with CSS selectors.
doc = parsel.Selector(response.text)

In [4]:
# Select every '.spec-item' element; parse_resort() is later applied to each of them.
resorts = doc.css('.spec-item')
len(resorts)

10

In [5]:
# Use the first match as a sample for trying out the selectors in the cells below.
resort = resorts[0]
resort.css('.gelande_name::text').getall()

['Jigatake Snow Resort']

In [6]:
# Select the <dl> elements inside '.spec-info' for the sample resort.
spec = resort.css('.spec-info dl')
len(spec)

8

In [30]:
# Pair the first six <dt> labels with their whitespace-stripped <dd> values.
dict(zip(
    spec.css('dt::text').getall()[:6],
    (text.strip() for text in spec.css('dd::text').getall()[:6]),
))

{'Length': '1,500',
 'Sum total of all courses': '4,800',
 'Area of course': '30',
 'Gondola': '0',
 'Chair lift': '4',
 'Number of courses': '7'}

In [31]:
# The same six values selected directly from the resort element (the query parse_resort() uses).
resort.css('.spec-info dl dd::text').getall()[0:6]

['1,500', '4,800', '30', '0', '4', '7']

In [32]:
# Altitude figures; parse_resort() reads index 0 as max elevation, 1 as vertical and 2 as base elevation.
resort.css('.altitude p::text').getall()

['1,200', '260', '940']

In [33]:
# Course-level figures; parse_resort() reads them as beginner, intermediate, advanced (in that order).
resort.css('.course-level p::text').getall()

['70', '30', '0']

In [11]:
# First link under '.site_url'; parse_resort() stores it as `website`.
resort.css('.site_url a::attr(href)').get()

'http://www.jiigatake.com/'

In [13]:
# First link under '.btn-wht-blk'; parse_resort() stores it as `trail_map`.
resort.css('.btn-wht-blk a::attr(href)').get()

'https://www.hakubavalley.com/cms/wp-content/uploads/2022/01/jigatake-map.png'

In [34]:
def parse_resort(selector):
    """Turn one resort element into a flat dict of its figures and links.

    Args:
        selector: object exposing ``.css(query)`` whose result offers
            ``.get()`` and ``.getall()`` (a parsel Selector in this notebook).

    Returns:
        dict with ``name``, the six spec figures (``length``,
        ``total_trails_length``, ``area``, ``gondolas``, ``chairs``,
        ``trails``), ``max_elevation``, ``base_elevation``, ``vertical``,
        the ``beginner``/``intermediate``/``advanced`` figures, and the
        ``website`` and ``trail_map`` links. Numeric fields are ints.

    Raises:
        IndexError: if an expected figure is missing from the element.
        ValueError: if a figure is not an integer once commas are removed.
    """
    def texts(query):
        # Every text/attribute match for a CSS query, in document order.
        return selector.css(query).getall()

    def first(query):
        # First match for a CSS query, or None when nothing matches.
        return selector.css(query).get()

    def number(raw):
        # Figures use thousands separators, e.g. '1,500'.
        return int(raw.replace(',', ''))

    resort_name = first('.gelande_name::text')
    figures = texts('.spec-info dl dd::text')[:6]
    altitudes = texts('.altitude p::text')
    shares = texts('.course-level p::text')
    site_link = first('.site_url a::attr(href)')
    map_link = first('.btn-wht-blk a::attr(href)')

    return {
        'name': resort_name,
        'length': number(figures[0]),
        'total_trails_length': number(figures[1]),
        'area': number(figures[2]),
        'gondolas': number(figures[3]),
        'chairs': number(figures[4]),
        'trails': number(figures[5]),
        # The altitude entries are read as: max, vertical, base.
        'max_elevation': number(altitudes[0]),
        'base_elevation': number(altitudes[2]),
        'vertical': number(altitudes[1]),
        'beginner': int(shares[0]),
        'intermediate': int(shares[1]),
        'advanced': int(shares[2]),
        'website': site_link,
        'trail_map': map_link,
    }

# Try the parser on the second resort as a quick check.
parse_resort(resorts[1])

{'name': 'Kashimayari Snow Resort',
 'length': 5000,
 'total_trails_length': 14200,
 'area': 78,
 'gondolas': 0,
 'chairs': 5,
 'trails': 12,
 'max_elevation': 1550,
 'base_elevation': 830,
 'vertical': 720,
 'beginner': 40,
 'intermediate': 45,
 'advanced': 15}

In [35]:
def _short_names(names):
    """Shorten full resort names (a string Series) into compact chart labels.

    The fragments are removed in the listed order, then '47' is expanded back
    to 'Hakuba 47'. ``regex=False`` makes every replacement a literal match
    regardless of the installed pandas version's default.
    """
    fragments = (
        ' Snow Resort',
        ' Snow Field',
        ' Park',
        ' Resort',
        ' Mountain',
        ' Winter Sports',
        'ABLE ',
        'Hakuba ',
    )
    for fragment in fragments:
        names = names.str.replace(fragment, '', regex=False)
    return names.str.replace('47', 'Hakuba 47', regex=False)


# One row per resort, largest area first, indexed by short name.
# The *_trails columns multiply each course-level percentage by the trail
# count, so they can be fractional.
resorts_df = (
    pd.DataFrame([parse_resort(item) for item in resorts])
    .assign(name=lambda df: _short_names(df['name']))
    .sort_values('area', ascending=False)
    .assign(
        beginner_trails=lambda df: df['beginner'] / 100.0 * df['trails'],
        intermediate_trails=lambda df: df['intermediate'] / 100.0 * df['trails'],
        advanced_trails=lambda df: df['advanced'] / 100.0 * df['trails'],
    )
    .set_index('name')
)
resorts_df

Unnamed: 0_level_0,length,total_trails_length,area,gondolas,chairs,trails,max_elevation,base_elevation,vertical,beginner,intermediate,advanced,beginner_trails,intermediate_trails,advanced_trails
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Happo-one,8000,23300,220,1,21,13,1831,760,1071,30,50,20,3.9,6.5,2.6
Tsugaike,5000,20450,196,1,17,14,1704,800,904,50,30,20,7.0,4.2,2.8
Iwatake,3300,18220,125,1,8,26,1289,750,539,30,50,20,7.8,13.0,5.2
Goryu,5000,12620,120,1,11,17,1676,750,926,35,40,25,5.95,6.8,4.25
Kashimayari,5000,14200,78,0,5,12,1550,830,720,40,45,15,4.8,5.4,1.8
Norikura Onsen,2500,8919,50,0,9,16,1300,700,600,30,40,30,4.8,6.4,4.8
Cortina,3500,13335,50,0,6,16,1402,872,530,40,30,30,6.4,4.8,4.8
Sanosaka,2100,5140,40,0,6,8,1200,740,460,40,40,20,3.2,3.2,1.6
Hakuba 47,6400,13570,32,1,5,8,1614,820,794,30,40,30,2.4,3.2,2.4
Jigatake,1500,4800,30,0,4,7,1200,940,260,70,30,0,4.9,2.1,0.0


In [36]:
# Look at the two rows that the next cell aggregates into one combined entry.
resorts_df.loc[['Goryu', 'Hakuba 47']]


Unnamed: 0_level_0,length,total_trails_length,area,gondolas,chairs,trails,max_elevation,base_elevation,vertical,beginner,intermediate,advanced,beginner_trails,intermediate_trails,advanced_trails
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Goryu,5000,12620,120,1,11,17,1676,750,926,35,40,25,5.95,6.8,4.25
Hakuba 47,6400,13570,32,1,5,8,1614,820,794,30,40,30,2.4,3.2,2.4


In [37]:
# How each column of the Goryu and Hakuba 47 rows is merged into one value.
merge_rules = {
    'length': 'max',
    'total_trails_length': 'sum',
    'area': 'sum',
    'gondolas': 'sum',
    'chairs': 'sum',
    'trails': 'sum',
    'max_elevation': 'max',
    'base_elevation': 'min',
    'beginner_trails': 'sum',
    'intermediate_trails': 'sum',
    'advanced_trails': 'sum',
}
combined = resorts_df.loc[['Goryu', 'Hakuba 47']].agg(merge_rules)

# Derived figures are recomputed from the merged values rather than aggregated.
combined['vertical'] = combined['max_elevation'] - combined['base_elevation']
for level in ('beginner', 'intermediate', 'advanced'):
    combined[level] = combined[f'{level}_trails'] / combined['trails'] * 100
combined


length                  6400.00
total_trails_length    26190.00
area                     152.00
gondolas                   2.00
chairs                    16.00
trails                    25.00
max_elevation           1676.00
base_elevation           750.00
beginner_trails            8.35
intermediate_trails       10.00
advanced_trails            6.65
vertical                 926.00
beginner                  33.40
intermediate              40.00
advanced                  26.60
dtype: float64

In [38]:
# Add the merged resort as an extra row; the individual Goryu and Hakuba 47
# rows are kept. `combined` has no website/trail_map entries, so those two
# cells are left empty for the new row.
resorts_df.loc['Hakuba 47 + Goryu'] = combined

# `combined` is a float64 Series, so adding it as a row upcasts the
# whole-number columns to float (chart labels would then read '1.0g').
# Restore the integer dtypes and rebind instead of sorting in place.
whole_number_columns = [
    'length', 'total_trails_length', 'area', 'gondolas', 'chairs',
    'trails', 'max_elevation', 'base_elevation', 'vertical',
]
resorts_df = (
    resorts_df
    .astype({column: 'int64' for column in whole_number_columns})
    .sort_values('area', ascending=False)
)

In [39]:
# Skiable area per resort, each bar annotated with its gondola (g) and chair lift (c) counts.
# '{:.0f}' prints the counts as whole numbers even when the columns are
# float (they become float once the float-valued combined row is added).
px.bar(
    resorts_df.assign(
        label=lambda df: df['gondolas'].map('{:.0f}g '.format) + df['chairs'].map(' {:.0f}c'.format)
    ),
    y='area',
    title='Skiable Area by Resort',
    text='label',
).update_layout(xaxis_title='', yaxis_title='ha')

In [40]:
# Total trail length per resort (axis labelled 'm').
trail_length_fig = px.bar(
    resorts_df,
    y='total_trails_length',
    title='Total Trail Length by Resort',
)
trail_length_fig.update_layout(xaxis_title='', yaxis_title='m')

In [41]:
# Stacked beginner/intermediate/advanced percentages per resort, one colour per level.
share_colors = {
    'beginner': '#86c96b',
    'intermediate': '#db3a2e',
    'advanced': '#555',
}
trail_share_fig = px.bar(
    resorts_df,
    y=list(share_colors),
    title='Trail Type by Resort',
    color_discrete_map=share_colors,
)
trail_share_fig.update_layout(showlegend=False, yaxis_title='%', xaxis_title='')

In [42]:
# Stacked estimated trail counts per level. The *_trails columns are
# percentage x trail count, so they are estimates. The title is distinct from
# the percentage chart, which used the identical 'Trail Type by Resort'.
px.bar(resorts_df,
       y=['beginner_trails', 'intermediate_trails', 'advanced_trails'],
       title='Estimated Trail Count by Type and Resort',
       color_discrete_map={'beginner_trails': '#86c96b', 'intermediate_trails': '#db3a2e', 'advanced_trails': '#555'},
       ).update_layout(showlegend=False, yaxis_title='Trails', xaxis_title='')

In [43]:
# Vertical drop per resort, each bar annotated with the resort's max elevation.
# '{:.0f}' keeps the label a whole number ('max elev=1831m') even when the
# column is float (it becomes float once the float-valued combined row is added).
px.bar(resorts_df.assign(label=lambda df: df['max_elevation'].map('max elev={:.0f}m'.format)),
       y='vertical',
       text='label',
       title='Vertical and Max Elevation by Resort',
       barmode='group'
       ).update_layout(showlegend=False, yaxis_title='m', xaxis_title='')