# Web Scraper Skidata

Data from: https://www.skiinfo.de/schweiz/skipaesse

## Libraries and settings

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt

## Web Scraper Skidata

In [None]:
# Load the ski-pass price table from a saved HTML page, convert the price
# columns to numbers, and export the result to 'skiinfo.csv'.

# Option (1): Send an HTTP request to the URL
# url = 'https://www.skiinfo.de/schweiz/skipaesse'
# response = requests.get(url)
# html_content = response.content

# Option (2): Read html from file
# The encoding is passed explicitly: without it open() falls back to the
# platform's locale encoding, which can garble non-ASCII characters on
# systems whose default is not UTF-8.
# NOTE(review): assumes skipass.html was saved as UTF-8 - confirm.
with open('skipass.html', 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Locate the first table on the page; fail with a clear message instead of
# an opaque AttributeError further down if the page contains no table.
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found in 'skipass.html'")

# Extract table headers (non-breaking spaces removed)
headers = [header.get_text().replace('\xa0', '') for header in table.find_all('th')]

# Extract table rows, skipping the first <tr> (the header row)
rows = []
for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    row_data = [cell.get_text().replace('\xa0', '').strip() for cell in cells]
    rows.append(row_data)

# Create a DataFrame
df = pd.DataFrame(rows, columns=headers)

# Change column names (this requires the table to have exactly six columns)
df.columns = ['Skigebiet',
              'Saisonkarte_Erwachsene',
              'Saisonkarte_Kinder',
              'Tageskarte_Erwachsene',
              'Tageskarte_Kinder',
              'Online_Kaufen']

# Show dimensions; df.shape is a (rows, columns) tuple, not just the row count
print('Dimensions (rows, columns):', df.shape)

# Change data types and remove special characters: commas are stripped
# literally (regex=False) and anything that still is not a number becomes NaN.
price_columns = ['Saisonkarte_Erwachsene',
                 'Saisonkarte_Kinder',
                 'Tageskarte_Erwachsene',
                 'Tageskarte_Kinder']
for column in price_columns:
    df[column] = pd.to_numeric(df[column].str.replace(',', '', regex=False),
                               errors='coerce')

# Write to csv
df.to_csv('skiinfo.csv', index=False)

# Show summary statistics of the numeric columns
df.describe()

## Histograms of ski pass prices

In [None]:
# Create the histograms: one panel per price column on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(7, 6))

# (column, bar colour) pairs listed in panel order:
# top-left, top-right, bottom-left, bottom-right
panels = [
    ('Saisonkarte_Erwachsene', 'greenyellow'),
    ('Saisonkarte_Kinder', 'orange'),
    ('Tageskarte_Erwachsene', 'darkred'),
    ('Tageskarte_Kinder', 'blue'),
]

# axes.flat walks the 2x2 grid row by row, matching the order of `panels`
for ax, (column, colour) in zip(axes.flat, panels):
    df[column].plot(kind='hist',
                    bins=20,
                    edgecolor='black',
                    color=colour,
                    ax=ax)
    ax.set_title('Histogram of ' + column, fontsize=11)
    ax.set_xlabel('Price')
    ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Print a short footer describing the environment the notebook ran in:
# OS family, platform name and release, current timestamp, Python version.
separator = '-----------------------------------'
os_family = os.name.upper()
system_name = platform.system()
system_release = platform.release()
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
interpreter_version = python_version()

print(separator)
print(os_family)
print(system_name, '|', system_release)
print('Datetime:', timestamp)
print('Python Version:', interpreter_version)
print(separator)