# Web Scraper Wasserstände Kanton Zürich

## Libraries and settings

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Web Scraper Hydrological Data

In [None]:
# Download the page with the current hydrological readings, parse its first
# HTML table and turn the cleaned rows into a DataFrame that is also written
# to a CSV file.
url = 'https://hydroproweb.zh.ch/Listen/AktuelleWerte/aktuelle_werte.html'

# Without a timeout requests waits indefinitely on an unresponsive server;
# raise_for_status() aborts on HTTP 4xx/5xx instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.content

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Locate the (first) table; fail with a clear message if the page has none,
# rather than with an AttributeError on None further down.
table = soup.find('table')
if table is None:
    raise ValueError(f'No <table> element found at {url}')

# Extract table headers (non-breaking spaces removed). Kept for inspection;
# the DataFrame below uses its own column names.
headers = [header.get_text().replace('\xa0', '') for header in table.find_all('th')]

# Extract the text of every <td> cell, skipping the first <tr>
# (presumably the header row — it is not used as data).
rows = [
    [cell.get_text().replace('\xa0', '').strip() for cell in row.find_all('td')]
    for row in table.find_all('tr')[1:]
]

# Split the combined "ZeitDatum" cell (index 2) into separate "Zeit" and
# "Datum" values. The first 5 characters are taken as the time (assumes an
# HH:MM format — confirm against the page); the remainder is the date.
# Rows that do not have exactly 9 cells are skipped.
clean_rows = []
for row in rows:
    if len(row) != 9:
        continue
    zeit_datum = row[2]
    # strip() drops any whitespace left between the time and date parts
    zeit, datum = zeit_datum[:5].strip(), zeit_datum[5:].strip()
    clean_rows.append(row[:2] + [zeit, datum] + row[3:])

# Define the final columns (9 scraped cells -> 10 columns after the split)
columns = ['Gewaesser', 'Einheit', 'Zeit', 'Datum', 'Wert_Aktuell', '24h_vorher', 'Differenz', 'Mittel_24h', 'Maximum_24h', 'Minimum_24h']

# Create DataFrame
df = pd.DataFrame(clean_rows, columns=columns)

# Save the DataFrame to a CSV file (without the index column)
df.to_csv('hydodata_table.csv', index=False)

# Display the DataFrame as the cell output
df

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Notebook footer: report OS name, platform, current timestamp and Python
# version, framed by two horizontal rules.
rule = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(rule)
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', timestamp)
print('Python Version:', python_version())
print(rule)