In [36]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

URL = "https://natatry.pl/szlaki"

# Download the trail listing page. The explicit timeout keeps the script from
# blocking indefinitely when the server never answers (requests applies no
# timeout unless one is passed).
response = requests.get(URL, timeout=30)
response.raise_for_status()
# Decode the body as UTF-8 regardless of the encoding requests inferred.
response.encoding = 'utf-8'

soup = BeautifulSoup(response.text, 'html.parser')

# First <table> in the document; fail with a clear message instead of an
# AttributeError on None when the page layout has changed.
table = soup.find('table')
if table is None:
    raise RuntimeError(f"No <table> element found at {URL}")

# Column captions. Fall back to any <th> inside the table when it has no
# <thead> section, rather than crashing on a missing element.
thead = table.find('thead')
header_scope = thead if thead is not None else table
headers = [th.get_text(strip=True) for th in header_scope.find_all('th')]

# Polish colour names (taken from each trail-marker image's alt text) mapped
# to English. Names not listed here are kept unchanged.
_TRAIL_COLOURS = {
    'czerwony': 'red',
    'niebieski': 'blue',
    'zielony': 'green',
    'żółty': 'yellow',
    'czarny': 'black',
}


def _difficulty_from_img(img):
    """Return the 1-5 rating encoded as ``stars<N>`` in the image ``src``.

    Returns None when there is no image, no ``src`` attribute, or no
    ``stars1`` .. ``stars5`` marker in it.
    """
    if img is None:
        return None
    src = img.get('src') or ''
    # Checked in ascending order, so the lowest matching marker wins.
    return next((n for n in range(1, 6) if f"stars{n}" in src), None)


def _parse_length_km(text):
    """Convert a length such as ``'1,5 km'`` to a float; None if not numeric."""
    try:
        return float(text.replace(' km', '').replace(',', '.'))
    except ValueError:
        return None


def _trail_colours(cell):
    """Return the trail colours named by the images in *cell*, in page order."""
    colours = []
    for img in cell.find_all('img'):
        alt = img.get('alt')
        if not alt:
            # An image without alt text carries no colour name; skip it
            # rather than raising KeyError or recording an empty colour.
            continue
        name = alt.replace(' szlak', '').strip()
        colours.append(_TRAIL_COLOURS.get(name, name))
    return colours


rows = []

# Every <tr> in the document is scanned (not only those of one table).
for tr in soup.find_all('tr'):
    cols = tr.find_all('td')
    # Rows without <td> cells (e.g. header rows) and rows with fewer than the
    # five cells read below are skipped instead of raising IndexError.
    if len(cols) < 5:
        continue

    # Parse the individual fields.
    trails = _trail_colours(cols[3])
    # Pad/truncate to exactly four colour slots; unused slots stay None.
    trail_1, trail_2, trail_3, trail_4 = (trails + [None] * 4)[:4]

    rows.append({
        'trail_name': cols[0].get_text(strip=True),
        'difficulty': _difficulty_from_img(cols[1].find('img')),
        'length_km': _parse_length_km(cols[2].get_text(strip=True)),
        'trail_1': trail_1,
        'trail_2': trail_2,
        'trail_3': trail_3,
        'trail_4': trail_4,
        # Raw duration text at this point; the key name anticipates a later
        # conversion to minutes.
        'time_min': cols[4].get_text(strip=True),
    })

# Build the table from the scraped records, then store difficulty in pandas'
# nullable integer dtype so rows without a rating stay missing instead of
# forcing the column to float.
df = pd.DataFrame(rows)
df = df.astype({'difficulty': 'Int64'})

def norm_time(time_str):
    """Convert a duration string to a total number of minutes.

    Accepted forms are ``"<H>h"``, ``"<H>h <M>'"`` and ``"<M>'"``. The
    apostrophe after the minutes is optional, so ``"1h 30"`` is read as
    90 minutes rather than silently losing the minutes part.

    Args:
        time_str: Duration text, e.g. ``"2h 15'"`` or ``"45'"``.

    Returns:
        The duration in whole minutes as an int.

    Raises:
        ValueError: If the hour or minute part is not an integer (including
            an empty string when no ``h`` marker is present).
    """
    hours_text, marker, minutes_text = time_str.partition('h')
    if not marker:
        # No hour marker: the whole string is the minutes value.
        return int(time_str.replace("'", '').strip())

    minutes_text = minutes_text.replace("'", '').strip()
    # Minutes are optional after the hour marker ("2h" means 120 minutes).
    minutes = int(minutes_text) if minutes_text else 0
    return int(hours_text) * 60 + minutes

# Replace the raw duration strings with total minutes.
df['time_min'] = df['time_min'].map(norm_time)

# Persist the cleaned table without the positional index column.
df.to_csv('tatra_hikes.csv', index=False)

# Preview the last ten rows (displayed as the notebook cell's result).
df.tail(10)

Unnamed: 0,trail_name,difficulty,length_km,trail_1,trail_2,trail_3,trail_4,time_min
297,Na Nosal z Zakopanego (Murowanica),1.0,1.6,green,,,,55
298,Do Jaskini Dziury przez Dolinę ku Dziurze,,1.5,blue,,,,25
299,Nad Smreczyński Staw z Hali Ornak,,1.5,black,,,,30
300,Na Przełęcz Bobrowiecką z Polany Chochołowskiej,1.0,1.5,yellow,blue,,,50
301,Na Świnicę z Zawratu,4.0,1.5,red,,,,50
302,Orla Perć – od przełęczy Zawrat do Koziego Wie...,5.0,1.5,red,,,,135
303,Orla Perć – od Skrajnego Granatu do przełęczy ...,5.0,1.5,red,,,,100
304,Do Jaskini Mroźnej,1.0,1.0,black,,,,30
305,Przez Dolinę za Bramką,,1.0,green,,,,20
306,Na Sarnią Skałę z Czerwonej Przełęczy,1.0,0.5,black,,,,10
