In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

base_url = 'https://www.hikr.org/dir/tag/piz_bernina/tour/'
report_links = []
tour_data = []

# Step 1: Scrape the main page for links
skip = 0
while True:
    url = f"{base_url}?skip={skip}"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch {url}: {e}")
        break
    soup = BeautifulSoup(response.content, 'html.parser')
    links = soup.find_all('a', href=True)
    new_links = [link['href'] for link in links if '/tour/post' in link['href']]
    if not new_links:
        break  # Exit loop if no new links are found
    report_links.extend(new_links)
    skip += 20
    time.sleep(1)

# Remove duplicates and prepend base URL correctly
report_links = list(set(report_links))
report_links = [
    link if link.startswith('http') else 'https://www.hikr.org' + link 
    for link in report_links
]

print(f"Found {len(report_links)} tour reports.")
print(report_links[:10]) 




In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# List of tour report links

# Function to scrape description and date from a single link
def scrape_description_and_date(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Extract description
        description_tag = soup.find('div', class_='main_text')
        description = description_tag.get_text(strip=True) if description_tag else "No description found"
        
        # Extract date
        date_tag = soup.find('td', text='Tour Datum:')  # Look for the label
        if date_tag:
            date = date_tag.find_next_sibling('td').get_text(strip=True)
        else:
            date = "No date found"
        
        return description, date
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return "Error fetching description", "Error fetching date"

# List to store scraped data
tour_data = []

# Scrape data for each link
for index, url in enumerate(report_links):
    print(f"Scraping ({index+1}/{len(report_links)}): {url}")
    description, date = scrape_description_and_date(url)
    tour_data.append({"URL": url, "Description": description, "Date": date})
    time.sleep(1)  # Avoid overwhelming the server

# Create a DataFrame
df = pd.DataFrame(tour_data)

# Save to CSV
output_file = "hikr_tour_reports.csv"
df.to_csv(output_file, index=False, encoding='utf-8')

print(f"Scraping complete. Data saved to {output_file}")


Scraping (1/190): https://www.hikr.org/tour/post188240.html


  date_tag = soup.find('td', text='Tour Datum:')  # Look for the label


Scraping (2/190): https://www.hikr.org/tour/post173951.html
Scraping (3/190): https://www.hikr.org/tour/post40355.html
Scraping (4/190): https://www.hikr.org/tour/post68099.html
Scraping (5/190): https://www.hikr.org/tour/post141.html
Scraping (6/190): https://www.hikr.org/tour/post70011.html
Scraping (7/190): https://www.hikr.org/tour/post71622.html
Scraping (8/190): https://www.hikr.org/tour/post16213.html
Scraping (9/190): https://www.hikr.org/tour/post84570.html
Scraping (10/190): https://www.hikr.org/tour/post4248.html
Scraping (11/190): https://www.hikr.org/tour/post1898.html
Scraping (12/190): https://www.hikr.org/tour/post12692.html
Scraping (13/190): https://www.hikr.org/tour/post68799.html
Scraping (14/190): https://www.hikr.org/tour/post181277.html
Scraping (15/190): https://www.hikr.org/tour/post123030.html
Scraping (16/190): https://www.hikr.org/tour/post3458.html
Scraping (17/190): https://www.hikr.org/tour/post94.html
Scraping (18/190): https://www.hikr.org/tour/post8257

In [10]:
import pandas as pd

# Lade die zuvor gescrapten Daten
file_path = 'hikr_tour_reports.csv'  # Ersetze mit deinem Dateinamen
data = pd.read_csv(file_path)

# Schlüsselwörter für Gipfelerfolg und Abbruch
success_keywords = ['Gipfel erreicht', 'Erfolg', 'geschafft', 'am Gipfel', 'Erreicht']
failure_keywords = ['abgebrochen', 'umgedreht', 'nicht erreicht', 'gescheitert']

# Funktionen zum Überprüfen des Status
def check_success(description):
    return any(keyword.lower() in description.lower() for keyword in success_keywords)

def check_failure(description):
    return any(keyword.lower() in description.lower() for keyword in failure_keywords)

# Ergebnisse berechnen
data['Gipfel Erreicht'] = data['Description'].apply(check_success)
data['Tour Abgebrochen'] = data['Description'].apply(check_failure)

# Zusammenfassung
success_count = data['Gipfel Erreicht'].sum()
failure_count = data['Tour Abgebrochen'].sum()

print(f"Anzahl der erfolgreichen Gipfelbesteigungen: {success_count}")
print(f"Anzahl der abgebrochenen Touren: {failure_count}")

# Speichere die Ergebnisse in einer neuen CSV-Datei
output_file = 'tour_reports_with_status.csv'
data.to_csv(output_file, index=False)
print(f"Ergebnisse gespeichert in {output_file}")


Anzahl der erfolgreichen Gipfelbesteigungen: 88
Anzahl der abgebrochenen Touren: 4
Ergebnisse gespeichert in tour_reports_with_status.csv


In [14]:
import pandas as pd
import spacy
from collections import Counter

# Lade das deutsche NLP-Modell von spaCy
nlp = spacy.load("de_core_news_sm")

# Funktion zur Filterung von Ortsnamen
def extract_place_names(text):
    doc = nlp(text)
    # Erkenne nur Entitäten, die als Ort (LOC, GPE, FAC) markiert sind
    places = [ent.text for ent in doc.ents if ent.label_ in {"LOC", "GPE", "FAC"}]
    return places

# CSV-Datei laden
input_file = "hikr_tour_reports.csv"  # Passe den Dateinamen an
df = pd.read_csv(input_file)

# Extrahiere die Beschreibungen
descriptions = df["Description"].dropna()  # Sicherstellen, dass keine NaN-Werte verarbeitet werden

# Extrahiere Ortsnamen aus allen Beschreibungen
all_places = []
for description in descriptions:
    places = extract_place_names(description)
    all_places.extend(places)

# Häufigkeiten berechnen
place_counts = Counter(all_places)

# In ein DataFrame umwandeln
places_df = pd.DataFrame(place_counts.items(), columns=["Place", "Frequency"])
places_df = places_df.sort_values(by="Frequency", ascending=False)

# Ergebnis speichern
output_file = "place_frequencies.csv"
places_df.to_csv(output_file, index=False, encoding="utf-8")

print(f"Die Ortsnamen wurden extrahiert und in {output_file} gespeichert.")


Die Ortsnamen wurden extrahiert und in place_frequencies.csv gespeichert.


In [None]:
#Choose the places
import folium
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import gpxpy

# GPX-Datei einlesen
gpx_file = "pizbernina.gpx"  # Name der GPX-Datei
with open(gpx_file, "r") as gpx:
    gpx_data = gpxpy.parse(gpx)

# Routenpunkte extrahieren
route_points = [(point.latitude, point.longitude) for track in gpx_data.tracks for segment in track.segments for point in segment.points]

# CSV-Datei mit Ortsnamen laden
places_file = "place_frequencies.csv"
places_df = pd.read_csv(places_file)

# Begrenze auf die ersten 10 Einträge
places_df = places_df.head(50)

# Geocoder initialisieren
geolocator = Nominatim(user_agent="geoapi_exercises")

# Funktion zum Geokodieren der Ortsnamen
def geocode_place(place):
    try:
        location = geolocator.geocode(place, timeout=10)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
    except GeocoderTimedOut:
        return None, None

# Koordinaten für die Ortsnamen abrufen
places_df["Coordinates"] = places_df["Place"].apply(geocode_place)
places_df["Latitude"] = places_df["Coordinates"].apply(lambda x: x[0])
places_df["Longitude"] = places_df["Coordinates"].apply(lambda x: x[1])

# Filter für gültige Koordinaten
places_df = places_df.dropna(subset=["Latitude", "Longitude"])

# Funktion zum Überprüfen, ob ein Ort innerhalb von 100m zur Route liegt
def is_near_route(lat, lon, route_points, threshold=100):
    for route_point in route_points:
        distance = geodesic((lat, lon), route_point).meters
        if distance <= threshold:
            return True
    return False

# Filtere die Orte, die in der Nähe der Route liegen
places_df["NearRoute"] = places_df.apply(lambda row: is_near_route(row["Latitude"], row["Longitude"], route_points), axis=1)
near_route_places = places_df[places_df["NearRoute"]]

# Karte erstellen
center_coords = [46.489, 9.833]  # Zentrum der Karte
m = folium.Map(location=center_coords, zoom_start=12, tiles=None)

# Swiss Topo Basiskarte hinzufügen
folium.raster_layers.WmsTileLayer(
    url="https://wms.geo.admin.ch/",
    layers="ch.swisstopo.pixelkarte-farbe",
    name="Swiss Topo Karte",
    format="image/png",
    transparent=True,
).add_to(m)

# Save the filtered places with Latitude and Longitude to a new CSV file
output_file = "filtered_nearby_places.csv"
near_route_places[['Place', 'Latitude', 'Longitude']].to_csv(output_file, index=False)

print(f"Filtered nearby places saved to {output_file}")

# Ortsnamen auf der Route als Marker hinzufügen
for _, row in near_route_places.iterrows():
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=f"{row['Place']} ({row['Frequency']} mentions)",
        icon=folium.Icon(color="blue"),
    ).add_to(m)


# Route zur Karte hinzufügen
folium.PolyLine(route_points, color="red", weight=2.5, opacity=1).add_to(m)

# Layer-Kontrolle hinzufügen
folium.LayerControl().add_to(m)

# Karte als HTML speichern
m.save("route_with_nearby_places_map.html")

# Zeige die Karte im Jupyter Notebook (falls verwendet)
m


Filtered nearby places saved to filtered_nearby_places.csv


In [None]:
#Generate Sorted place list

# GPX file path
gpx_file = "pizbernina.gpx"

# Extract route points from GPX file
with open(gpx_file, "r") as gpx:
    gpx_data = gpxpy.parse(gpx)
route_points = [(point.latitude, point.longitude) for track in gpx_data.tracks for segment in track.segments for point in segment.points]

# Define the places with their coordinates
places = [
    {"Place": "Piz Bernina", "Latitude": 46.3824278, "Longitude": 9.9080521},
    {"Place": "Tschiervahütte", "Latitude": 46.40385045, "Longitude": 9.880129323931119},
    {"Place": "Val Roseg", "Latitude": 46.4379345, "Longitude": 9.8668031},
    {"Place": "Fuorcla Prievlusa", "Latitude": 46.3939731, "Longitude": 9.9068242},
    {"Place": "Piz Bianco", "Latitude": 46.3845983, "Longitude": 9.9065742},
    {"Place": "La Spedla", "Latitude": 46.3809172, "Longitude": 9.9073907},
    {"Place": "Rosegtal", "Latitude": 46.4890557, "Longitude": 9.8987686},
    {"Place": "Biancograt", "Latitude": 46.3876352, "Longitude": 9.9058198},
    {"Place": "Tschierva-Hütte", "Latitude": 46.40385045, "Longitude": 9.880129323931119},
]

# Function to find the nearest route point for each place
def nearest_route_point(lat, lon, route_points):
    distances = [(index, geodesic((lat, lon), route_point).meters) for index, route_point in enumerate(route_points)]
    return min(distances, key=lambda x: x[1])  # Returns (index, distance)

# Map places to nearest route points
places_with_distances = []
for place in places:
    nearest_point = nearest_route_point(place["Latitude"], place["Longitude"], route_points)
    places_with_distances.append({
        "Place": place["Place"],
        "Latitude": place["Latitude"],
        "Longitude": place["Longitude"],
        "NearestRoutePointIndex": nearest_point[0],
        "DistanceToRoute": nearest_point[1]
    })

# Sort places by their nearest route point index
sorted_places = sorted(places_with_distances, key=lambda x: x["NearestRoutePointIndex"])

# Save sorted places to CSV
sorted_places_df = pd.DataFrame(sorted_places)
output_file_sorted = "sorted_places_along_route.csv"
sorted_places_df.to_csv(output_file_sorted, index=False)

print(f"Sorted places saved to {output_file_sorted}")


Sorted places saved to sorted_places_along_route.csv


In [None]:
from openai import OpenAI
import pandas as pd
import re

# Initialize the OpenAI client with your API key
client = OpenAI(api_key="")  # Replace with your actual OpenAI API key

# Load the tour reports and sorted places CSV files
tour_reports_df = pd.read_csv('hikr_tour_reports.csv')
sorted_places_df = pd.read_csv('sorted_places_along_route.csv')

# Function to extract all relevant context around place name from tour descriptions
def extract_contexts_for_place(tour_reports, place_name, context_window=150):
    """
    Extract all snippets of text around the place name in the tour descriptions.
    """
    contexts = []
    for _, report in tour_reports.iterrows():
        description = report['Description']
        # Find all occurrences of the place name and extract surrounding context
        pattern = re.compile(r'(\b' + re.escape(place_name) + r'\b)', re.IGNORECASE)
        matches = [(m.start(), m.end()) for m in pattern.finditer(description)]
        for start, end in matches:
            # Extract a window around the place name
            snippet_start = max(0, start - context_window)
            snippet_end = min(len(description), end + context_window)
            snippet = description[snippet_start:snippet_end].strip()
            contexts.append(snippet)
    return contexts

# Function to generate a concise mountaineering description using OpenAI
def generate_concise_mountaineering_description(place_name, context, previous_place, next_place):
    print(f"Context for {place_name}: {context}")  # Debug output
    if not context.strip():  # Skip if context is empty
        return "No relevant context found for this place."

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant. Your task is to provide very short, bullet-point summaries based on tour descriptions. "
                    "These summaries will be used for a map, so keep them concise and informative. Focus on:\n"
                    "- Difficulty level: Use mountaineering scales (e.g., T4, PD).\n"
                    "- Notable features: Mention unique landmarks, terrain highlights, or views.\n"
                    "- Transitions: Specify previous and next locations, key route changes, and challenges.\n"
                    "- Route guidance: Provide essential navigation tips (e.g., cairns, paths, left around gendarme)."
                )
            },
            {
                "role": "user",
                "content": (
                    f"Based on the following context, provide a short and concise bullet-point summary for the place: {place_name}. "
                    "The summary should be brief and suitable for a map, derived from the context of the tour report. "
                    "Include:\n"
                    "- Difficulty level.\n"
                    "- Notable features.\n"
                    "- Transition details (previous and next places, key route changes).\n"
                    "- Route guidance with navigation tips.\n"
                    f"Context: {context}\n"
                    f"Previous place: {previous_place}\n"
                    f"Next place: {next_place}"
                )
            }
        ]
    )
    return response.choices[0].message.content

# Process each place using the sorted places file
def generate_mountain_descriptions(tour_reports_df, sorted_places_df):
    descriptions = []
    for index, place in sorted_places_df.iterrows():
        place_name = place['Place']
        previous_place = sorted_places_df.iloc[index - 1]['Place'] if index > 0 else "starting point"
        next_place = sorted_places_df.iloc[index + 1]['Place'] if index < len(sorted_places_df) - 1 else "final destination"
        
        # Collect all relevant contexts from the tour descriptions
        context_list = extract_contexts_for_place(tour_reports_df, place_name)
        combined_context = " ".join(context_list)  # Combine all contexts for the place
        
        # Debug output for context
        print(f"Generating description for {place_name}...")
        print(f"Combined Context: {combined_context}\n")

        # Generate description using OpenAI
        generated_desc = generate_concise_mountaineering_description(place_name, combined_context, previous_place, next_place)
        descriptions.append(generated_desc)
    
    sorted_places_df['Generated_Description'] = descriptions
    return sorted_places_df

# Run the process for all places
places_with_descriptions = generate_mountain_descriptions(tour_reports_df, sorted_places_df)

# Save and inspect the result
places_with_descriptions.to_csv('mountaineering_descriptions_with_directions.csv', index=False)
print(places_with_descriptions)


Generating description for Rosegtal...
Combined Context: ück zur Fuorcla Boval und von dort über den Vadret da Tschierva absteigen. Vom Rand des Gletschers über Wegspuren den Steinmännchen folgend zur Tschiervahütte 2583 m. Und von dort durchs wunderschöne Rosegtal nach Pontresina. Betrachtet man den Eselsgrat auf den Piz Roseg vom Zustieg zur Tschierva-Hütte im Rosegtal aus, so liegt es nahe, dass der Name daher rührt, dass zwei der Grattürme in etwa die Form von Eselsohren aufweisen, und dass mithin der Name weniger darauf zurückzuführen ist, dass nur  "Esel" sich waltstour von 19.5 Stunden wieder ihr Nachtlager auf der Alp Misaun.Beschreibung der TourNachdem wir am Vortag von Pontresina zur gut besetzten Tschiervahütte den sehr schönen, aber langen Weg durchs Rosegtal aufgestiegen waren, brachen wir drei (die beiden angehenden Ärzte Felix N. und Heini M. sowie ich) am 1. August etwa um halb vier Uhr morgens mit den andern Bergsteigern auf. Die meisten hatten den B val) dann traumhaf

In [129]:
places_with_descriptions.to_csv('places_with_generated_descriptions.csv', index=False)


Function calls itself,  
A loop of endless echo,  
Depths of thought revealed.


In [142]:
import folium
import pandas as pd
import gpxpy

# Load the CSV with descriptions
descriptions_file = "mountaineering_descriptions_with_directions.csv"
places_df = pd.read_csv(descriptions_file)

# Load the GPX file
gpx_file = "pizbernina.gpx"
with open(gpx_file, "r") as gpx:
    gpx_data = gpxpy.parse(gpx)

# Extract route points
route_points = [(point.latitude, point.longitude) for track in gpx_data.tracks for segment in track.segments for point in segment.points]

# Create a Folium map centered on the route
center_coords = [route_points[0][0], route_points[0][1]]  # Start of the route
map_swisstopo = folium.Map(location=center_coords, zoom_start=12, tiles=None)

# Add the SwissTopo map layer
folium.raster_layers.WmsTileLayer(
    url="https://wms.geo.admin.ch/",
    layers="ch.swisstopo.pixelkarte-farbe",
    name="Swiss Topo Karte",
    format="image/png",
    transparent=True,
).add_to(map_swisstopo)

# Add route to the map
folium.PolyLine(route_points, color="red", weight=2.5, opacity=1).add_to(map_swisstopo)

# Add filtered places as markers with descriptions
for _, row in places_df.iterrows():
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=folium.Popup(
            f"<b>{row['Place']}</b><br>{row['Generated_Description']}",
            max_width=300
        ),
        icon=folium.Icon(color="blue"),
    ).add_to(map_swisstopo)

# Add a layer control to toggle layers
folium.LayerControl().add_to(map_swisstopo)

# Save the map as an HTML file
output_file = "route_with_descriptions_map.html"
map_swisstopo.save(output_file)

print(f"Map has been saved as {output_file}.")


Map has been saved as route_with_descriptions_map.html.
