In [None]:
# import os
# import pandas as pd
# import folium
# import ast


# def tag_contains_tomb(tag):
#     """
#     Check if the tag (or list of tags) contains 'tomb'.
#     The tag can be a list already or a string representing a list.
#     """
#     # If tag is already a list, check membership
#     if isinstance(tag, list):
#         return any(t.lower() == "tomb" for t in tag)

#     try:
#         # Try to convert a string representation of a list to an actual list.
#         tag_list = ast.literal_eval(tag)
#         if isinstance(tag_list, list):
#             return any(t.lower() == "tomb" for t in tag_list)
#     except Exception:
#         # If conversion fails, fall back to checking if 'tomb' is a substring.
#         return "tomb" in tag.lower()

#     return False


# # Define the path to the cleaned CSV file.
# data_file = os.path.join("datatest", "cleaned_data.csv")

# # Load the CSV data.
# try:
#     df = pd.read_csv(data_file)
# except Exception as e:
#     print(f"Error loading file {data_file}: {e}")
#     exit(1)

# # Filter records where the 'tags' column contains 'tomb' (case insensitive).
# tomb_df = df[df["tags"].apply(tag_contains_tomb)]

# if tomb_df.empty:
#     print("No records with tag 'tomb' were found.")
# else:
#     # Calculate the center of the coordinates for map centering.
#     center_lat = tomb_df["coordinates.latitude"].mean()
#     center_lon = tomb_df["coordinates.longitude"].mean()

#     # Create an interactive folium map centered on the computed location.
#     m = folium.Map(location=[center_lat, center_lon], zoom_start=2)

#     # Add markers for each record with tag 'tomb'.
#     for idx, row in tomb_df.iterrows():
#         lat = row["coordinates.latitude"]
#         lon = row["coordinates.longitude"]
#         name = row["name"]
#         description = row["description"]
#         location_text = row["location"]

#         # Create a popup message with relevant details.
#         popup_text = (
#             f"<b>Name:</b> {name}<br>"
#             f"<b>Description:</b> {description}<br>"
#             f"<b>Location:</b> {location_text}"
#         )
#         folium.Marker(
#             location=[lat, lon],
#             popup=folium.Popup(popup_text, parse_html=True)
#         ).add_to(m)

#     # Create the output directory if it doesn't exist.
#     output_dir = "datatest"
#     os.makedirs(output_dir, exist_ok=True)

#     # Save the interactive map to an HTML file.
#     output_map_file = os.path.join(output_dir, "tomb_map.html")
#     m.save(output_map_file)
#     print(f"Interactive tomb map saved to {output_map_file}.")

Interactive tomb map saved to datatest\tomb_map.html.


In [1]:
import os
import pandas as pd
import ast

data_file = os.path.join("datatest", "filtered", "filtered_tags.csv")


def collect_unique_tags(tag_series):
    """
    Return the distinct tags found in a column of stringified tag lists.

    Each non-null cell is expected to be the string form of a Python list
    (e.g. "['museum', 'cinema']"). Missing cells are skipped instead of being
    passed to ast.literal_eval (which raises on NaN), and the NaN that
    explode() produces for an empty list is dropped so it is not reported
    as a tag.

    Returns a NumPy array of tags in order of first appearance.
    """
    parsed = tag_series.dropna().apply(ast.literal_eval)
    return parsed.explode().dropna().unique()


if not os.path.exists(data_file):
    print(f"File not found: {data_file}")
else:
    data = pd.read_csv(data_file)
    # read_csv already returns a DataFrame, so no re-wrapping is needed.
    df = data
    if 'tags' in df.columns:
        # Parse the stringified lists, flatten them and keep the unique tags.
        unique_tags = collect_unique_tags(df['tags'])
        print(unique_tags)
    else:
        print("'tags' column not found in the CSV file.")

['restaurant' 'fast_food' 'fountain' 'cafe' 'cinema' 'theatre' 'bar'
 'ice_cream' 'hotel' 'pub' 'memorial' 'attraction' 'city_gate' 'monument'
 'ruins' 'archaeological_site' 'viewpoint' 'castle' 'tomb' 'fort'
 'artwork' 'tower' 'clock' 'museum' 'sports_centre' 'playground' 'park'
 'swimming_pool' 'fitness_centre' 'stadium' 'garden' 'miniature_golf'
 'sauna' 'hostel' 'gallery' 'guest_house' 'picnic_site' 'camp_site'
 'aquarium' 'chalet' 'theme_park' 'zoo' 'ship' 'citywalls'
 'nature_reserve' 'pitch' 'water_park' 'ferry_terminal' 'track' 'marina'
 'golf_course' 'aqueduct']


In [None]:
import osmnx as ox
import networkx as nx
import math
import json

# ---------------------------
# Geospatial Utility Functions
# ---------------------------


def compute_bounding_box(lat, lon, radius_m):
    """
    Compute an approximate bounding box around a point.

    Parameters:
        lat, lon: centre of the box in decimal degrees.
        radius_m: half-size of the box in meters.

    Returns:
        (min_lat, max_lat, min_lon, max_lon) in decimal degrees.

    The longitude offset is divided by cos(latitude), so the box grows very
    wide as the latitude approaches +/-90 degrees. No clamping or
    antimeridian wrapping is applied.

    This function is silent: it does not print its inputs or results.
    """
    R = 6371000  # Earth's radius in meters
    lat_rad = math.radians(lat)

    # Convert the metric radius into degree offsets along each axis.
    delta_lat = (radius_m / R) * (180 / math.pi)
    delta_lon = (radius_m / (R * math.cos(lat_rad))) * (180 / math.pi)

    min_lat = lat - delta_lat
    max_lat = lat + delta_lat
    min_lon = lon - delta_lon
    max_lon = lon + delta_lon
    return min_lat, max_lat, min_lon, max_lon


def filter_by_bounding_box_and_tag(poi_data, user_lat, user_lon, radius_m, search_tag):
    """
    Select the POIs that lie inside the bounding box around the user and
    that carry the requested tag.

    Each POI is a dict with a nested "coordinates" dict ("latitude" and
    "longitude") and an optional "tags" list. Tag comparison is an exact,
    case-insensitive match. Returns the matching POI dicts in input order.
    """
    south, north, west, east = compute_bounding_box(
        user_lat, user_lon, radius_m)

    def inside_box(poi):
        # Read both coordinates before testing, as a missing key must raise.
        lat = poi["coordinates"]["latitude"]
        lon = poi["coordinates"]["longitude"]
        return south <= lat <= north and west <= lon <= east

    def carries_tag(poi):
        # A POI without a "tags" entry is treated as having no tags.
        normalized = [tag.lower() for tag in poi.get("tags", [])]
        return search_tag.lower() in normalized

    # The tag check only runs for POIs that passed the box check.
    return [poi for poi in poi_data if inside_box(poi) and carries_tag(poi)]

# ---------------------------
# Routing Functions using OSMnx & NetworkX (Drive & Walk Modes)
# ---------------------------


def get_network_graph(user_lat, user_lon, radius_m, travel_mode='drive'):
    """
    Fetch the street network around the user's position.

    The graph is requested for twice the search radius and for the given
    network type (e.g. 'drive' or 'walk'). On any failure the error is
    printed and None is returned instead of raising.
    """
    # Computed outside the try so a bad radius still raises to the caller.
    fetch_distance = radius_m * 2
    center = (user_lat, user_lon)
    try:
        return ox.graph_from_point(
            center, dist=fetch_distance, network_type=travel_mode)
    except Exception as err:
        print(f"Error retrieving network graph for {travel_mode}:", err)
    return None


def get_route_distance(graph, user_lat, user_lon, candidate_lat, candidate_lon):
    """
    Network (route) distance from the user's position to a candidate.

    Both positions are snapped to their nearest graph nodes and the shortest
    path weighted by the 'length' edge attribute is measured. Returns the
    distance in meters, or float('inf') (after printing the error) when
    anything goes wrong, e.g. no path exists.
    """
    try:
        # nearest_nodes is called with longitude first, then latitude.
        origin = ox.distance.nearest_nodes(graph, user_lon, user_lat)
        destination = ox.distance.nearest_nodes(
            graph, candidate_lon, candidate_lat)
        return nx.shortest_path_length(
            graph, origin, destination, weight='length')
    except Exception as err:
        print(
            f"Error computing route for candidate at ({candidate_lat}, {candidate_lon}):", err)
        return float('inf')


def get_top_n_by_route_distance_for_all_modes(candidates, user_lat, user_lon, radius_m, n=5):
    """
    Rank candidates by route distance for the 'drive' and 'walk' modes.

    For every mode whose street graph can be loaded, each candidate dict is
    annotated in place with "<mode>_route_distance_m". Candidates whose route
    distance does not exceed radius_m are sorted by ascending distance and
    the first n are kept.

    Returns a dict mapping mode -> list of candidate dicts. A mode whose
    graph could not be retrieved is absent from the result.
    """
    results_by_mode = {}

    for mode in ('drive', 'walk'):
        graph = get_network_graph(
            user_lat, user_lon, radius_m, travel_mode=mode)
        if graph is None:
            print(
                f"Failed to retrieve the network graph for {mode}. Skipping this mode.")
            continue

        distance_key = f"{mode}_route_distance_m"

        # Annotate every candidate with its route distance for this mode.
        for poi in candidates:
            poi_lat = poi["coordinates"]["latitude"]
            poi_lon = poi["coordinates"]["longitude"]
            poi[distance_key] = get_route_distance(
                graph, user_lat, user_lon, poi_lat, poi_lon)

        # Keep reachable candidates only, nearest first (stable ordering).
        reachable = sorted(
            (poi for poi in candidates if poi[distance_key] <= radius_m),
            key=lambda poi: poi[distance_key])

        results_by_mode[mode] = reachable[:n]

    return results_by_mode


# ---------------------------
# Main Execution: Updated for Both Driving and Walking
# ---------------------------
if __name__ == "__main__":
    # Load the POI data from the JSON file. The encoding is given explicitly
    # so that non-ASCII place names are decoded the same way on every
    # platform instead of depending on the locale's default codec.
    with open('osm_istanbul_relations.json', encoding='utf-8') as f:
        poi_data = json.load(f)

    # Simulated user query parameters:
    user_lat = 40.985660   # Example: Istanbul city center latitude
    user_lon = 29.027361   # Example: Istanbul city center longitude
    radius_m = 1000        # 1 km search radius
    search_tag = "park"    # Example tag to filter for

    # --- Step 1: Candidate Filtering by Bounding Box and Tag ---
    candidates = filter_by_bounding_box_and_tag(
        poi_data, user_lat, user_lon, radius_m, search_tag)
    print("Candidates after bounding box and tag filtering:")
    for poi in candidates:
        print(
            f"  {poi['name']} at {poi['coordinates']} with tags: {poi['tags']}")

    # --- Step 2: Geospatial Analysis via Route Distances (Drive & Walk) ---
    # The call also annotates each candidate dict with its per-mode distance,
    # which the report below reads back.
    top_candidates = get_top_n_by_route_distance_for_all_modes(
        candidates, user_lat, user_lon, radius_m, n=5)

    # Display results for both modes
    print("\nTop candidates based on route distances:")
    for mode, results in top_candidates.items():
        print(f"\n--- {mode.capitalize()} Mode ---")
        if results:
            for poi in results:
                print(
                    f"{poi['name']} - Route Distance: {poi[f'{mode}_route_distance_m']:.2f} meters")
        else:
            print(
                f"No locations found within the specified route distance for {mode} mode.")

In [2]:
import os
import osmnx as ox
import networkx as nx
import math
import pandas as pd

# ---------------------------
# Geospatial Utility Functions
# ---------------------------


def compute_bounding_box(lat, lon, radius_m):
    """
    Approximate a square bounding box of half-size radius_m (meters)
    centred on (lat, lon), both given in decimal degrees.

    Returns the tuple (min_lat, max_lat, min_lon, max_lon).
    """
    earth_radius_m = 6371000
    degrees_per_radian = 180 / math.pi
    cos_lat = math.cos(math.radians(lat))

    # Angular offsets: meridians converge with latitude, so the longitude
    # offset is scaled by 1 / cos(latitude).
    lat_offset = (radius_m / earth_radius_m) * degrees_per_radian
    lon_offset = (radius_m / (earth_radius_m * cos_lat)) * degrees_per_radian

    return lat - lat_offset, lat + lat_offset, lon - lon_offset, lon + lon_offset


def filter_by_bounding_box_and_tag(df, user_lat, user_lon, radius_m, search_tag):
    """
    Filter POIs that fall within a bounding box around the user's location
    and carry the specified tag.

    Parameters:
        df: DataFrame with 'coordinates.latitude', 'coordinates.longitude'
            and 'tags' columns. A 'tags' cell may be a list or the string
            form of a list (e.g. "['museum', 'cinema']").
        user_lat, user_lon: centre of the search area in decimal degrees.
        radius_m: half-size of the bounding box in meters.
        search_tag: tag to look for.

    Returns:
        A list of row dictionaries (one per matching POI).

    The tag test is an exact, case-insensitive comparison against each
    individual tag. A substring/regex search over the stringified list would
    wrongly return e.g. 'theme_park' and 'water_park' rows for 'park'.
    Rows with a missing 'tags' value never match.
    """
    import ast  # local import: this cell does not import ast itself

    min_lat, max_lat, min_lon, max_lon = compute_bounding_box(
        user_lat, user_lon, radius_m)

    # Bounding-box filter (both bounds inclusive).
    in_box = (
        df['coordinates.latitude'].between(min_lat, max_lat)
        & df['coordinates.longitude'].between(min_lon, max_lon)
    )
    boxed_df = df[in_box]

    wanted = search_tag.lower()

    def _has_tag(raw_tags):
        """Return True when the cell's tag list contains the wanted tag."""
        parsed = raw_tags
        if isinstance(raw_tags, str):
            try:
                parsed = ast.literal_eval(raw_tags)
            except (ValueError, SyntaxError):
                # Not a Python literal: treat the whole cell as a single tag.
                parsed = [raw_tags]
        if isinstance(parsed, str):
            parsed = [parsed]
        if not isinstance(parsed, (list, tuple, set)):
            return False  # NaN or any other unexpected cell content
        return any(isinstance(tag, str) and tag.lower() == wanted
                   for tag in parsed)

    # astype(bool) keeps the mask boolean even when no row is inside the box.
    tag_mask = boxed_df['tags'].apply(_has_tag).astype(bool)

    # Convert the matching rows to a list of dictionaries for later processing.
    candidates = boxed_df[tag_mask].to_dict(orient='records')
    return candidates

# ---------------------------
# Routing Functions using OSMnx & NetworkX (Drive & Walk Modes)
# ---------------------------


def get_network_graph(user_lat, user_lon, radius_m, travel_mode='drive'):
    """
    Download the street network centred on the user's location.

    The download covers twice the search radius and uses travel_mode as the
    network type (e.g. 'drive' or 'walk'). Returns the graph, or None after
    printing the error if the download fails.
    """
    # Kept outside the try block so an invalid radius still propagates.
    coverage_m = radius_m * 2
    try:
        street_graph = ox.graph_from_point(
            (user_lat, user_lon), dist=coverage_m, network_type=travel_mode)
    except Exception as exc:
        print(f"Error retrieving network graph for {travel_mode}:", exc)
        return None
    return street_graph


def get_route_distance(graph, user_lat, user_lon, candidate_lat, candidate_lon):
    """
    Measure the shortest-path distance along the network between the user
    and a candidate location.

    Returns the path length in meters (sum of the 'length' edge attribute),
    or float('inf') after printing the error when the route cannot be
    computed.
    """
    try:
        # Snap both endpoints to graph nodes; longitude is passed first.
        start_node = ox.distance.nearest_nodes(graph, user_lon, user_lat)
        end_node = ox.distance.nearest_nodes(
            graph, candidate_lon, candidate_lat)
        meters = nx.shortest_path_length(
            graph, start_node, end_node, weight='length')
    except Exception as exc:
        print(
            f"Error computing route for candidate at ({candidate_lat}, {candidate_lon}):", exc)
        return float('inf')
    return meters


def get_top_n_by_route_distance_for_all_modes(candidates, user_lat, user_lon, radius_m, n=5):
    """
    Compute per-mode route distances and return the n nearest candidates
    for driving and for walking.

    Each candidate dict (with flat 'coordinates.latitude' and
    'coordinates.longitude' keys) gains a "<mode>_route_distance_m" entry
    in place. Only candidates whose route distance is at most radius_m are
    ranked. The result maps each mode to its ranked list; a mode whose graph
    could not be loaded is left out.
    """
    ranked_by_mode = {}

    for mode in ('drive', 'walk'):
        graph = get_network_graph(
            user_lat, user_lon, radius_m, travel_mode=mode)
        if graph is None:
            print(
                f"Failed to retrieve the network graph for {mode}. Skipping this mode.")
            continue

        field = f"{mode}_route_distance_m"

        # Record the route distance on every candidate for this mode.
        for poi in candidates:
            poi_lat = poi["coordinates.latitude"]
            poi_lon = poi["coordinates.longitude"]
            poi[field] = get_route_distance(
                graph, user_lat, user_lon, poi_lat, poi_lon)

        # Drop candidates beyond the threshold, then order nearest first.
        within_reach = sorted(
            (poi for poi in candidates if poi[field] <= radius_m),
            key=lambda poi: poi[field])

        ranked_by_mode[mode] = within_reach[:n]

    return ranked_by_mode


# ---------------------------
# Main Execution: Updated for Both Driving and Walking
# ---------------------------
if __name__ == "__main__":
    # Read the POI table from the CSV file and show its schema.
    data_file = os.path.join("datatest", "filtered", "filtered_tags.csv")
    df = pd.read_csv(data_file)
    print("Data Information:")
    df.info()

    # Query parameters for this demo run.
    user_lat = 40.985660   # user latitude (decimal degrees)
    user_lon = 29.027361   # user longitude (decimal degrees)
    radius_m = 1000        # search radius in meters (1 km)
    search_tag = "cinema"  # tag to look for

    # Step 1: cheap pre-filter by bounding box and tag.
    candidates = filter_by_bounding_box_and_tag(
        df, user_lat, user_lon, radius_m, search_tag)
    print("Candidates after bounding box and tag filtering:")
    for poi in candidates:
        print(
            f"  {poi['name']} at "
            f"({poi['coordinates.latitude']}, {poi['coordinates.longitude']}) "
            f"with tags: {poi['tags']}")

    # Step 2: rank the survivors by route distance for each travel mode.
    top_candidates = get_top_n_by_route_distance_for_all_modes(
        candidates, user_lat, user_lon, radius_m, n=5)

    # Report one section per travel mode.
    print("\nTop candidates based on route distances:")
    for mode, results in top_candidates.items():
        print(f"\n--- {mode.capitalize()} Mode ---")
        if not results:
            print(
                f"No locations found within the specified route distance for {mode} mode.")
            continue
        for poi in results:
            print(
                f"{poi['name']} - Route Distance: {poi[f'{mode}_route_distance_m']:.2f} meters")

Data Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26814 entries, 0 to 26813
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   name                   26814 non-null  object 
 1   description            26814 non-null  object 
 2   location               26814 non-null  object 
 3   tags                   26814 non-null  object 
 4   coordinates.latitude   26814 non-null  float64
 5   coordinates.longitude  26814 non-null  float64
 6   source_folder          26814 non-null  object 
 7   source_file            26814 non-null  object 
dtypes: float64(2), object(6)
memory usage: 1.6+ MB
Candidates after bounding box and tag filtering:
  Kadıköy Sineması at (40.9884436, 29.0288541) with tags: ['cinema']
  Unknown Place at (40.9873797, 29.02687) with tags: ['cinema']
  Sinematek at (40.9864836, 29.0326796) with tags: ['museum', 'cinema']

Top candidates based on route distances:

--- D