<center> <h1> Trekking Route Planning for Nepali Mountain Trails based on Time, Distance and Difficulty </h1> </center>

## Imports

In [5]:
import json
import csv
import os
import requests
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Constants

In [6]:
# Raw GeoJSON exports (one per trekking route / area) that feed the pipeline.
# Every file lives in dataset/raw/ and carries a .json extension, so only the
# route names are spelled out here.
RAW_GEOJSON_INPUT = [
    f"dataset/raw/{route_name}.json"
    for route_name in (
        "Barhabise to TshoRolpa",
        "GairiHanumante to Simigaun",
        "Gaurishankar Conservation Area",
        "Jiri to Tsho Rolpa",
        "Kapthang to Larcha",
    )
]

## Functions

### Parse GeoJSON into a table of coordinates and location names

In [7]:
def _iter_labelled_coords(name, geometry_type, coordinates):
    """Yield ``(label, coord)`` pairs for every position of one GeoJSON geometry.

    Args:
        name (str): Base name of the feature the geometry belongs to.
        geometry_type (str | None): GeoJSON geometry type.
        coordinates (list): The geometry's ``coordinates`` member.

    Yields:
        tuple: ``(label, coord)`` where ``label`` is ``name`` (for a Point) or
        ``name`` plus a 1-based positional suffix, and ``coord`` is the raw
        position list (longitude first, latitude second).

    Unsupported or missing geometry types yield nothing. For polygons only the
    outer ring (first ring) is used.
    """
    if geometry_type == "Point":
        # An empty position cannot be tabulated; skip it instead of raising.
        if coordinates:
            yield name, coordinates
    elif geometry_type in ("LineString", "MultiPoint"):
        for i, coord in enumerate(coordinates, start=1):
            yield f"{name} (Point {i})", coord
    elif geometry_type == "Polygon":
        outer_ring = coordinates[0] if coordinates else []
        for i, coord in enumerate(outer_ring, start=1):
            yield f"{name} (Vertex {i})", coord
    elif geometry_type == "MultiLineString":
        for j, line in enumerate(coordinates, start=1):
            for i, coord in enumerate(line, start=1):
                yield f"{name} (Line {j}, Point {i})", coord
    elif geometry_type == "MultiPolygon":
        for j, polygon in enumerate(coordinates, start=1):
            outer_ring = polygon[0] if polygon else []
            for i, coord in enumerate(outer_ring, start=1):
                yield f"{name} (Polygon {j}, Vertex {i})", coord


def geojson_to_table(geojson_files, output_file):
    """
    Convert multiple GeoJSON files to a tabulated list of names and coordinates.

    Each position of each feature becomes one row with the keys ``name``,
    ``type``, ``longitude``, ``latitude`` and ``source_file``. The feature name
    comes from the ``name`` property, then ``title``, then falls back to
    ``Feature-<file name without extension>``.

    Processing is best-effort per file: if a file cannot be read or parsed, an
    error is printed and the remaining files are still processed. Rows already
    collected from the failing file are kept.

    Args:
        geojson_files (list): List of paths to the input GeoJSON files.
        output_file (str): Path to the output CSV file. Its parent directory is
            created if it does not exist.

    Returns:
        list: The row dictionaries that were written to ``output_file``.
    """
    all_features = []

    for geojson_file in geojson_files:
        try:
            # Read as UTF-8 explicitly so non-ASCII place names do not depend
            # on the platform's default encoding.
            with open(geojson_file, "r", encoding="utf-8") as f:
                data = json.load(f)

            # splitext keeps dots that are part of the name ("v1.2 trail.json").
            base_filename = os.path.splitext(os.path.basename(geojson_file))[0]

            for feature in data.get("features", []):
                # "properties" and "geometry" may legitimately be JSON null;
                # `or {}` treats null like an absent member instead of crashing.
                properties = feature.get("properties") or {}
                name = properties.get(
                    "name", properties.get("title", f"Feature-{base_filename}")
                )

                geometry = feature.get("geometry") or {}
                geometry_type = geometry.get("type")
                coordinates = geometry.get("coordinates") or []

                for label, coord in _iter_labelled_coords(
                    name, geometry_type, coordinates
                ):
                    all_features.append(
                        {
                            "name": label,
                            "type": geometry_type,
                            "longitude": coord[0],
                            "latitude": coord[1],
                            "source_file": geojson_file,
                        }
                    )

            print(f"Processed {geojson_file}")

        except Exception as e:
            # Deliberate best-effort: report the problem and move on.
            print(f"Error processing {geojson_file}: {e}")

    # Make sure the destination directory exists before writing.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Write to CSV (UTF-8 so non-ASCII names survive on every platform).
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f, fieldnames=["name", "type", "longitude", "latitude", "source_file"]
        )
        writer.writeheader()
        writer.writerows(all_features)

    print(
        f"Converted {len(all_features)} coordinates from {len(geojson_files)} files to {output_file}"
    )
    return all_features

In [8]:
# Flatten every raw GeoJSON export into a single combined CSV.
# To process a whole directory instead, pass e.g. glob.glob("data/*.geojson")
# as the first argument (this needs `import glob`, which the imports cell
# does not currently provide).
features = geojson_to_table(
    geojson_files=RAW_GEOJSON_INPUT,
    output_file="dataset/processed/combined_locations.csv",
)

# The rows are also returned, so they can be inspected in memory.
print(f"Total features extracted: {len(features)}")

Processed dataset/raw/Barhabise to TshoRolpa.json
Processed dataset/raw/GairiHanumante to Simigaun.json
Processed dataset/raw/Gaurishankar Conservation Area.json
Processed dataset/raw/Jiri to Tsho Rolpa.json
Processed dataset/raw/Kapthang to Larcha.json
Converted 1107 coordinates from 5 files to dataset/processed/combined_locations.csv
Total features extracted: 1107


## OSM Data Fetching

In [None]:
def get_location_info(lat, lon, timeout=10):
    """
    Get location information from OpenStreetMap's Nominatim API using coordinates.

    Args:
        lat (float): Latitude of the point to reverse-geocode.
        lon (float): Longitude of the point to reverse-geocode.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout a single stalled connection would block the
            whole batch indefinitely.

    Returns:
        dict | None: The decoded JSON response on HTTP 200, otherwise ``None``
        (non-200 status, network failure, timeout or an undecodable body). The
        failure reason is printed.
    """
    # Nominatim API endpoint for reverse geocoding. The query is passed via
    # `params` so requests builds and escapes the query string itself.
    url = "https://nominatim.openstreetmap.org/reverse"
    params = {"lat": lat, "lon": lon, "format": "json"}

    # Make sure to add a user agent as per Nominatim usage policy
    headers = {"User-Agent": "LocationLookupScript/1.0"}

    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)

        if response.status_code == 200:
            return response.json()

        print(f"Error: Received status code {response.status_code}")
        return None
    except Exception as e:
        # Deliberately broad: callers run this in a long loop and rely on a
        # None result rather than an exception for any failed lookup.
        print(f"Error making request: {e}")
        return None


def process_csv_file(csv_file_path, output_file_path, delay_seconds=1.0):
    """
    Process a CSV file containing coordinates and fetch location information for each entry.

    The input is read positionally: column 0 is the name, 1 the feature type,
    2 the longitude, 3 the latitude and 4 the source file. The first row is
    treated as a header and skipped.

    Args:
        csv_file_path (str): Path to the input CSV file.
        output_file_path (str): Path of the JSON file to write. It receives a
            list of objects with the keys ``name``, ``type``, ``longitude``,
            ``latitude``, ``source_file`` and ``location_info``;
            ``location_info`` is whatever ``get_location_info`` returned.
        delay_seconds (float): Pause after each lookup, to respect the
            geocoding service's rate limit.

    Rows that are blank, too short, or have non-numeric coordinates are
    reported and skipped. Any other error while reading (for example a missing
    input file) is reported, and the results gathered so far are still written.
    """
    results = []

    try:
        # newline="" is what the csv module expects; UTF-8 matches the
        # encoding used elsewhere in this notebook's pipeline.
        with open(csv_file_path, "r", newline="", encoding="utf-8") as file:
            csv_reader = csv.reader(file)

            # Skip the header row (the default avoids StopIteration on an
            # empty file).
            next(csv_reader, None)

            # start=2 because row 1 is the header.
            for row_number, row in enumerate(csv_reader, start=2):
                try:
                    name = row[0].strip('"')
                    feature_type = row[1].strip()
                    longitude = float(row[2].strip())
                    # Stray "*" / spaces are stripped from the last two
                    # columns — presumably left over from hand-edited input;
                    # TODO confirm whether this is still needed.
                    latitude = float(row[3].strip("* "))
                    source_file = row[4].strip("* ")
                except (IndexError, ValueError) as e:
                    # One bad row must not discard every row after it.
                    print(f"Skipping malformed row {row_number}: {e}")
                    continue

                print(f"Processing: {name}")

                # Get location information from OpenStreetMap
                location_info = get_location_info(latitude, longitude)

                print("Location Info", location_info)

                results.append(
                    {
                        "name": name,
                        "type": feature_type,
                        "longitude": longitude,
                        "latitude": latitude,
                        "source_file": source_file,
                        "location_info": location_info,
                    }
                )

                # Sleep to respect the usage policy (1 request per second is safe)
                time.sleep(delay_seconds)

    except Exception as e:
        print(f"Error processing CSV file: {e}")

    # Save the results to a JSON file
    with open(output_file_path, "w", encoding="utf-8") as outfile:
        json.dump(results, outfile, indent=2)

    print(f"Results saved to {output_file_path}")

In [13]:
# Reverse-geocoding is slow: the combined CSV holds 1107 coordinates and each
# lookup is followed by a one-second pause, so a full run takes well over
# 18 minutes. Reuse the saved results when they exist so the notebook can be
# re-run end to end; delete the JSON file to force a fresh fetch.
location_info_json = "dataset/processed/location_info.json"

if os.path.exists(location_info_json):
    print(f"Using cached results from {location_info_json}")
else:
    process_csv_file("dataset/processed/combined_locations.csv", location_info_json)

Processing: Feature-Barhabise to TshoRolpa (Line 1, Point 1)           
Location Info {'place_id': 224443884, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'way', 'osm_id': 942708593, 'lat': '27.786988464550575', 'lon': '85.89968218340306', 'class': 'highway', 'type': 'trunk', 'place_rank': 26, 'importance': 0.053383334859235486, 'addresstype': 'road', 'name': 'Arniko Highway', 'display_name': 'Arniko Highway, Barhabise-04, Bāhrabīse, Barhabise, सिन्धुपाल्चोक, बागमती प्रदेश, नेपाल', 'address': {'road': 'Arniko Highway', 'city_district': 'Barhabise-04', 'village': 'Bāhrabīse', 'municipality': 'Barhabise', 'county': 'सिन्धुपाल्चोक', 'state': 'बागमती प्रदेश', 'ISO3166-2-lvl4': 'NP-P3', 'country': 'नेपाल', 'country_code': 'np'}, 'boundingbox': ['27.7869645', '27.7917041', '85.8979360', '85.9000202']}
Processing: Feature-Barhabise to TshoRolpa (Line 1, Point 2)           
Location Info {'place_id': 226500031, 'licence': 'Data © OpenStreetMap

In [14]:
def json_to_csv(json_file_path, csv_file_path):
    """
    Flatten the geocoding results JSON into a CSV file.

    Each record contributes the base columns ``name``, ``type``, ``longitude``,
    ``latitude`` and ``source_file``. Every key found in any record's
    ``location_info`` becomes a ``location_<key>`` column, and every key found
    in any ``location_info["address"]`` becomes an ``address_<key>`` column.
    Both groups are sorted alphabetically. Missing values are written as
    empty strings.

    Records whose ``location_info`` (or its ``address``) is missing or null,
    as happens when a lookup failed, are written with those columns empty.

    Args:
        json_file_path (str): Path to the input JSON file (a list of records,
            or a single record object).
        csv_file_path (str): Path to the output CSV file.
    """

    def _info_and_address(record):
        # Treat a missing, null or non-dict value as "no data" rather than
        # crashing the whole conversion.
        info = record.get("location_info")
        if not isinstance(info, dict):
            info = {}
        address = info.get("address")
        if not isinstance(address, dict):
            address = {}
        return info, address

    # Read JSON data
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    # If data is not a list (single record), convert it to a list
    if not isinstance(data, list):
        data = [data]

    csv_headers = ["name", "type", "longitude", "latitude", "source_file"]

    # First pass: collect every key that appears anywhere, so all rows share
    # one header. "address" is expanded into its own columns instead.
    location_info_keys = set()
    address_keys = set()
    for item in data:
        info, address = _info_and_address(item)
        location_info_keys.update(key for key in info if key != "address")
        address_keys.update(address)

    # Sorted for a stable column order.
    location_info_headers = sorted(location_info_keys)
    address_headers = sorted(address_keys)

    full_headers = (
        csv_headers
        + [f"location_{header}" for header in location_info_headers]
        + [f"address_{header}" for header in address_headers]
    )

    # Second pass: write one flattened row per record.
    with open(csv_file_path, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=full_headers)
        writer.writeheader()

        for item in data:
            info, address = _info_and_address(item)

            row = {header: item.get(header, "") for header in csv_headers}
            for header in location_info_headers:
                row[f"location_{header}"] = info.get(header, "")
            for header in address_headers:
                row[f"address_{header}"] = address.get(header, "")

            writer.writerow(row)

In [16]:
# Flatten the geocoding results into a CSV for tabular analysis.
# Adjust the two paths below to convert a different file.
input_json = "dataset/processed/location_info.json"
output_csv = "dataset/processed/location_results.csv"

json_to_csv(json_file_path=input_json, csv_file_path=output_csv)
print(f"Conversion complete. CSV file saved as {output_csv}")

Conversion complete. CSV file saved as dataset/processed/location_results.csv


In [9]:
# Load the final dataset used for the analysis.
# NOTE(review): no cell shown above writes this file — presumably it comes
# from a separate elevation/filtering step applied to location_results.csv;
# confirm and document its provenance.
df = pd.read_csv("./dataset/final/final_elevation_filtered.csv")

# Column names, dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 381 entries, 0 to 380
Data columns (total 20 columns):
 #   Column                                                                                                                                                    Non-Null Count  Dtype  
---  ------                                                                                                                                                    --------------  -----  
 0   latitude                                                                                                                                                  381 non-null    float64
 1   longitude                                                                                                                                                 381 non-null    float64
 2   location_name                                                                                                                                             381 

In [10]:
# Summary statistics for the numeric columns of the final dataset.
df.describe()

Unnamed: 0,latitude,longitude,location_importance,location_lat,location_lon,location_osm_id,location_place_id,location_place_rank,altitude
count,381.0,381.0,381.0,381.0,381.0,381.0,381.0,381.0,381.0
mean,27.817914,86.106217,0.080118,27.817673,86.102098,1616376000.0,224342600.0,23.994751,2456.863517
std,0.118762,0.24067,0.053237,0.11708,0.237855,2632026000.0,1087259.0,3.992753,1174.249403
min,27.5742,85.7519,4e-05,27.574119,85.744471,6720524.0,222515000.0,18.0,684.0
25%,27.7511,85.8731,0.053383,27.742438,85.870577,316832600.0,223780300.0,19.0,1548.0
50%,27.81895,86.1557,0.053383,27.816637,86.156207,341283700.0,224131000.0,26.0,2260.0
75%,27.88785,86.3241,0.146717,27.890171,86.312076,1309207000.0,224866600.0,26.0,3424.0
max,28.1337,86.5518,0.16005,28.134569,86.552753,12137940000.0,229040400.0,30.0,5726.0
