In [None]:
import pandas as pd 
import numpy as np 
from tqdm import tqdm

# Load data

In [None]:
# Training split: read the CSV from the local data folder and preview its first rows.
train_csv = 'data/train.csv'
df_train = pd.read_csv(train_csv)
df_train.head()

In [None]:
# Test split: read the CSV from the local data folder and preview its first rows.
test_csv = 'data/test.csv'
df_test = pd.read_csv(test_csv)
df_test.head()

# OSM API

In [None]:
from OSMPythonTools.api import Api

In [None]:
# Every road id that occurs in either split, de-duplicated across both.
unique_jalan_train, unique_jalan_test = (
    frame['id_jalan'].unique() for frame in (df_train, df_test)
)
unique_jalan = np.unique(
    np.concatenate((unique_jalan_train, unique_jalan_test), axis=0),
    axis=0,
)

In [None]:
# Start-node ids seen in each split, then merged and de-duplicated.
unique_mulai_train, unique_mulai_test = (
    frame['id_titik_mulai'].unique() for frame in (df_train, df_test)
)
# End-node ids, handled the same way.
unique_akhir_train, unique_akhir_test = (
    frame['id_titik_akhir'].unique() for frame in (df_train, df_test)
)

unique_mulai = np.unique(
    np.concatenate((unique_mulai_train, unique_mulai_test), axis=0),
    axis=0,
)
unique_akhir = np.unique(
    np.concatenate((unique_akhir_train, unique_akhir_test), axis=0),
    axis=0,
)

In [None]:
# Smoke test: query a single hard-coded way through the OSM API client
# and display all of its tags as the cell output.
api = Api()
way = api.query('way/66924592')
way.tags()

In [None]:
# For every unique road id, query the way through the OSM API and keep the
# value of each tag listed in `new_col_jalan`. Results are appended in the
# same order as `unique_jalan`, so position i of 'api_result' belongs to
# position i of 'id_jalan'.
new_col_jalan = ['cycleway', 'highway','lanes', 'lanes:forward', 'lit', 'maxspeed', 'name', 'operator', 'ref', 'sidewalk', 'surface', 'turn:lanes:forward']
result_jalan = {'id_jalan': unique_jalan,
                'api_result': []}
api = Api()
for id_jalan in unique_jalan:
    way = api.query(f'way/{id_jalan}')
    api_result = {col: way.tag(col) for col in new_col_jalan}
    result_jalan['api_result'].append(api_result)


In [None]:
# Preview only the first few entries; the full list holds one dict per unique
# road id and would flood the cell output.
result_jalan['api_result'][:5]

In [None]:
# Smoke test: fetch one hard-coded node (idx=0, id_titik mulai) with the `api`
# client created in an earlier cell and print its latitude, then its longitude.
node = api.query('node/21390008')
print(node.lat(), node.lon(), sep='\n')

In [None]:
import xml.etree.ElementTree as ET
import requests

def manual_extract(node):
    """Recover the last known coordinates of an OSM node from its edit history.

    Downloads the node's version history from the OSM API 0.6 ``history``
    endpoint and scans the returned versions from last to first, returning the
    first one that carries both a ``lat`` and a ``lon`` attribute.

    Parameters
    ----------
    node : int or str
        OSM node id; interpolated into the request URL and the XPath filter.

    Returns
    -------
    tuple
        ``(lat, lon)`` exactly as found in the XML attributes (strings), or
        ``(None, None)`` when no version in the history has both attributes.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an HTTP error status.
    xml.etree.ElementTree.ParseError
        If the response body is not well-formed XML.
    """
    url = f'https://www.openstreetmap.org/api/0.6/node/{node}/history'
    # A timeout keeps one unresponsive request from hanging the whole notebook run.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # Parse the raw bytes so the XML declaration, not the HTTP header, decides the encoding.
    root = ET.fromstring(r.content)

    # All history entries recorded for this node id.
    versions = root.findall(f'.//node[@id="{node}"]')

    # NOTE(review): assumes the API lists versions oldest-first, so walking in
    # reverse visits the most recent version first - confirm against the OSM docs.
    for version in reversed(versions):
        lat = version.get('lat')
        lon = version.get('lon')
        if lat and lon:
            return lat, lon

    # No version carried coordinates (or the history was empty).
    return None, None

    

In [None]:
# Resolve latitude/longitude for every unique start node. Results are appended
# in the same order as `unique_mulai`, so position i of 'api_result' belongs to
# position i of 'id_titik_mulai'.
result_mulai = {'id_titik_mulai': unique_mulai,
                'api_result': []}
api = Api()
for id_titik in unique_mulai:
    try:
        node = api.query(f'node/{id_titik}')
        lat, lon = node.lat(), node.lon()
    except Exception:
        # The regular lookup failed, so fall back to the node's edit history.
        # The fallback values must be the ones stored: reading them from `node`
        # here would reuse the previous iteration's node (or fail on the first).
        lat, lon = manual_extract(id_titik)
        # manual_extract returns raw XML attribute strings (or None).
        # NOTE(review): converted to float on the assumption that
        # node.lat()/node.lon() return floats - confirm against OSMPythonTools.
        lat = float(lat) if lat is not None else None
        lon = float(lon) if lon is not None else None

    api_result = {'lat': lat,
                  'lon': lon}
    result_mulai['api_result'].append(api_result)


In [None]:
# Preview only the first few entries; the full list holds one dict per unique
# start node and would flood the cell output.
result_mulai['api_result'][:5]

In [None]:
# Resolve latitude/longitude for every unique end node. Results are appended
# in the same order as `unique_akhir`, so position i of 'api_result' belongs to
# position i of 'id_titik_akhir'.
result_akhir = {'id_titik_akhir': unique_akhir,
                'api_result': []}
api = Api()
for id_titik in unique_akhir:
    try:
        node = api.query(f'node/{id_titik}')
        lat, lon = node.lat(), node.lon()
    except Exception:
        # The regular lookup failed, so fall back to the node's edit history.
        # The fallback values must be the ones stored: reading them from `node`
        # here would reuse the previous iteration's node (or fail on the first).
        lat, lon = manual_extract(id_titik)
        # manual_extract returns raw XML attribute strings (or None).
        # NOTE(review): converted to float on the assumption that
        # node.lat()/node.lon() return floats - confirm against OSMPythonTools.
        lat = float(lat) if lat is not None else None
        lon = float(lon) if lon is not None else None

    api_result = {'lat': lat,
                  'lon': lon}
    result_akhir['api_result'].append(api_result)


In [None]:
# Spot check: latitude stored for the first unique end node.
first_akhir = result_akhir['api_result'][0]
first_akhir['lat']

# MapQuest API

In [None]:
# Distinct (start node, end node) rows per split, then merged and
# de-duplicated again so each pair appears once across both splits.
unique_mulai_akhir_train, unique_mulai_akhir_test = (
    np.unique(frame[['id_titik_mulai', 'id_titik_akhir']].values, axis=0)
    for frame in (df_train, df_test)
)
unique_mulai_akhir = np.unique(
    np.concatenate((unique_mulai_akhir_train, unique_mulai_akhir_test), axis=0),
    axis=0,
)


In [None]:
# Row-wise membership test: is this exact (start, end) pair one of the rows?
# `pair in unique_mulai_akhir` is not used because NumPy evaluates it as
# `(unique_mulai_akhir == pair).any()`, which is already True when a single
# element matches, e.g. only the start node.
pair = np.array([21390008, 1425033102])
(unique_mulai_akhir == pair).all(axis=1).any()

In [None]:
import json

def distance(start_lat, start_lon, end_lat, end_lon):
    """Return the route distance between two coordinates from the MapQuest Directions API.

    Parameters
    ----------
    start_lat, start_lon : float or str
        Coordinates of the route origin, sent as ``from=<lat>,<lon>``.
    end_lat, end_lon : float or str
        Coordinates of the route destination, sent as ``to=<lat>,<lon>``.

    Returns
    -------
    The ``route.distance`` value of the JSON response, unmodified.
    NOTE(review): the unit is whatever the API defaults to - confirm in the
    MapQuest documentation before using it as a feature.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    KeyError
        If the response contains no ``route.distance`` (for example when the
        service reports a routing error in the response body).
    """
    import os

    # SECURITY: the API key should come from the environment. The literal is
    # kept only as a fallback so existing runs keep working; it is exposed in
    # the notebook history and should be rotated and removed.
    key = os.environ.get('MAPQUEST_API_KEY', 'FVgErOkhYoJmjdsUrldVi9nkrCrGKuWm')

    # Let requests build and escape the query string instead of formatting it by hand.
    r = requests.get(
        'https://www.mapquestapi.com/directions/v2/route',
        params={
            'key': key,
            'from': f'{start_lat},{start_lon}',
            'to': f'{end_lat},{end_lon}',
        },
        timeout=30,
    )
    r.raise_for_status()
    data = r.json()

    route = data.get('route') or {}
    if 'distance' not in route:
        # Surface whatever diagnostic block the service returned instead of a bare KeyError.
        raise KeyError(
            f"no route distance in MapQuest response for "
            f"({start_lat},{start_lon}) -> ({end_lat},{end_lon}): {data.get('info')}"
        )
    return route['distance']

In [None]:
# Smoke test: request the route distance between two hard-coded coordinates.
distance(
    start_lat=51.4651822,
    start_lon=-0.2552399,
    end_lat=51.4652044,
    end_lon=-0.2545952,
)

In [None]:
# Sanity check of the end-node index lookup on one concrete (start, end) pair.
# The pair is taken explicitly from `unique_mulai_akhir`: when the notebook is
# run top to bottom, the leftover loop variable `id_titik` is a single node id
# from the end-node loop and cannot be subscripted.
sample_pair = unique_mulai_akhir[0]
np.where(result_akhir['id_titik_akhir'] == sample_pair[1])[0].sum()

In [None]:
# Request the route distance for every unique (start node, end node) pair.
# Results are appended in the same order as `unique_mulai_akhir`.
result_distance = {'id_titik_mulai_akhir': unique_mulai_akhir,
                   'api_result': []}

# id -> coordinate dict lookups, built once. A missing id now raises KeyError
# instead of silently resolving to index 0, which is what
# `np.where(...)[0].sum()` returns when nothing matches.
coords_mulai = dict(zip(result_mulai['id_titik_mulai'], result_mulai['api_result']))
coords_akhir = dict(zip(result_akhir['id_titik_akhir'], result_akhir['api_result']))

for id_titik in tqdm(unique_mulai_akhir):
    mulai = coords_mulai[id_titik[0]]
    akhir = coords_akhir[id_titik[1]]
    dist = distance(mulai['lat'], mulai['lon'], akhir['lat'], akhir['lon'])
    api_result = {'distance': dist}
    result_distance['api_result'].append(api_result)


# Export to CSV

In [None]:
# Preview only the first few entries; the full list holds one dict per unique
# (start node, end node) pair and would flood the cell output.
result_distance['api_result'][:5]

In [None]:
# Positions of the rows of `unique_mulai_akhir` that equal this (start, end) pair.
# np.array_equal compares the whole row; `value in id_distance` would be
# evaluated as `(id_distance == value).any()` and also match rows that share
# only one of the two node ids.
id_distance = np.array([21390008, 1425033102])
[index for index, value in enumerate(unique_mulai_akhir) if np.array_equal(value, id_distance)]


In [None]:
# Demonstration: two separate arrays holding the same pair of ids compare
# equal as a whole with np.array_equal.
array1 = np.array([21390008, 1425033102])
array2 = array1.copy()
np.array_equal(array1, array2)

In [None]:
# Element-wise comparison yields a boolean array; using it directly in `if`
# raises "truth value of an array ... is ambiguous". Reduce it with .all()
# so the branch is taken only when every element matches.
pair_diff = np.array([21390008, 1425033102]) - np.array([21390008, 1425033102])
if (pair_diff == np.array([0, 0])).all():
    print('True')

In [None]:
def find_idx_dist(id_titik, unique_val):
    """Return the position of the first entry of ``unique_val`` equal to ``id_titik``.

    Entries are compared as whole arrays with ``np.array_equal``. Returns
    ``None`` when no entry matches.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(unique_val)
        if np.array_equal(candidate, id_titik)
    )
    return next(matching_positions, None)
            

In [None]:
def add_data(df, filename):
    """Attach the fetched road, node and distance features to ``df`` and save it as CSV.

    Reads the notebook-level lookups ``result_jalan``, ``result_mulai``,
    ``result_akhir``, ``result_distance`` and ``unique_mulai_akhir`` built in
    earlier cells. For every row of ``df`` it looks up the road tags by
    ``id_jalan``, the start/end coordinates by ``id_titik_mulai`` /
    ``id_titik_akhir`` and the route distance by the (start, end) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id_jalan``, ``id_titik_mulai`` and
        ``id_titik_akhir``. It is modified in place: the new columns are
        added to (or overwritten on) this frame.
    filename : str or path-like
        Destination passed to ``DataFrame.to_csv``; written without the index.

    Raises
    ------
    KeyError
        If a row references an id (or id pair) that is absent from the
        lookups. The previous ``np.where(...)[0].sum()`` indexing silently
        resolved such ids to position 0 and copied unrelated values.
    """
    jalan_col = ['cycleway', 'highway','lanes', 'lanes:forward', 'lit', 'maxspeed', 'name', 'operator', 'ref', 'sidewalk', 'surface', 'turn:lanes:forward']
    new_col = jalan_col + ['mulai_lat', 'mulai_lon', 'akhir_lat', 'akhir_lon', 'distance']

    # Build id -> result mappings once, so each row costs a few dict lookups
    # instead of a scan over every unique id.
    jalan_by_id = dict(zip(result_jalan['id_jalan'], result_jalan['api_result']))
    mulai_by_id = dict(zip(result_mulai['id_titik_mulai'], result_mulai['api_result']))
    akhir_by_id = dict(zip(result_akhir['id_titik_akhir'], result_akhir['api_result']))
    distance_by_pair = {
        tuple(pair): res['distance']
        for pair, res in zip(unique_mulai_akhir, result_distance['api_result'])
    }

    additional_data = {col: [] for col in new_col}

    # Iterate over just the three id columns; passing the total lets tqdm show
    # a real progress bar.
    id_columns = zip(df['id_jalan'], df['id_titik_mulai'], df['id_titik_akhir'])
    for id_jalan, id_titik_mulai, id_titik_akhir in tqdm(id_columns, total=len(df)):
        jalan = jalan_by_id[id_jalan]
        for col in jalan_col:
            additional_data[col].append(jalan[col])

        mulai = mulai_by_id[id_titik_mulai]
        akhir = akhir_by_id[id_titik_akhir]
        additional_data['mulai_lat'].append(mulai['lat'])
        additional_data['mulai_lon'].append(mulai['lon'])
        additional_data['akhir_lat'].append(akhir['lat'])
        additional_data['akhir_lon'].append(akhir['lon'])

        additional_data['distance'].append(
            distance_by_pair[(id_titik_mulai, id_titik_akhir)]
        )

    # Columns are added in the order of `new_col`, which fixes the CSV column order.
    for col in new_col:
        df[col] = additional_data[col]
    df.to_csv(filename, index=False)

In [None]:
# Enrich both splits and write each one to its own CSV file (train first, then test).
for frame, out_path in ((df_train, 'new_train-2.csv'), (df_test, 'new_test-2.csv')):
    add_data(frame, out_path)