In [1]:
import pandas as pd
import networkx as nx
import geopandas as gpd
import time
import numpy as np
import folium
import movingpandas as mpd
import shapely as shp
import hvplot.pandas 
import geoviews
import mapclassify
import skmob
import matplotlib.pyplot as plt

from scipy.spatial import KDTree
from tqdm import tqdm
from joblib import Parallel, delayed
from folium.plugins import TimestampedGeoJson
from geopy.distance import geodesic
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from geopandas import GeoDataFrame, read_file
from shapely.geometry import mapping, shape, Point, LineString, Polygon
from datetime import datetime, timedelta
from holoviews import opts

## Useful support function

In [20]:
def create_plot(data): # with maid
    # Ambil baris pertama dari DataFrame
    first_row = data.iloc[0]

    # Ambil nilai latitude dan longitude dari baris pertama
    latitude = first_row['latitude']
    longitude = first_row['longitude']
    
    m = folium.Map(location=[latitude, longitude], zoom_start=25)

    # Add CircleMarkers for each point
    for index, row in data.iterrows():
        folium.CircleMarker(
            location=[row["latitude"], row["longitude"]],
            radius=5,  # Marker size
            color="blue",  # Marker color
            fill=True,
            fill_color="blue",  # Fill color of the marker
            fill_opacity=0.7,  # Opacity of the marker fill
            popup=f"User ID: {row['maid']}<br>Latitude: {row['latitude']}<br>Longitude: {row['longitude']}",
        ).add_to(m)
    
    return m

In [21]:
def create_plot_raw(data): # only lat, lon
    # Ambil baris pertama dari DataFrame
    first_row = data.iloc[0]

    # Ambil nilai latitude dan longitude dari baris pertama
    latitude = first_row['latitude']
    longitude = first_row['longitude']
    
    m = folium.Map(location=[latitude, longitude], zoom_start=25)

    # Add CircleMarkers for each point
    for index, row in data.iterrows():
        folium.CircleMarker(
            location=[row["latitude"], row["longitude"]],
            radius=5,  # Marker size
            color="blue",  # Marker color
            fill=True,
            fill_color="blue",  # Fill color of the marker
            fill_opacity=0.7,  # Opacity of the marker fill
            popup=f"Latitude: {row['latitude']}<br>Longitude: {row['longitude']}",
        ).add_to(m)
    
    return m

In [22]:
def create_pivot(df):
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df_pivot = df['maid'].groupby(df['timestamp'].dt.date).value_counts()
    pivot = df_pivot.unstack().fillna(0).astype(int)

    total_counts = pivot.sum(axis=0)
    sorted_columns = total_counts.sort_values(ascending=False).index
    pivot_sorted = pivot[sorted_columns]
    return pivot_sorted

In [23]:
def create_line(data):
    data = data.to_crs('EPSG:4326')
    data = data['geometry']
    # Buat peta dengan lokasi awal berdasarkan rata-rata koordinat dari data linestring
    avg_lat = data.apply(lambda x: x.centroid.y).mean()
    avg_lon = data.apply(lambda x: x.centroid.x).mean()
    m = folium.Map(location=[avg_lat, avg_lon], zoom_start=15)

    # Tambahkan polyline untuk setiap linestring
    for linestring in data:
        coordinates = [(lat, lon) for lon, lat in linestring.coords]
        folium.PolyLine(locations=coordinates, color='purple', weight=8, opacity=0.7).add_to(m)

    # Tambahkan GeoJson dari data GeoPandas
    folium.GeoJson(data.to_json(), name='Garis Jalan').add_to(m)

    return m

## 1. Filter Area around Malioboro

In [None]:
def filter_area(gps, road):
    road_path = road
    
    tdf = skmob.TrajDataFrame.from_file(gps)
    area_shape = gpd.read_file(road_path)
    
    gdf_gps = GeoDataFrame(tdf, geometry=gpd.points_from_xy(tdf['longitude'], tdf['latitude']))
    filtered_data = gdf_gps[gdf_gps.geometry.within(area_shape.geometry.iloc[0])].copy()
    
    filtered_data['datetime_wib'] = pd.to_datetime(filtered_data['datetime_wib'])
    filtered_data['tanggal'] = filtered_data['datetime_wib'].dt.date
    
    print("Filtering area succeed")
    
    return filtered_data

In [None]:
# Read reverse geocoding data
gps = '../../../DataTelkomsel/2021November/RGnovember2021.csv'
road = './Malioboro_around/Shrink/clipping_boundary.geojson'

filtered_data = filter_area(gps, road)
filtered_data.to_csv('filter1_malas_nov.csv', index=False)    

## 2. Preprocessing basic
- Changed datetime_wib column -> timestamp
- Convert data type timestamp to datetime
- Get the required columns df[['maid', 'latitude', 'longitude', 'timestamp']]
- Remove duplicate data based on the same maid and time
- Sort data by maid and timestamp

In [None]:
def preprocess_gps_data(df):
    # Step 1: Ubah nama kolom datetime_wib menjadi timestamp
    df = df.rename(columns={'datetime_wib':'timestamp'})
    
    # Step 2: Konversi tipe data pada kolom timestamp menjadi datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    
    # Step 3: Ambil hanya kolom maid, latitude, longitude, dan timestamp
    df_filtered = df[['maid', 'latitude', 'longitude', 'timestamp']]
        
    # Step 4: Hapus data duplikat utk data keseluruhan dan data duplikat utk maid dan timestamp yg sama 
    # (ga mgkn berada pada dua tmpt berbeda dalam satu waktu kan)
    df_filtered = df_filtered.drop_duplicates(subset=['maid', 'timestamp'])
    
    # Step 5: Hapus data yang hanya memiliki 1 record data dalam 1 hari
    df_filtered['date'] = df_filtered['timestamp'].dt.date
    record_count_per_day = df_filtered.groupby(['maid', 'date']).size().reset_index(name='count')
    multiple_records_per_day = record_count_per_day[record_count_per_day['count'] > 1]
    df_filtered = pd.merge(df_filtered, multiple_records_per_day[['maid', 'date']], on=['maid', 'date'], how='inner')
    df_filtered.drop('date', axis=1, inplace=True)

    # Step 6: Urutkan data berdasarkan maid dan timestamp
    df_filtered = df_filtered.sort_values(by=['maid', 'timestamp'])
    
    # Step 7: Reset index
    df_filtered.reset_index(inplace=True, drop=True)
    
    return df_filtered

In [None]:
# Read GPS data
df = pd.read_csv('filter1_malas_nov.csv')

# Do the process
df_preprocessed = preprocess_gps_data(df)

# Save the result
df_preprocessed.to_csv('filter2_malas_nov.csv', index=False)

## 3. Filter data points with radius <x radius meter from the road network

In [None]:
def filter_area_around_road(gps_data, road_data, distance):
    # Membuat GeoDataFrame dari data GPS
    gdf_gps = gpd.GeoDataFrame(
        gps_data,
        geometry=gpd.points_from_xy(gps_data.longitude, gps_data.latitude),
        crs="EPSG:4326"  # WGS84 coordinate reference system
    )

    # Membuat GeoDataFrame dari data jaringan jalan
    gdf_jalan = gpd.GeoDataFrame(
        road_data,
        geometry=road_data.geometry,
        crs="EPSG:4326"  # WGS84 coordinate reference system
    )

    # Mengubah sistem referensi koordinat jaringan jalan ke UTM (misalnya UTM zone 48S untuk Yogyakarta)
    gdf_jalan = gdf_jalan.to_crs("EPSG:32748")  # UTM zone 48S

    # Buffer jaringan jalan sebesar 20 meter
    buffered_jalan = gdf_jalan.buffer(distance)

    # Gabungkan semua buffered jalan menjadi satu geometri
    merged_buffer = buffered_jalan.unary_union

    # Mengubah sistem referensi koordinat data GPS ke UTM
    gdf_gps = gdf_gps.to_crs("EPSG:32748")  # UTM zone 48S

    # Membuat kolom baru untuk menyimpan jarak terdekat ke jalan
    gdf_gps['closest_distance_to_road'] = gdf_gps.geometry.apply(lambda x: x.distance(merged_buffer))

    # Mengambil data point yang jaraknya maksimal 20 meter dari jaringan jalan
    data_point_terdekat = gdf_gps[gdf_gps['closest_distance_to_road'] <= distance]

    return data_point_terdekat

In [None]:
# Read the road data
road = './Malioboro_around/Shrink/Malioboro_around_shrink_line.shp'
area_shape = gpd.read_file(road)

highway = ['living_street','residential','primary','primary_link','tertiary', 'secondary', 'unclassified', 'secondary_link', 'tertiary_link','track']
area_vehicle = area_shape[area_shape['highway'].isin(highway)]
area_vehicle = area_vehicle[['osm_id','oneway','name', 'highway', 'geometry']]
area_vehicle.reset_index(inplace=True, drop=True)

In [None]:
# Read data GPS and road data
gps_data = pd.read_csv('filter2_malas_des.csv')
road_data = area_vehicle

# Do the process
radius = 20 # change this to choose the radius from road network
df_filtered = filter_area_around_road(gps_data, road_data, radius)

# Save the result
df_filtered.to_csv(f'filter3_{radius}m_malas_des.csv', index=False)

## 4. Map matching with KDTree to mapping all gps points to the road

In [None]:
# For trial with small data
data = pd.read_csv('./filter2_malar_des.csv')
data['timestamp'] = pd.to_datetime(data['timestamp'])
gps = data[(data['maid'] == 'd9f515f4-1a78-442b-81c4-55bf5f253b91') & (data['timestamp'].dt.date == pd.to_datetime('2021-12-06').date())]
gps.reset_index(inplace=True, drop=True)

In [27]:
# Pass the dataframe from the previous process
radius = 5 # change this to choose the radius from road network
gps = pd.read_csv(f'filter3_{radius}m_malas_des.csv')

# Reading the road CSV file into a DataFrame
road = pd.read_csv('./expanded_road_df_DIY.csv')

In [28]:
# Create Graph from Road Network Data
G = nx.Graph()
for index, row in road.iterrows():
    G.add_edge((row['start_x'], row['start_y']), (row['end_x'], row['end_y']), weight=1, name=row['name'])

In [29]:
# Map Matching with KDTree untuk menarik data yg tidak di jalan menjadi berada di jalan
sorted_gps = gps.sort_values(by=['maid', 'timestamp'])
node_list = list(G.nodes())

tree = KDTree(node_list)
distances, indices = tree.query(sorted_gps[['longitude', 'latitude']].values)
all_mapped_points_kdtree = [node_list[index] for index in indices]

sorted_gps['adjusted_longitude'] = [point[0] for point in all_mapped_points_kdtree]
sorted_gps['adjusted_latitude'] = [point[1] for point in all_mapped_points_kdtree]
sorted_gps['timestamp'] = pd.to_datetime(sorted_gps['timestamp'])

In [30]:
# Save the result
sorted_gps.to_csv('filter4_5m_malas_des.csv', index=False)

In [None]:
data = sorted_gps
a = data[(data['maid'] == 'd9f515f4-1a78-442b-81c4-55bf5f253b91') & (data['timestamp'].dt.date == pd.to_datetime('2021-12-06').date())]
# create_plot(a)

## 5. Filter immobility data with mas Arkham algorithm

In [31]:
def filter_immobility_arkham(name, group):
    filtered_data = []
    group = group.sort_values('timestamp')
    prev_row = None
    immobile_start_row = None

    for i, row in group.iterrows():
        if prev_row is not None:
            if row['adjusted_latitude'] == prev_row['adjusted_latitude'] and row['adjusted_longitude'] == prev_row['adjusted_longitude']:
                if immobile_start_row is None:
                    immobile_start_row = prev_row
            else:
                if immobile_start_row is not None:
                    filtered_data.append(immobile_start_row)
                    immobile_start_row = None
                filtered_data.append(row)

        prev_row = row

    if immobile_start_row is not None:
        filtered_data.append(immobile_start_row)
    else:
        if prev_row is not None:
            filtered_data.append(prev_row)

    return pd.DataFrame(filtered_data)

In [32]:
def filter_immobility_parallel(data):
    data['date'] = pd.to_datetime(data['timestamp']).dt.date
    grouped = data.groupby(['maid', 'date'])
    results = Parallel(n_jobs=40)(delayed(filter_immobility_arkham)(name, group) for name, group in tqdm(grouped))
    return pd.concat(results, ignore_index=True)

In [33]:
# Read data GPS and road data
gps_data = pd.read_csv('filter4_5m_malas_des.csv')

# Do the process
compressed_data = filter_immobility_parallel(gps_data)

# Save the result
compressed_data.to_csv('filter5_5m_malas_des.csv', index=False)

100%|████████████████████████████████████| 31171/31171 [01:33<00:00, 331.81it/s]


In [None]:
compressed_data[:5]

In [None]:
data = compressed_data
data['timestamp'] = pd.to_datetime(data['timestamp'])
a = data[(data['maid'] == 'd9f515f4-1a78-442b-81c4-55bf5f253b91') & (data['timestamp'].dt.date == pd.to_datetime('2021-12-06').date())]
create_plot(a)

In [None]:
len(a)

## 6. Adjusted path also based on mas Arkham algorithm

In [2]:
# Read shp shape data for adjusted path
road_shp = './Malioboro_around/Shrink/Malioboro_around_shrink_line.shp'
road_gdf = gpd.read_file(road_shp)
road_df = pd.DataFrame(road_gdf)

In [3]:
# Membuat ulang graph berarah dengan memperbaiki pendekatan
G_corrected = nx.MultiDiGraph()

# Fungsi untuk menambahkan edge dengan memecah geometri menjadi setiap segmen dan menyimpan geometri
def add_edges_corrected(geometry_str, oneway=False):
    if isinstance(geometry_str, str):
        # Mengubah string geometry menjadi objek LINESTRING
        linestring = shapely.wkt.loads(geometry_str)
    else:
        linestring = geometry_str
        
    # Mendapatkan semua koordinat dalam linestring
    coords = list(linestring.coords)
    
    # Iterasi melalui setiap pasangan koordinat untuk menambahkan edge
    for i in range(len(coords) - 1):
        start_point, end_point = coords[i], coords[i + 1]
        segment_geometry = LineString([start_point, end_point])
        G_corrected.add_edge(start_point, end_point, geometry=mapping(segment_geometry))
        if not oneway:  # Jika bukan jalan satu arah, tambahkan edge terbalik dan salin geometrinya
            G_corrected.add_edge(end_point, start_point, geometry=mapping(segment_geometry))

# Menyesuaikan penanganan kolom 'oneway' untuk memastikan akurasi
for idx, row in road_df.iterrows():
    oneway = row['oneway'] == 'yes' or row['oneway'] == 'true'  # Menganggap 'yes' atau 'true' sebagai jalan satu arah
    add_edges_corrected(row['geometry'], oneway)

# Informasi dasar tentang graph yang telah diperbaiki
info_corrected = {
    'number_of_nodes': G_corrected.number_of_nodes(),
    'number_of_edges': G_corrected.number_of_edges(),
}

info_corrected

{'number_of_nodes': 7487, 'number_of_edges': 14321}

In [4]:
# Fungsi untuk menghitung bearing (arah) antara dua titik
def calculate_bearing(lat1, lon1, lat2, lon2):
    start_lat = np.radians(lat1)
    start_lng = np.radians(lon1)
    end_lat = np.radians(lat2)
    end_lng = np.radians(lon2)
    
    d_long = end_lng - start_lng
    d_phi = np.log(np.tan(end_lat/2.0 + np.pi/4.0) / np.tan(start_lat/2.0 + np.pi/4.0))
    
    if abs(d_long) > np.pi:
        if d_long > 0.0:
            d_long = -(2.0 * np.pi - d_long)
        else:
            d_long = (2.0 * np.pi + d_long)
    
    bearing = (np.degrees(np.arctan2(d_long, d_phi)) + 360.0) % 360.0
    return bearing

In [5]:
# Fungsi untuk mencari edge terdekat yang sesuai dengan arah perjalanan
def find_nearest_edge_with_direction(gps_point, bearing):
    nearest_edge = None
    min_distance = float('inf')
    corrected_bearing = None
    
    # Mengubah titik GPS menjadi objek Point
    point = Point(gps_point[1], gps_point[0])  # (longitude, latitude)
    
    # Iterasi melalui setiap edge dalam graph
    for edge in G_corrected.edges(data=True):
        start_node, end_node = edge[0], edge[1]
        edge_line = LineString([start_node, end_node])
        
        # Menghitung jarak dari titik GPS ke edge
        distance = point.distance(edge_line)
        
        # Jika jarak lebih kecil dari jarak minimum sebelumnya, simpan edge ini
        if distance < min_distance:
            # Menghitung bearing dari edge
            edge_bearing = calculate_bearing(start_node[1], start_node[0], end_node[1], end_node[0])
            bearing_diff = abs(edge_bearing - bearing) % 360
            # Memastikan bearing dalam rentang yang masuk akal untuk kesesuaian arah
            if bearing_diff < 90 or bearing_diff > 270:  # Asumsi toleransi arah +-90 derajat
                min_distance = distance
                nearest_edge = edge
                corrected_bearing = edge_bearing
                
    return nearest_edge, min_distance, corrected_bearing

In [6]:
# Fungsi untuk menyesuaikan koordinat GPS ke edge terdekat
def adjust_gps_to_nearest_edge(row):
    gps_point = (row['latitude'], row['longitude'])
    bearing = row['bearing']
    nearest_edge, distance, edge_bearing = find_nearest_edge_with_direction(gps_point, bearing)
    
    # Menggunakan titik terdekat pada edge sebagai koordinat yang disesuaikan
    if nearest_edge:
        edge_line = LineString([nearest_edge[0], nearest_edge[1]])
        adjusted_point = edge_line.interpolate(edge_line.project(Point(gps_point[1], gps_point[0])))
        adjusted_latitude, adjusted_longitude = adjusted_point.y, adjusted_point.x
    else:
        # Jika tidak ada edge terdekat yang sesuai, gunakan koordinat asli
        adjusted_latitude, adjusted_longitude = row['latitude'], row['longitude']
    
    return pd.Series([row['maid'], row['latitude'], row['longitude'], row['timestamp'], adjusted_latitude, adjusted_longitude],
                     index=['maid', 'latitude', 'longitude', 'timestamp', 'adjusted_latitude', 'adjusted_longitude'])

In [7]:
def add_bearings(gps_data): # Menambahkan kolom 'bearing' ke dataframe
    bearings = []
    for i in range(len(gps_data) - 1):
        row1, row2 = gps_data.iloc[i], gps_data.iloc[i + 1]
        bearing = calculate_bearing(row1['latitude'], row1['longitude'], row2['latitude'], row2['longitude'])
        bearings.append(bearing)

    # Untuk titik terakhir, kita ulangi bearing sebelumnya karena tidak ada titik setelahnya
    bearings.append(bearings[-1])

    gps_data['bearing'] = bearings
    
    return gps_data

In [8]:
def adjust_gps_to_nearest_edge_grouped(name, group):
    adjusted_group = group.apply(adjust_gps_to_nearest_edge, axis=1)
    return adjusted_group

In [9]:
# Do pararell for multiple maid
def adjust_path_parallel(data):
    data['date'] = pd.to_datetime(data['timestamp']).dt.date
    grouped = data.groupby(['maid', 'date'])
    results = Parallel(n_jobs=40)(delayed(adjust_gps_to_nearest_edge_grouped)(name, group) for name, group in tqdm(grouped))
    adjusted_gps_data = pd.concat(results, ignore_index=True)
    
    return adjusted_gps_data

In [34]:
# Read the GPS data
gps_data = pd.read_csv('filter5_5m_malas_des.csv')

In [35]:
# Do the process
gps_data = add_bearings(gps_data) # Add bearings

In [36]:
adjusted_gps_data = adjust_path_parallel(gps_data)

# Save the result
adjusted_gps_data.to_csv('filter6_5m_malas_des.csv', index=False)
adjusted_gps_data.head()

100%|█████████████████████████████████████| 31171/31171 [27:40<00:00, 18.78it/s]


Unnamed: 0,maid,latitude,longitude,timestamp,adjusted_latitude,adjusted_longitude
0,00012afc-6daf-461f-96a8-181e5af69db9,-7.800291,110.364998,2021-12-01 17:54:11,-7.800289,110.364984
1,00012afc-6daf-461f-96a8-181e5af69db9,-7.801375,110.365013,2021-12-01 19:22:04,-7.801376,110.365026
2,00012afc-6daf-461f-96a8-181e5af69db9,-7.801375,110.365013,2021-12-01 19:22:04,-7.801376,110.365026
3,000136a6-c76e-4f76-841d-1329727ad906,-7.78573,110.366638,2021-12-06 17:40:37,-7.785727,110.366607
4,000136a6-c76e-4f76-841d-1329727ad906,-7.78573,110.366638,2021-12-08 12:08:42,-7.785731,110.366643


In [37]:
data = adjusted_gps_data
data['timestamp'] = pd.to_datetime(data['timestamp'])
a = data[(data['maid'] == 'd9f515f4-1a78-442b-81c4-55bf5f253b91') & (data['timestamp'].dt.date == pd.to_datetime('2021-12-06').date())]
create_plot(a)

## 7. Split trajectories based on observation data

In [15]:
import warnings
warnings.filterwarnings('ignore')

plot_defaults = {'linewidth':5, 'capstyle':'round', 'figsize':(9,3), 'legend':True}
opts.defaults(opts.Overlay(active_tools=['wheel_zoom'], frame_width=500, frame_height=400))

In [16]:
def split_trajectories(tdf, minute_gap):
    index = 0
    df_min = pd.DataFrame()

    while index < len(tdf):
        my_traj = tdf.trajectories[index]
        split = mpd.ObservationGapSplitter(my_traj).split(gap=timedelta(minutes=minute_gap))
        if len(split) > 1:
            split_df = split.to_traj_gdf()
            df_min = df_min._append(split_df, ignore_index=True)
        index += 1

    return df_min

In [17]:
def create_split_traj(split_df, adjusted_gps_data):
    split_gps_data = adjusted_gps_data.copy()

    for index, row in split_df.iterrows():
        start_t = row['start_t']
        end_t = row['end_t']
        correlated_string = row['maid'].split('_')[-1]  # Get last two strings after "_"
        mask = (adjusted_gps_data['timestamp'] >= start_t) & (adjusted_gps_data['timestamp'] <= end_t)
        split_gps_data.loc[mask, 'maid'] = split_gps_data.loc[mask, 'maid'] + '_' + correlated_string
    
    return split_gps_data   

In [39]:
# Read the GPS data
data_mpd = pd.read_csv('filter6_20m_malas_des.csv')
tdf = mpd.TrajectoryCollection(data_mpd, traj_id_col='maid', t='timestamp', y='latitude', x='longitude')

# Do the process
minute_gap = 5 #Change this variable to choose the observation timedelta gap
split_gps_data = create_split_traj(split_trajectories(tdf, minute_gap), adjusted_gps_data)

# Save the result
split_gps_data.to_csv(f"filter7_20m_{minute_gap}m_malas_des.csv", index=False)

In [40]:
# Read the GPS data
data_mpd = pd.read_csv('filter6_5m_malas_des.csv')
tdf = mpd.TrajectoryCollection(data_mpd, traj_id_col='maid', t='timestamp', y='latitude', x='longitude')

# Do the process
minute_gap = 5 #Change this variable to choose the observation timedelta gap
split_gps_data = create_split_traj(split_trajectories(tdf, minute_gap), adjusted_gps_data)

# Save the result
split_gps_data.to_csv(f"filter7_5m_{minute_gap}m_malas_des.csv", index=False)

## 8. Path reconstruction with PyTrack

from pytrack.graph import graph, distance, download
from pytrack.analytics import visualization
from pytrack.matching import candidate, mpmatching_utils, mpmatching

In [None]:
# Read the GPS data
df = pd.read_csv('filter7_5m_5m_malas_des.csv')

In [None]:
def map_matching_pytrack(group, graph):
    df = group
    G = graph
    maid = df['maid'].values[0]

    latitude = df["adjusted_latitude"].to_list()
    longitude = df["adjusted_longitude"].to_list()
    points = [(lat, lon) for lat, lon in zip(latitude, longitude)]

    # Extract candidates
    G_interp, candidates = candidate.get_candidates(G, points, interp_dist=10, closest=True, radius=30)

    # Extract trellis DAG graph
    trellis = mpmatching_utils.create_trellis(candidates)

    # Perform the map-matching process
    path_prob, predecessor = mpmatching.viterbi_search(G_interp, trellis, "start", "target")

    path_df = pd.DataFrame()
    bad_df = pd.DataFrame()

    # Extract the path results from map-matching process
    if len(predecessor) > 0:
        node, path = mpmatching_utils.create_matched_path(G_interp, trellis, predecessor)

        # Create constructed path
        path_df = pd.DataFrame(path)
        path_df = path_df.rename(columns={0:'latitude', 1:'longitude'})
        path_df.insert(loc=0, column='maid', value=maid)

    else:
        bad_df = bad_df._append({'bad_maid': maid}, ignore_index=True)

    return path_df, bad_df

In [None]:
latitude = df["adjusted_latitude"].to_list()
longitude = df["adjusted_longitude"].to_list()

points = [(lat, lon) for lat, lon in zip(latitude, longitude)]
north, east = np.max(np.array([*points]), 0)
south, west = np.min(np.array([*points]), 0)

In [None]:
G = graph.graph_from_bbox(*distance.enlarge_bbox(north, south, west, east, 300), simplify=True, network_type='drive')

In [None]:
def urutkan_maid(df, maid):
    # Ekstrak angka terakhir dari setiap elemen dalam kolom 'bad_maid'
    df['last_number'] = df[maid].str.extract(r'_(\d+)$')

    # Ubah angka terakhir menjadi tipe data numerik
    df['last_number'] = pd.to_numeric(df['last_number'])

    # Urutkan DataFrame berdasarkan angka terakhir secara numerik
    df = df.sort_values(by='last_number', ignore_index=True)

    # Hapus kolom 'last_number' jika tidak diperlukan lagi
    df.drop(columns='last_number', inplace=True)

    return df

In [None]:
# Group by trajectories to perform Map Matching with PyTrack
def map_matching_pytrack_parallel(data, G):
    grouped = data.groupby('maid')
    results, bad_paths = zip(*(Parallel(n_jobs=40)(delayed(map_matching_pytrack)(group, G) for _, group in tqdm(grouped))))
    matched_path = urutkan_maid(pd.concat(results, ignore_index=True), 'maid')
    bad_path = urutkan_maid(pd.concat(bad_paths, ignore_index=True), 'bad_maid')
    
    return matched_path, bad_path

In [None]:
# Do the process
# Perform the map matching process for all maid
matched_path, bad_path = map_matching_pytrack_parallel(df, G)

## 9. Time interpolation

In [None]:
from scipy.spatial import distance_matrix
def find_nearest_point_index(lat, lon, points_df):
    distances = distance_matrix(points_df[['latitude', 'longitude']], [(lat, lon)])
    nearest_point_index = np.argmin(distances)
    return nearest_point_index

In [None]:
def add_timestamp_to_nearest_point(path_df, df):
    nearest_points_timestamp = {}  # Dictionary to store timestamps for each nearest point index
    for index, row in df.iterrows():
        nearest_point_index = find_nearest_point_index(row['adjusted_latitude'], row['adjusted_longitude'], path_df)
        if nearest_point_index not in nearest_points_timestamp:
            nearest_points_timestamp[nearest_point_index] = row['timestamp']
        else:
            # If multiple points share the same nearest point index, choose the latest timestamp
            nearest_points_timestamp[nearest_point_index] = max(nearest_points_timestamp[nearest_point_index], row['timestamp'])
    
    # Assign timestamps to the nearest points in path_df
    path_df['timestamp'] = path_df.index.map(lambda idx: nearest_points_timestamp.get(idx))
    
    return path_df

In [None]:
df  = pd.read_csv("df.csv")
path_df_30persen = pd.read_csv("path_df_30persen.csv")
path_df_60persen = path_df_30persen[::2]
hasil = add_timestamp_to_nearest_point(path_df_60persen, df)
hasil.reset_index(inplace=False, drop=True)
hasil[:20]