In [30]:
import pandas as pd
import networkx as nx
from scipy.spatial import KDTree
import matplotlib.pyplot as plt
from tqdm import tqdm
from joblib import Parallel, delayed
from folium.plugins import TimestampedGeoJson
from geopy.distance import geodesic
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from pandas import DataFrame
import time
import numpy as np
import folium

In [46]:
def create_pivot(data):
    df = data.copy()
    df['tanggal'] = df['timestamp'].dt.date
    df_pivot = df['maid'].groupby(df['tanggal']).value_counts()
    pivot = df_pivot.unstack().fillna(0).astype(int)

    total_counts = pivot.sum(axis=0)
    sorted_columns = total_counts.sort_values(ascending=False).index
    pivot_sorted = pivot[sorted_columns]
    return pivot_sorted

In [32]:
# data after immobility filter
gps = pd.read_csv('./des_maljan_immobility arkham.csv')
gps

Unnamed: 0,maid,latitude,longitude,timestamp,adjusted_longitude,adjusted_latitude
0,00012afc-6daf-461f-96a8-181e5af69db9,-7.800291,110.364998,2021-12-01 17:54:11,110.364996,-7.800210
1,00012afc-6daf-461f-96a8-181e5af69db9,-7.800300,110.364998,2021-12-01 17:54:22,110.364996,-7.800210
2,00012afc-6daf-461f-96a8-181e5af69db9,-7.800300,110.364998,2021-12-01 17:54:22,110.364996,-7.800210
3,00ac9aac-851f-4b1a-b554-cc1f75d02fcd,-7.799030,110.365288,2021-12-22 15:29:32,110.365144,-7.799082
4,00bdab07-a389-41d8-bb5a-743349b86e7f,-7.792231,110.365898,2021-12-17 16:50:15,110.365921,-7.792343
...,...,...,...,...,...,...
9410,fff98271-0aa0-49dd-9552-0566aac0e6c5,-7.796330,110.365562,2021-12-01 01:34:02,110.365479,-7.796365
9411,fff98271-0aa0-49dd-9552-0566aac0e6c5,-7.796330,110.365562,2021-12-01 01:34:02,110.365479,-7.796365
9412,fff98271-0aa0-49dd-9552-0566aac0e6c5,-7.796340,110.365547,2021-12-01 01:35:12,110.365479,-7.796365
9413,fff98271-0aa0-49dd-9552-0566aac0e6c5,-7.796340,110.365550,2021-12-01 01:35:21,110.365479,-7.796365


In [12]:
# Reading the CSV file into a DataFrame
road = pd.read_csv('./expanded_road_df_DIY.csv')

In [13]:
road

Unnamed: 0.1,Unnamed: 0,name,start_x,start_y,end_x,end_y
0,0,,110.145676,-7.652724,110.145675,-7.652734
1,1,,110.145676,-7.652724,110.145679,-7.652716
2,2,,110.398799,-7.861408,110.398598,-7.861393
3,3,,110.398799,-7.861408,110.398927,-7.861439
4,4,,110.581033,-7.788455,110.580238,-7.788295
...,...,...,...,...,...,...
962264,962264,,110.576041,-7.856010,110.576043,-7.856035
962265,962265,,110.448121,-7.898401,110.448103,-7.898251
962266,962266,,110.290861,-8.012311,110.290827,-8.012374
962267,962267,,110.328035,-7.766726,110.328073,-7.766544


In [14]:
# Create Graph from Road Network Data
G = nx.Graph()
for index, row in road.iterrows():
    G.add_edge((row['start_x'], row['start_y']), (row['end_x'], row['end_y']), weight=1, name=row['name'])

In [26]:
# Map Matching
sorted_gps_data = gps.sort_values(by=['maid', 'timestamp'])
node_list = list(G.nodes())
tree = KDTree(node_list)
distances, indices = tree.query(sorted_gps_data[['longitude', 'latitude']].values)
all_mapped_points_kdtree = [node_list[index] for index in indices]
sorted_gps_data['adjusted_longitude'] = [point[0] for point in all_mapped_points_kdtree]
sorted_gps_data['adjusted_latitude'] = [point[1] for point in all_mapped_points_kdtree]
sorted_gps_data['timestamp'] = pd.to_datetime(sorted_gps_data['timestamp'])

In [33]:
def find_path_on_graph(graph, start, end):
    """Find the shortest path on the graph between two nodes using the A* algorithm."""
    try:
        # Custom heuristic function considering traffic_density on nodes
        def heuristic(u, v):
            node_u_density = graph.nodes[u].get('traffic_density', 0)
            node_v_density = graph.nodes[v].get('traffic_density', 0)
            average_density = (node_u_density + node_v_density) / 2
            return 0.1 * average_density  # Adjust the heuristic as needed
        
        path = nx.astar_path(graph, start, end, heuristic=heuristic, weight='weight')
        return path
    except nx.NetworkXNoPath:
        return []

In [28]:
# Modified process_group function to ensure each GPS data addition follows the node sequence of the road network graph
def process_group(group, graph):
    # Map Matching (Assumed to be Accurate)
    mapped_points_group = group[['adjusted_longitude', 'adjusted_latitude']].values
    
    # Initialize the resulting path and timestamps
    resulting_path_group = [tuple(mapped_points_group[0])]
    new_timestamps = [group['timestamp'].values[0]]
    
    prev_node = tuple(mapped_points_group[0])
    
    for i in range(1, len(mapped_points_group)):
        current_node = tuple(mapped_points_group[i])
        
        # Use Shortest Path on Road Graph to get the sequence of nodes from prev_node to current_node
        path = find_path_on_graph(graph, prev_node, current_node)
        
        # Extend the resulting path with nodes from the graph (no points outside the graph)
        # This also ensures that the added GPS data follows the node sequence of the road network graph
        resulting_path_group.extend(path[1:])
        
        # Time Interpolation (Assuming the path only contains points from the graph)
        if len(path) > 2:
            time_start = group['timestamp'].values[i-1]
            time_end = group['timestamp'].values[i]
            time_duration = (time_end - time_start) // np.timedelta64(1, 's')
            
            num_new_points = len(path) - 2
            time_interval = time_duration // num_new_points if num_new_points > 0 else 0
            
            # Adding validation for interpolated timestamps
            if time_interval < 0:
                print("Negative time interval found, skipping this part.")
                continue
            
            interpolated_timestamps = [time_start + np.timedelta64(int(time_interval * j), 's') for j in range(1, num_new_points + 1)]
            new_timestamps.extend(interpolated_timestamps)
        
        # Append the timestamp of the current node
        new_timestamps.append(group['timestamp'].values[i])
        
        # Update prev_node for the next iteration
        prev_node = current_node
    
    # Use the original 'maid' values
    maids = list(group['maid'].values) + [group['maid'].values[-1]] * (len(resulting_path_group) - len(group))
    
    return [(maids[i], point[1], point[0], new_timestamps[i]) for i, point in enumerate(resulting_path_group)]

In [35]:
# change format to datetime just to make sure
gps['timestamp'] = pd.to_datetime(gps['timestamp'])

grouped_data = gps.groupby(['maid', gps['timestamp'].dt.date])

start = time.time()
processed_results = Parallel(n_jobs=40)(delayed(process_group)(group, G) for _, group in tqdm(grouped_data))
end = time.time()

flattened_results = [item for sublist in processed_results for item in sublist]
resulting_data = pd.DataFrame(flattened_results, columns=['maid', 'latitude', 'longitude', 'timestamp'])





  0%|                                                   | 0/909 [00:00<?, ?it/s][A[A[A


  4%|█▊                                        | 40/909 [00:01<00:23, 37.23it/s][A[A[A


  5%|██                                        | 44/909 [00:03<01:33,  9.29it/s][A[A[A


  9%|███▋                                      | 80/909 [00:09<01:50,  7.51it/s][A[A[A


  9%|███▋                                      | 80/909 [00:28<01:50,  7.51it/s][A[A[A


 13%|█████▍                                   | 120/909 [01:58<18:42,  1.42s/it][A[A[A


 18%|███████▏                                 | 160/909 [03:48<24:35,  1.97s/it][A[A[A


 22%|█████████                                | 200/909 [05:37<26:36,  2.25s/it][A[A[A


 26%|██████████▊                              | 240/909 [07:26<26:52,  2.41s/it][A[A[A


 31%|████████████▋                            | 280/909 [09:15<26:22,  2.52s/it][A[A[A


 35%|██████████████▍                          | 320/909 [11:04<25:21,  2.58s/

In [36]:
print("The time of execution of above program is:", (end - start) / 1000, "s")

The time of execution of above program is: 2.4869866054058076 s


In [37]:
resulting_data

Unnamed: 0,maid,latitude,longitude,timestamp
0,00012afc-6daf-461f-96a8-181e5af69db9,-7.800210,110.364996,2021-12-01 17:54:11
1,00ac9aac-851f-4b1a-b554-cc1f75d02fcd,-7.799082,110.365144,2021-12-22 15:29:32
2,00bdab07-a389-41d8-bb5a-743349b86e7f,-7.792343,110.365921,2021-12-17 16:50:15
3,00bdab07-a389-41d8-bb5a-743349b86e7f,-7.792343,110.365921,2021-12-18 00:54:17
4,00f39daa-44b2-45c6-ae04-dd7b8a020452,-7.792045,110.365950,2021-12-25 21:11:13
...,...,...,...,...
6191,ffddce22-852e-4490-8391-17754638fcdc,-7.790116,110.366190,2021-12-19 20:26:59
6192,ffddce22-852e-4490-8391-17754638fcdc,-7.790146,110.366171,2021-12-19 21:11:37
6193,ffddce22-852e-4490-8391-17754638fcdc,-7.790553,110.366124,2021-12-19 21:11:38
6194,ffddce22-852e-4490-8391-17754638fcdc,-7.790553,110.366124,2021-12-20 00:09:41


In [38]:
# Modified function for animated visualization based on timestamp with connecting lines
def visualisasi_pergerakan_animated_with_lines(selected_date, data, user_id, title):
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    
    if isinstance(selected_date, str):
        selected_date = pd.to_datetime(selected_date)

    selected_day_data = data[(data['timestamp'].dt.date == selected_date.date()) & (data['maid'] == user_id)]

    if selected_day_data.empty:
        print(f"No data available for user_id {user_id} on {selected_date.date()}")
        return

    m = folium.Map(location=[selected_day_data.iloc[0]['latitude'], selected_day_data.iloc[0]['longitude']], zoom_start=15)
    
    # Prepare data for TimestampedGeoJson
    features = []
    
    prev_point = None  # To store the previous point in the loop
    for index, row in selected_day_data.iterrows():
        point = [row['longitude'], row['latitude']]
        feature = {
            'type': 'Feature',
            'geometry': {
                'type': 'Point',
                'coordinates': point,
            },
            'properties': {
                'time': row['timestamp'].strftime('%Y-%m-%d %H:%M:%S'),  # Add timestamp information here
                'icon': 'circle',
                'iconstyle': {
                    'fillColor': 'blue',
                    'fillOpacity': 0.6,
                    'stroke': 'false',
                    'radius': 5
                },
                'popup': row['timestamp'].strftime('%Y-%m-%d %H:%M:%S'),
            }
        }
        features.append(feature)
        
        # Jika ini bukan titik pertama, gambar garis ke titik sebelumnya
        if prev_point:
            line_feature = {
                'type': 'Feature',
                'geometry': {
                    'type': 'LineString',
                    'coordinates': [prev_point, point]
                },
                'properties': {
                    'times': [row['timestamp'].strftime('%Y-%m-%d %H:%M:%S')] * 2,
                    'style': {'color': 'blue', 'weight': 3}
                }
            }
            features.append(line_feature)
        
        # Update previous point
        prev_point = point

    timestamped_geojson = TimestampedGeoJson(
        {'type': 'FeatureCollection', 'features': features},
        period='PT1M',
        add_last_point=True,
        auto_play=False,
        loop=False,
        max_speed=1,
        loop_button=True,
        date_options='YYYY-MM-DD HH:mm:ss',
        time_slider_drag_update=True
    )

    timestamped_geojson.add_to(m)
    return m

In [48]:
map_view = visualisasi_pergerakan_animated_with_lines('2021-12-02', resulting_data, '4a2de4fb-2878-41ae-bc12-2890dc0f62b8','uji coba')

In [49]:
map_view

In [47]:
create_pivot(resulting_data)

maid,4a2de4fb-2878-41ae-bc12-2890dc0f62b8,f501c8a1-e251-4b8c-87fc-9a1a26d2e19d,9a63fd0e-d363-478a-8136-99b1eb82693f,a36ea4b5-8ee8-4f16-9d80-307e39a7aeba,6065d08e-1b9a-45ed-9042-fc0773b25d3a,6f71acfb-483d-4ad9-9965-ba8d97892a5e,36b020a4-0ea4-4f2e-9c9e-41925b5d0681,e6400a35-172f-4533-a832-bce4f4a38aff,d859a648-6835-4fa4-abf4-b8425dfd9635,ff90538a-32dd-4723-bc66-29f7125b7aa9,...,649393a9-2948-4994-962d-e5381d791150,643c5cd4-a41c-4662-a875-f28b25a3c7b4,62d3113c-9ef9-4c89-a1fc-bcc64af77c51,6273e481-5d97-4e02-bb8f-040864ebdf56,605ba7a3-dfad-476b-99d9-23bb23f452bc,605926a7-3e18-4491-a946-027e9e719e08,5fc79936-2aae-4b03-921d-de6fc9a16bf7,5e31306c-a87b-49fd-9d74-844a71a17fac,5e08a9ed-82c6-4bd3-87a0-ac0550c34981,fff98271-0aa0-49dd-9552-0566aac0e6c5
tanggal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-12-01,0,33,0,0,65,1,0,1,0,0,...,0,1,0,0,0,0,0,0,0,1
2021-12-02,26,0,0,0,41,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-12-03,36,1,0,0,123,1,9,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-12-04,10,18,15,0,1,1,0,13,0,0,...,0,0,0,0,1,1,0,0,0,0
2021-12-05,4,1,34,24,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-12-06,16,11,11,0,0,0,1,11,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-12-07,45,0,32,0,0,0,3,6,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-12-08,59,0,0,0,0,1,0,9,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-12-09,76,0,0,0,0,0,0,6,0,0,...,0,0,0,0,0,0,0,0,0,0
2021-12-10,41,0,0,0,0,18,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
resulting_data

Unnamed: 0,maid,latitude,longitude,timestamp
0,00012afc-6daf-461f-96a8-181e5af69db9,-7.800210,110.364996,2021-12-01 17:54:11
1,00ac9aac-851f-4b1a-b554-cc1f75d02fcd,-7.799082,110.365144,2021-12-22 15:29:32
2,00bdab07-a389-41d8-bb5a-743349b86e7f,-7.792343,110.365921,2021-12-17 16:50:15
3,00bdab07-a389-41d8-bb5a-743349b86e7f,-7.792343,110.365921,2021-12-18 00:54:17
4,00f39daa-44b2-45c6-ae04-dd7b8a020452,-7.792045,110.365950,2021-12-25 21:11:13
...,...,...,...,...
6191,ffddce22-852e-4490-8391-17754638fcdc,-7.790116,110.366190,2021-12-19 20:26:59
6192,ffddce22-852e-4490-8391-17754638fcdc,-7.790146,110.366171,2021-12-19 21:11:37
6193,ffddce22-852e-4490-8391-17754638fcdc,-7.790553,110.366124,2021-12-19 21:11:38
6194,ffddce22-852e-4490-8391-17754638fcdc,-7.790553,110.366124,2021-12-20 00:09:41


In [50]:
# Write DataFrame to CSV file
resulting_data.to_csv("des_maljan_adjusted.csv", index=False)