In [1]:
import pandas as pd
import networkx as nx
from scipy.spatial import KDTree
import matplotlib.pyplot as plt
from tqdm import tqdm
from joblib import Parallel, delayed
from folium.plugins import TimestampedGeoJson
from geopy.distance import geodesic
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from pandas import DataFrame
import time
import numpy as np
import folium

In [8]:
# Read the input CSV into `df`; the cells below operate on this frame.
path = './filter1_malar_jan.csv'
df = pd.read_csv(filepath_or_buffer=path)

In [3]:
# Per-column flag: True where the column contains at least one missing value.
df.isna().any()

Unnamed: 0        False
maid              False
latitude          False
longitude         False
datetime_wib      False
geometry          False
Kelurahan/Desa    False
Kecamatan         False
Kabupaten         False
tanggal           False
dtype: bool

In [4]:
def preprocess_gps_data(df):
    """Clean raw GPS pings into a per-device, time-ordered table.

    The input frame is left untouched; every step works on a derived frame.

    Steps, in order:

    1. Rename ``datetime_wib`` to ``timestamp`` and parse it with
       ``pd.to_datetime``.
    2. Keep only ``maid``, ``latitude``, ``longitude`` and ``timestamp``.
    3. Keep the first row of every ``(maid, timestamp)`` pair (a device cannot
       be at two different places at the same instant).
    4. Discard rows whose ``(maid, calendar date)`` group holds only one record.
    5. Sort by ``maid`` then ``timestamp`` and renumber the index from 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``maid``, ``latitude``, ``longitude`` and
        ``datetime_wib``.

    Returns
    -------
    pandas.DataFrame
        Columns ``maid``, ``latitude``, ``longitude``, ``timestamp``.
    """
    kept_columns = ['maid', 'latitude', 'longitude', 'timestamp']
    day_key = ['maid', 'date']

    # Steps 1-2: rename, then parse the timestamp column.
    renamed = df.rename(columns={'datetime_wib': 'timestamp'})
    renamed['timestamp'] = pd.to_datetime(renamed['timestamp'])

    # Steps 2-3: project to the needed columns and de-duplicate per device/instant.
    pings = renamed[kept_columns].drop_duplicates(subset=['maid', 'timestamp'])

    # Step 4: count records per device and calendar day, keep days with > 1 record.
    pings = pings.assign(date=pings['timestamp'].dt.date)
    per_day = pings.groupby(day_key).size().reset_index(name='count')
    active_days = per_day.loc[per_day['count'] > 1, day_key]
    kept = pings.merge(active_days, on=day_key, how='inner').drop(columns='date')

    # Step 5: chronological order per device with a fresh 0..n-1 index.
    return kept.sort_values(by=['maid', 'timestamp']).reset_index(drop=True)

In [9]:
# Clean the raw frame, then persist the result (row index is not written).
df_preprocessed = preprocess_gps_data(df)
df_preprocessed.to_csv(path_or_buf='filter2_malar_jan.csv', index=False)

In [None]:
# First ten distinct device ids, in order of first appearance.
pd.unique(df_preprocessed['maid'])[:10]

In [None]:
# All cleaned records belonging to one device id.
df_preprocessed.loc[df_preprocessed['maid'] == '00071ae2-d9dc-4e14-b3fc-9f8a818ee767']

In [None]:
# Rows 35-44 (by position) of one device's cleaned records.
df_preprocessed.loc[df_preprocessed['maid'] == 'd60a2068-d7ea-48a7-a363-e3e7e0bd02cb'].iloc[35:45]

In [None]:
# Number of rows removed by preprocessing.
df.shape[0] - df_preprocessed.shape[0]

## Menampilkan visualisasi

In [None]:
# Modified function for animated visualization based on timestamp with connecting lines
def visualisasi_pergerakan_animated_with_lines(selected_date, data, user_id, title):
    """Build an animated folium map of one user's GPS points for one day.

    Each record becomes a timestamped circle marker, and every pair of
    consecutive records (in chronological order) is joined by a line segment
    that appears at the later record's time.

    Parameters
    ----------
    selected_date : str, datetime.date, datetime.datetime or pandas.Timestamp
        Day to display; anything ``pd.to_datetime`` can parse.
    data : pandas.DataFrame
        Must contain ``maid``, ``latitude``, ``longitude`` and ``timestamp``.
        ``timestamp`` may be datetime-typed or parseable strings. The frame is
        not modified.
    user_id : str
        Value of ``maid`` to display.
    title : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    folium.Map or None
        The map with a ``TimestampedGeoJson`` layer, or ``None`` (after
        printing a message) when there are no records for that user and day.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    # pd.to_datetime accepts strings, dates, datetimes and Timestamps alike.
    target_day = pd.to_datetime(selected_date).date()

    # Filter by user first so timestamp parsing only touches that user's rows.
    user_rows = data[data['maid'] == user_id]
    # Parse on a derived frame: string timestamps (e.g. from a reloaded CSV)
    # work too, and the caller's frame stays untouched.
    user_rows = user_rows.assign(timestamp=pd.to_datetime(user_rows['timestamp']))
    selected_day_data = (
        user_rows[user_rows['timestamp'].dt.date == target_day]
        # Connecting lines must follow time order, not row order.
        .sort_values('timestamp', kind='stable')
    )

    if selected_day_data.empty:
        print(f"No data available for user_id {user_id} on {target_day}")
        return None

    first_row = selected_day_data.iloc[0]
    m = folium.Map(
        location=[float(first_row['latitude']), float(first_row['longitude'])],
        zoom_start=15,
    )

    # Prepare data for TimestampedGeoJson
    features = []
    prev_point = None  # Previous point, used to draw the connecting segment
    for lon, lat, stamp in zip(
        selected_day_data['longitude'],
        selected_day_data['latitude'],
        selected_day_data['timestamp'],
    ):
        point = [float(lon), float(lat)]  # GeoJSON order: [longitude, latitude]
        time_label = stamp.strftime(time_format)

        features.append({
            'type': 'Feature',
            'geometry': {
                'type': 'Point',
                'coordinates': point,
            },
            'properties': {
                'time': time_label,
                'icon': 'circle',
                'iconstyle': {
                    'fillColor': 'blue',
                    'fillOpacity': 0.6,
                    # Must be a real boolean: the string 'false' is truthy in
                    # JavaScript, so the outline would still be drawn.
                    'stroke': False,
                    'radius': 5
                },
                'popup': time_label,
            }
        })

        # For every point after the first, draw a line from the previous point.
        if prev_point is not None:
            features.append({
                'type': 'Feature',
                'geometry': {
                    'type': 'LineString',
                    'coordinates': [prev_point, point]
                },
                'properties': {
                    # Both vertices carry the current time, so the whole
                    # segment appears when the current point appears.
                    'times': [time_label] * 2,
                    'style': {'color': 'blue', 'weight': 3}
                }
            })

        prev_point = point

    timestamped_geojson = TimestampedGeoJson(
        {'type': 'FeatureCollection', 'features': features},
        period='PT1M',
        add_last_point=True,
        auto_play=False,
        loop=False,
        max_speed=1,
        loop_button=True,
        date_options='YYYY-MM-DD HH:mm:ss',
        time_slider_drag_update=True
    )

    timestamped_geojson.add_to(m)
    return m

In [None]:
# Pass df_preprocessed (the cleaned frame returned by preprocess_gps_data) so
# this cell runs on a fresh kernel.
map_view = visualisasi_pergerakan_animated_with_lines(
    '2021-12-01',
    df_preprocessed,
    'd60a2068-d7ea-48a7-a363-e3e7e0bd02cb',
    'uji coba',
)

In [None]:
# Last expression of the cell: displays the map built above (nothing is shown
# if the function returned None because no records matched).
map_view