In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import math

In [2]:
df_track = pd.read_csv("Track Analysis Cloud/data/device_track_1.csv")
df_elev = pd.read_csv("Track Analysis Cloud/data/track_with_elevation.csv")

In [3]:
df_track.head()

Unnamed: 0,track_id,device_id,timestamp,longitude,latitude,battery_level,emergency_status,condition,off_track
0,T-WAT_1-1762845227.0,WAT_1,11/11/2025 7:13,109.959301,-7.224694,65.96,False,badai petir,False
1,T-WAT_1-1762845287.0,WAT_1,11/11/2025 7:14,109.958848,-7.224514,65.91,False,badai petir,False
2,T-WAT_1-1762845347.0,WAT_1,11/11/2025 7:15,109.958525,-7.224461,65.9,False,badai petir,False
3,T-WAT_1-1762845407.0,WAT_1,11/11/2025 7:16,109.958535,-7.224219,65.85,False,badai petir,False
4,T-WAT_1-1762845467.0,WAT_1,11/11/2025 7:17,109.958295,-7.223958,65.81,False,badai petir,False


In [4]:
df_track.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 269958 entries, 0 to 269957
Data columns (total 9 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   track_id          269958 non-null  object 
 1   device_id         269958 non-null  object 
 2   timestamp         269958 non-null  object 
 3   longitude         269958 non-null  float64
 4   latitude          269958 non-null  float64
 5   battery_level     269958 non-null  float64
 6   emergency_status  269958 non-null  bool   
 7   condition         269958 non-null  object 
 8   off_track         269958 non-null  bool   
dtypes: bool(2), float64(3), object(4)
memory usage: 14.9+ MB


In [5]:
df_track["timestamp"] = pd.to_datetime(df_track["timestamp"])

In [6]:
df_track_1 = df_track[df_track['timestamp'].dt.date == pd.to_datetime("2025-11-11").date()].copy()
df_track_1.head()

Unnamed: 0,track_id,device_id,timestamp,longitude,latitude,battery_level,emergency_status,condition,off_track
0,T-WAT_1-1762845227.0,WAT_1,2025-11-11 07:13:00,109.959301,-7.224694,65.96,False,badai petir,False
1,T-WAT_1-1762845287.0,WAT_1,2025-11-11 07:14:00,109.958848,-7.224514,65.91,False,badai petir,False
2,T-WAT_1-1762845347.0,WAT_1,2025-11-11 07:15:00,109.958525,-7.224461,65.9,False,badai petir,False
3,T-WAT_1-1762845407.0,WAT_1,2025-11-11 07:16:00,109.958535,-7.224219,65.85,False,badai petir,False
4,T-WAT_1-1762845467.0,WAT_1,2025-11-11 07:17:00,109.958295,-7.223958,65.81,False,badai petir,False


In [7]:
df_track_2 = df_track[df_track['timestamp'].dt.date == pd.to_datetime("2025-11-12").date()].copy()
df_track_2.head()

Unnamed: 0,track_id,device_id,timestamp,longitude,latitude,battery_level,emergency_status,condition,off_track


In [None]:
df_track_3 = df_track[df_track['timestamp'].dt.date == pd.to_datetime("2025-11-13").date()].copy()
df_track_3.head()

In [8]:
df_elev.head()

Unnamed: 0,track,lat,lon,elevation
0,Via Patakbanteng 001,-7.209568,109.925435,1996.0
1,Via Patakbanteng 001,-7.20964,109.92545,1996.0
2,Via Patakbanteng 001,-7.209698,109.925453,1996.0
3,Via Patakbanteng 001,-7.209734,109.925416,1996.0
4,Via Patakbanteng 001,-7.209695,109.925361,1996.0


In [9]:
# pastikan kolom terstandarisasi namanya
df_elev.rename(columns={ 
    "elevation": "ele"
}, inplace=True)

In [10]:
df_elev.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1308 entries, 0 to 1307
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   track   1308 non-null   object 
 1   lat     1308 non-null   float64
 2   lon     1308 non-null   float64
 3   ele     1308 non-null   float64
dtypes: float64(3), object(1)
memory usage: 41.0+ KB


In [11]:
# ==========================================================
# 2. HITUNG FITUR SEGMENT-LEVEL
# ==========================================================
# Setiap segment = titik i → i+1 dalam track_with_elevation

segments = []

for i in range(len(df_elev) - 1):
    lat1, lon1, ele1 = df_elev.loc[i, ["lat", "lon", "ele"]]
    lat2, lon2, ele2 = df_elev.loc[i+1, ["lat", "lon", "ele"]]

    # -----------------------------
    # a) segment length (meters)
    # -----------------------------
    R = 6371000
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)

    a = (math.sin(dphi/2)**2 +
         math.cos(phi1) * math.cos(phi2) * math.sin(dlambda/2)**2)
    dist = 2 * R * math.atan2(np.sqrt(a), np.sqrt(1 - a))

    # -----------------------------
    # b) slope (elevation gain / distance)
    # -----------------------------
    if dist == 0:
        slope = 0
    else:
        slope = (ele2 - ele1) / dist  # meters per meter → unitless

    # -----------------------------
    # c) curvature (perubahan heading)
    # -----------------------------
    # heading i → i+1
    heading1 = math.atan2(
        math.radians(lon2 - lon1),
        math.radians(lat2 - lat1)
    )

    # heading i+1 → i+2 (kalau ada)
    if i < len(df_elev) - 2:
        lat3, lon3 = df_elev.loc[i+2, ["lat", "lon"]]
        heading2 = math.atan2(
            math.radians(lon3 - lon2),
            math.radians(lat3 - lat2)
        )
        curvature = abs(heading2 - heading1)
    else:
        curvature = 0  # segment terakhir

    # simpan
    segments.append({
        "segment_id": i,
        "lat": lat1,
        "lon": lon1,
        "length_m": dist,
        "slope": slope,
        "curvature": curvature
    })

df_seg = pd.DataFrame(segments)

In [12]:
df_seg.head()

Unnamed: 0,segment_id,lat,lon,length_m,slope,curvature
0,0,-7.209568,109.925435,8.131323,0.0,0.161757
1,1,-7.20964,109.92545,6.521666,0.0,5.448443
2,2,-7.209698,109.925453,5.651978,0.0,1.391117
3,3,-7.209734,109.925416,7.517339,0.0,0.969437
4,4,-7.209695,109.925361,17.765037,0.0,0.054262


In [13]:
# ==========================================================
# 3. GABUNG DENGAN device_track (density, stuck, offtrack)
# ==========================================================

# hitung jumlah device per segmen berdasarkan kedekatan koordinat
def closest_segment(lat, lon):
    d = (df_seg["lat"] - lat)**2 + (df_seg["lon"] - lon)**2
    return d.idxmin()

df_track_1["segment_id"] = df_track_1.apply(
    lambda row: closest_segment(row["latitude"], row["longitude"]), axis=1
)

In [14]:
df_track_1[:10]

Unnamed: 0,track_id,device_id,timestamp,longitude,latitude,battery_level,emergency_status,condition,off_track,segment_id
0,T-WAT_1-1762845227.0,WAT_1,2025-11-11 07:13:00,109.959301,-7.224694,65.96,False,badai petir,False,771
1,T-WAT_1-1762845287.0,WAT_1,2025-11-11 07:14:00,109.958848,-7.224514,65.91,False,badai petir,False,778
2,T-WAT_1-1762845347.0,WAT_1,2025-11-11 07:15:00,109.958525,-7.224461,65.9,False,badai petir,False,780
3,T-WAT_1-1762845407.0,WAT_1,2025-11-11 07:16:00,109.958535,-7.224219,65.85,False,badai petir,False,783
4,T-WAT_1-1762845467.0,WAT_1,2025-11-11 07:17:00,109.958295,-7.223958,65.81,False,badai petir,False,788
5,T-WAT_1-1762845527.0,WAT_1,2025-11-11 07:18:00,109.958105,-7.223698,65.78,False,badai petir,False,799
6,T-WAT_1-1762845587.0,WAT_1,2025-11-11 07:19:00,109.957989,-7.223665,65.74,False,badai petir,False,798
7,T-WAT_1-1762845647.0,WAT_1,2025-11-11 07:20:00,109.958177,-7.22333,65.72,False,badai petir,False,802
8,T-WAT_1-1762845707.0,WAT_1,2025-11-11 07:21:00,109.957711,-7.223493,65.7,False,badai petir,True,807
9,T-WAT_1-1762845767.0,WAT_1,2025-11-11 07:22:00,109.957885,-7.223006,65.69,False,badai petir,False,812


In [15]:
# hitung fitur tambahan
seg_stats = df_track_1.groupby("segment_id").agg({
    "device_id": "count",
    "off_track": "mean"
}).rename(columns={
    "device_id": "density",
    "off_track": "offtrack_rate"
}).reset_index()

df_seg = df_seg.merge(seg_stats, on="segment_id", how="left")

df_seg.fillna({
    "density": 0,
    "offtrack_rate": 0
}, inplace=True)

In [16]:
features = df_seg[["length_m", "slope", "curvature", "density", "offtrack_rate"]]

# Standarisasi fitur
scaler = StandardScaler()
X = scaler.fit_transform(features)

kmeans = KMeans(n_clusters=3, random_state=0, n_init="auto")
kmeans.fit(X)

# # Label cluster
# difficulty_map = {
#      0: "Noise / Outlier",
#      1: "Mudah",
#      2: "Sedang",
#      3: "Sulit",
#      4: "Sangat Sulit"
# }

# df_seg["difficulty"] = df_seg["cluster"].map(difficulty_map).fillna("Sulit")

[WinError 2] The system cannot find the file specified
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 501, in run
    with Popen(*popenargs, **kwargs) as process:
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 966, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 1435, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,


In [20]:
df_seg["cluster"] = kmeans.labels_

In [24]:
kmeans.feature_names_in_

AttributeError: 'KMeans' object has no attribute 'feature_names_in_'

In [None]:
df_seg[:10]

In [None]:
diff_count = df_seg.groupby("cluster").agg({
    "segment_id": "count"
})
print(diff_count)

In [None]:
import matplotlib.pyplot as plt

difficulty = df_seg['cluster']
norm = (difficulty - difficulty.min()) / (difficulty.max() - difficulty.min() + 1e-9)

plt.figure(figsize=(10, 8))
sc = plt.scatter(
    df_seg["lon"],
    df_seg["lat"],
    c=norm,
    cmap="plasma",     # bisa diganti viridis, inferno, turbo, magma, dll
    s=25,
    edgecolor="none"
)

plt.colorbar(sc, label="Difficulty Level (Normalized)")
plt.title("Visualisasi Kesulitan Segmen Jalur Gunung (DBSCAN)")
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.grid(True, alpha=0.3)

plt.show()

In [None]:
outlier = df_seg.loc[df_seg['cluster'] == -1]

plt.figure(figsize=(8, 6))
sc = plt.scatter(
    outlier["lon"],
    outlier["lat"],     
    edgecolor="none"
)

plt.title("Noise")
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.grid(True, alpha=0.3)

plt.show()

In [None]:
outlier

In [None]:
from sklearn import metrics

labels = db.labels_
sc = metrics.silhouette_score(X, labels)
print("Silhouette Coefficient:%0.2f" % sc)

In [None]:
# ==========================================================
# 5. SAVE OUTPUT
# ==========================================================

# df_seg.to_csv("segment_difficulty_dbscan.csv", index=False)
# print("Saved: segment_difficulty_dbscan.csv")