<a href="https://colab.research.google.com/github/nidamaulida/KP_NIDA_PT_POS/blob/main/test2internlatlong.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

test = pd.read_excel('/content/sample_geocoded_results.xlsx')
display(test.tail())

Unnamed: 0,connote__connote_code,full_address,connote__connote_service,custom_field__destination_nopen,pod__timereceive,pod__coordinate,cleaned_address,pred_lat,pred_long,has_city
995,P2410100013654,"JL SURYALAYA INDAH 2 RW 02 RT 003, KOTA BANDU...",PKH,40000,2024-10-14 12:56:59.000,"-6.9385157,107.6240814","jalan suryalaya indah 2 , kota bandung, sumur ...",-6.921846,107.607083,True
996,P2410120007185,"KP CIBOGO LEBAK RW 07 RT 004, KOTA BANDUNG, S...",PKH,40000,2024-10-17 17:25:07.000,"-6.8883106,107.5797625","cibogo lebak , kota bandung, sumur bandung, br...",-6.921846,107.607083,True
997,P2410100107655,JL SOEKARNO HATTA NO 748 CIMENERANG KEC GEDEBA...,PKH,40000,2024-10-12 15:20:13.000,"-6.9397531,107.7164349",jalan soekarno hatta no 748 cimenerang kec ged...,,,True
998,P2410100091542,"JL MUARARAJEUN LAMA II NO 4C RT 2 RW 8, KOTA B...",PE,40000,2024-10-11 14:23:07.000,"-6.9028349,107.6314919","jalan muararajeun lama ii no 4c , kota bandung...",-6.921846,107.607083,True
999,P2410140159557,"JL TERUSAN BUAH BATU NO 12 UP IBU NOVI, KOTA B...",PE,40000,2024-10-15 11:36:50.000,"-6.9433756,107.6508445","jalan terusan buah batu no 12 up ibu novi, kot...",-6.921846,107.607083,True


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import numpy as np

# --- 1. Filter data yang punya koordinat (untuk training) ---
train_df = test.dropna(subset=['pred_lat', 'pred_long'])

X = train_df['cleaned_address']
y = train_df[['pred_lat', 'pred_long']]

# --- 2. Vectorize address ---
vectorizer = TfidfVectorizer(max_features=5000)
X_vec = vectorizer.fit_transform(X)

# --- 3. Split Train-Test ---
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

# --- 4. Train Model ---
model = MultiOutputRegressor(RandomForestRegressor(n_estimators=200, random_state=42))
model.fit(X_train, y_train)

# --- 5. Evaluate ---
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"MAE: {mae:.5f}")

# --- 6. Prediksi untuk data NaN ---
nan_df = test[test['pred_lat'].isna()].copy()
if not nan_df.empty:
    nan_vec = vectorizer.transform(nan_df['cleaned_address'])
    preds = model.predict(nan_vec)
    nan_df[['pred_lat', 'pred_long']] = preds
    print("Prediksi titik koordinat untuk data NaN berhasil.")

# --- 7. Gabungkan kembali ---
final_df = pd.concat([train_df, nan_df]).sort_index()


MAE: 0.01041
Prediksi titik koordinat untuk data NaN berhasil.


In [3]:
import folium
from geopy.distance import geodesic
import pandas as pd
import numpy as np

# Pastikan dataframe kamu bernama test dan punya kolom:
# 'pred_lat', 'pred_long', dan 'pod__coordinate' (untuk koordinat asli)

# Fungsi untuk mengekstrak lat/long dari string 'latitude,longitude'
def extract_coords(coord_str):
    if isinstance(coord_str, str):
        try:
            lat, lon = coord_str.split(',')
            return float(lat), float(lon)
        except (ValueError, IndexError):
            return None, None
    return None, None

# Terapkan fungsi untuk membuat kolom 'original_lat' dan 'original_long'
test['original_lat'], test['original_long'] = zip(*test['pod__coordinate'].apply(extract_coords))

# 1. Hitung error dalam KM untuk tiap data
def hitung_error_km(row):
    if pd.notna(row['original_lat']) and pd.notna(row['original_long']) and pd.notna(row['pred_lat']) and pd.notna(row['pred_long']):
        return geodesic((row['original_lat'], row['original_long']), (row['pred_lat'], row['pred_long'])).km
    else:
        return None

test['error_km'] = test.apply(hitung_error_km, axis=1)

# 2. Hitung rata-rata error
# Hanya hitung rata-rata dari baris yang berhasil dihitung errornya
avg_error_km = test['error_km'].dropna().mean()
print(f"Rata-rata error jarak prediksi (dibandingkan koordinat asli): {avg_error_km:.2f} km")

# 3. Buat peta Folium (ambil titik tengah dari koordinat asli yang ada)
# Filter hanya baris dengan koordinat asli yang valid untuk pusat peta
valid_original_coords = test.dropna(subset=['original_lat', 'original_long'])

if not valid_original_coords.empty:
    center_lat = valid_original_coords['original_lat'].mean()
    center_long = valid_original_coords['original_long'].mean()
    m = folium.Map(location=[center_lat, center_long], zoom_start=11)

    # 4. Tambahkan marker untuk titik asli & prediksi
    for _, row in test.iterrows():
        # Marker untuk titik asli (jika ada)
        if pd.notna(row['original_lat']) and pd.notna(row['original_long']):
            folium.CircleMarker(
                location=[row['original_lat'], row['original_long']],
                radius=4, color='green', fill=True, fill_opacity=0.7,
                popup=f"Asli: {row['original_lat']}, {row['original_long']}"
            ).add_to(m)
        # Marker untuk titik prediksi (jika ada)
        if pd.notna(row['pred_lat']) and pd.notna(row['pred_long']):
            folium.CircleMarker(
                location=[row['pred_lat'], row['pred_long']],
                radius=4, color='blue', fill=True, fill_opacity=0.7,
                popup=f"Prediksi: {row['pred_lat']}, {row['pred_long']}"
            ).add_to(m)
        # Tambahkan garis antara titik asli dan prediksi jika keduanya ada
        if pd.notna(row['original_lat']) and pd.notna(row['original_long']) and pd.notna(row['pred_lat']) and pd.notna(row['pred_long']):
             folium.PolyLine(
                 locations=[(row['original_lat'], row['original_long']), (row['pred_lat'], row['pred_long'])],
                 color='red',
                 weight=1,
                 opacity=0.8
             ).add_to(m)


    # 5. Simpan peta ke file HTML
    m.save("hasil_prediksi_vs_asli.html")
    print("Peta perbandingan prediksi vs asli disimpan ke 'hasil_prediksi_vs_asli.html'")
else:
    print("\nTidak ada data koordinat asli yang valid untuk membuat peta perbandingan.")

Rata-rata error jarak prediksi (dibandingkan koordinat asli): 3.37 km
Peta perbandingan prediksi vs asli disimpan ke 'hasil_prediksi_vs_asli.html'


In [11]:
import pandas as pd
import folium
from sklearn.cluster import KMeans

# 1. Baca dataset
df = pd.read_excel('sample_geocoded_results.xlsx')

# Pastikan kolom 'pred_lat' & 'pred_long' ada
df = df.dropna(subset=['pred_lat', 'pred_long'])

# 2. Clustering (contoh: 5 cluster)
kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
df['Cluster'] = kmeans.fit_predict(df[['pred_lat', 'pred_long']])

# 3. Hitung centroid tiap cluster (masih dihitung tapi tidak ditampilkan)
centroids = df.groupby('Cluster')[['pred_lat', 'pred_long']].mean().reset_index()

# 4. Peta Interaktif
map_center = [df['pred_lat'].mean(), df['pred_long'].mean()]
m = folium.Map(location=map_center, zoom_start=11)

colors = ['red', 'blue', 'green', 'orange', 'purple']

# Tambahkan titik-titik pengiriman
for idx, row in df.iterrows():
    folium.CircleMarker(
        location=[row['pred_lat'], row['pred_long']],
        radius=4,
        color=colors[row['Cluster']],
        fill=True,
        fill_color=colors[row['Cluster']],
        popup=f"Address: {row['full_address']}, Cluster: {row['Cluster']}"
    ).add_to(m)

# Tambahkan centroid tiap cluster (bagian ini dihapus agar tidak tampil)
# for idx, row in centroids.iterrows():
#     folium.Marker(
#         location=[row['pred_lat'], row['pred_long']],
#         icon=folium.Icon(color='black', icon='star'),
#         popup=f"Centroid Cluster {row['Cluster']}"
#     ).add_to(m)

# 5. Tambahkan LEGEND (HTML & CSS custom)
legend_html = '''
<div style="
position: fixed;
bottom: 30px; left: 30px; width: 150px; height: 160px;
background-color: white; z-index:9999; font-size:14px;
border:2px solid grey; padding:10px;">
<b>Cluster Legend</b><br>
<i style="background:red; width:10px; height:10px; display:inline-block;"></i> Cluster 0<br>
<i style="background:blue; width:10px; height:10px; display:inline-block;"></i> Cluster 1<br>
<i style="background:green; width:10px; height:10px; display:inline-block;"></i> Cluster 2<br>
<i style="background:orange; width:10px; height:10px; display:inline-block;"></i> Cluster 3<br>
<i style="background:purple; width:10px; height:10px; display:inline-block;"></i> Cluster 4<br>
</div>
'''
m.get_root().html.add_child(folium.Element(legend_html))

m  # langsung tampil di Jupyter/Colab

In [12]:
test.head()

Unnamed: 0,connote__connote_code,full_address,connote__connote_service,custom_field__destination_nopen,pod__timereceive,pod__coordinate,cleaned_address,pred_lat,pred_long,has_city,original_lat,original_long,error_km
0,P2410120007887,"GG. AWI NGAHGAR RW 08 RT 011, KOTA BANDUNG, S...",PKH,40000,2024-10-16 15:17:58.000,"-6.8773312,107.5754132","gang awi ngahgar , kota bandung, sumur bandung...",-6.921846,107.607083,True,-6.877331,107.575413,6.040334
1,P2410110024749,"KOMP UJUNG BERUNG INDAH BLOK 26 NO.9 BANDUNG, ...",PKH,40000,2024-10-12 13:17:42.000,"-6.9094083,107.6986211","komplek ujung berung indah 26 no.9 bandung, ko...",,,True,-6.909408,107.698621,
2,P2410100005614,"JL PINUS RAYA . RW 01 RT 001, KOTA BANDUNG, S...",PKH,40000,2024-10-11 13:18:51.000,"-6.9346281,107.6821706","jalan pinus raya . , kota bandung, sumur bandu...",-6.964009,107.698787,True,-6.934628,107.682171,3.732262
3,LPU231316644437,"KOTA BANDUNG, REGOL, PASIRLUYU, KOTA BANDUNG, ...",PPB_KARTUPOS,40000,2024-10-15 15:23:40.000,"-6.9395524,107.6188576","kota bandung, regol, pasirluyu, kota bandung, ...",-6.861153,107.591677,True,-6.939552,107.618858,9.175827
4,P2410140110424,JL KARAPITAN NO 32 RT 2/5 PAEDAN LENGKONG BAND...,PKH,40000,2024-10-15 12:21:44.000,"-6.9278908,107.6144284",jalan karapitan no 32 5 paedan lengkong bandun...,,,True,-6.927891,107.614428,


In [5]:
test.tail()

Unnamed: 0,connote__connote_code,full_address,connote__connote_service,custom_field__destination_nopen,pod__timereceive,pod__coordinate,cleaned_address,pred_lat,pred_long,has_city,original_lat,original_long,error_km
995,P2410100013654,"JL SURYALAYA INDAH 2 RW 02 RT 003, KOTA BANDU...",PKH,40000,2024-10-14 12:56:59.000,"-6.9385157,107.6240814","jalan suryalaya indah 2 , kota bandung, sumur ...",-6.921846,107.607083,True,-6.938516,107.624081,2.631988
996,P2410120007185,"KP CIBOGO LEBAK RW 07 RT 004, KOTA BANDUNG, S...",PKH,40000,2024-10-17 17:25:07.000,"-6.8883106,107.5797625","cibogo lebak , kota bandung, sumur bandung, br...",-6.921846,107.607083,True,-6.888311,107.579763,4.78237
997,P2410100107655,JL SOEKARNO HATTA NO 748 CIMENERANG KEC GEDEBA...,PKH,40000,2024-10-12 15:20:13.000,"-6.9397531,107.7164349",jalan soekarno hatta no 748 cimenerang kec ged...,,,True,-6.939753,107.716435,
998,P2410100091542,"JL MUARARAJEUN LAMA II NO 4C RT 2 RW 8, KOTA B...",PE,40000,2024-10-11 14:23:07.000,"-6.9028349,107.6314919","jalan muararajeun lama ii no 4c , kota bandung...",-6.921846,107.607083,True,-6.902835,107.631492,3.420062
999,P2410140159557,"JL TERUSAN BUAH BATU NO 12 UP IBU NOVI, KOTA B...",PE,40000,2024-10-15 11:36:50.000,"-6.9433756,107.6508445","jalan terusan buah batu no 12 up ibu novi, kot...",-6.921846,107.607083,True,-6.943376,107.650845,5.390452


In [None]:
# Simpan DataFrame ke file Excel
df.to_excel('geocode2.xlsx', index=False)
print("DataFrame berhasil disimpan ke geocode2.xlsx")

In [14]:
import pandas as pd

test = pd.read_excel('/content/geocode2.xlsx')
display(test.head())

Unnamed: 0,connote__connote_code,full_address,connote__connote_service,custom_field__destination_nopen,pod__timereceive,pod__coordinate,cleaned_address,pred_lat,pred_long,has_city,Cluster
0,P2410120007887,"GG. AWI NGAHGAR RW 08 RT 011, KOTA BANDUNG, S...",PKH,40000,2024-10-16 15:17:58.000,"-6.8773312,107.5754132","gang awi ngahgar , kota bandung, sumur bandung...",-6.921846,107.607083,True,1
1,P2410100005614,"JL PINUS RAYA . RW 01 RT 001, KOTA BANDUNG, S...",PKH,40000,2024-10-11 13:18:51.000,"-6.9346281,107.6821706","jalan pinus raya . , kota bandung, sumur bandu...",-6.964009,107.698787,True,0
2,LPU231316644437,"KOTA BANDUNG, REGOL, PASIRLUYU, KOTA BANDUNG, ...",PPB_KARTUPOS,40000,2024-10-15 15:23:40.000,"-6.9395524,107.6188576","kota bandung, regol, pasirluyu, kota bandung, ...",-6.861153,107.591677,True,3
3,P2410160203113,"JL TERS KOPO KM 13,4 NO 633 641, KAB BANDUNG, ...",PE,40000,2024-10-17 14:00:13.000,"-7.0076084,107.5513731","jalan ters kopo km 13,4 no 633 641, kab bandun...",-6.973888,107.551139,True,2
4,P2410130002194,"KO KOPO MAS REGENCY 9-DD RW 01 RT 001, KOTA BA...",PKH,40000,2024-10-14 17:29:21.000,"-6.9590934,107.5792048","ko kopo mas regency 9dd , kota bandung, sumur ...",-6.921846,107.607083,True,1
