In [18]:
import os

import folium
import numpy as np
import pandas as pd
import polyline
import requests
from sklearn.cluster import KMeans

# Load the province table and extract the (latitude, longitude) pairs as a
# plain 2-column array for clustering.
data = pd.read_excel('Provinces.xlsx')
coordinates = data.loc[:, ['Latitude', 'Longitude']].to_numpy()

# Candidate facilities, keyed by city name, each stored as (latitude, longitude).
distribution_centers = dict([
    ('Hanoi', (21.028511, 105.804817)),
    ('HCMC', (10.762622, 106.660172)),
    ('Da Nang', (16.054407, 108.202167)),
    ('Can Tho', (10.045162, 105.746854)),
    ('Hai Phong', (20.844912, 106.688084)),
])

# Function to find the nearest distribution center for each centroid
def find_nearest_center(centroid, centers):
    """Return the name of the center closest to ``centroid``.

    Closeness is measured as great-circle (haversine) distance rather than a
    Euclidean norm on raw degrees: one degree of longitude covers less ground
    than one degree of latitude away from the equator, and a plain difference
    of longitudes is wrong across the +/-180 degree meridian.

    Parameters
    ----------
    centroid : sequence of two floats
        (latitude, longitude) in decimal degrees.
    centers : dict
        Maps a center name to its (latitude, longitude) in decimal degrees.

    Returns
    -------
    The key of the closest entry in ``centers``, or ``None`` when ``centers``
    is empty. On an exact tie the entry that appears first wins.
    """
    earth_radius_km = 6371.0088  # mean Earth radius
    lat0, lon0 = np.radians(np.asarray(centroid, dtype=float))

    nearest = None
    min_distance = float('inf')
    for name, location in centers.items():
        lat1, lon1 = np.radians(np.asarray(location, dtype=float))
        half_chord_sq = (
            np.sin((lat1 - lat0) / 2.0) ** 2
            + np.cos(lat0) * np.cos(lat1) * np.sin((lon1 - lon0) / 2.0) ** 2
        )
        # Clamp to 1.0 so rounding error can never push arcsin out of domain.
        dist = 2.0 * earth_radius_km * np.arcsin(np.sqrt(min(1.0, half_chord_sq)))
        if dist < min_distance:
            nearest = name
            min_distance = dist
    return nearest

# Function to get routing data from GraphHopper
def get_route(start, end, graphhopper_key, timeout=10):
    """Fetch a car route between two points from the GraphHopper Routing API.

    Parameters
    ----------
    start, end : sequence of two numbers
        (latitude, longitude) of the origin and the destination.
    graphhopper_key : str
        API key sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up. Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    The result of ``polyline.decode`` on the first returned path, or ``None``
    when the request fails, the status is not 200, or the response carries no
    usable path. The function never raises for network or payload problems,
    so callers can fall back to drawing a straight line.
    """
    url = "https://graphhopper.com/api/1/route"
    params = {
        'point': [f"{start[0]},{start[1]}", f"{end[0]},{end[1]}"],
        'vehicle': 'car',
        'key': graphhopper_key,
        # Sent as lowercase literals: Python booleans would be serialized as
        # "False"/"True" in the query string.
        'instructions': 'false',
        'points_encoded': 'true',
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # DNS failure, refused connection, timeout, ...
        return None

    if response.status_code != 200:
        return None

    try:
        response_data = response.json()
    except ValueError:
        # Body was not valid JSON.
        return None

    paths = response_data.get('paths') if isinstance(response_data, dict) else None
    if not paths:
        # Missing key or an empty list: nothing to decode.
        return None

    first_path = paths[0]
    route_polyline = first_path.get('points') if isinstance(first_path, dict) else None
    if not route_polyline:
        return None
    return polyline.decode(route_polyline)

# Group the demand points into five clusters; the fixed seed keeps the
# initialisation repeatable.
# NOTE(review): n_init is left at the library default, which is believed to
# differ between scikit-learn releases - pin it explicitly if cluster
# assignments must be identical across environments (TODO confirm).
kmeans = KMeans(n_clusters=5, random_state=0)
kmeans.fit(coordinates)
centroids = kmeans.cluster_centers_

# Base map centred on Vietnam.
map_vietnam = folium.Map(
    location=[14.058324, 108.277199],
    zoom_start=6,
    tiles='CartoDB Positron',
)

# Facility markers: regional distribution centres (RDC) and warehouses get
# their own Font Awesome icon and colour.
icons = {'RDC': 'cloud', 'Warehouse': 'industry'}
colors = {'RDC': 'green', 'Warehouse': 'red'}
for name, location in distribution_centers.items():
    facility_kind = 'RDC' if name in ('Da Nang', 'Can Tho', 'Hai Phong') else 'Warehouse'
    icon_type = icons[facility_kind]
    color = colors[facility_kind]
    facility_icon = folium.Icon(icon=icon_type, color=color, prefix='fa')
    facility_marker = folium.Marker(location=location, popup=name, icon=facility_icon)
    facility_marker.add_to(map_vietnam)

# Add routes and demand points.
#
# The GraphHopper key is read from the GRAPHHOPPER_API_KEY environment variable
# so that the credential is not stored in the notebook. A key that has been
# committed in a notebook should be considered leaked and rotated.
graphhopper_key = os.environ.get('GRAPHHOPPER_API_KEY')
if not graphhopper_key:
    print('GRAPHHOPPER_API_KEY is not set - drawing straight lines instead of road routes.')

cluster_colors = ['blue', 'green', 'orange', 'purple', 'yellow']
for i, centroid in enumerate(centroids):
    # Build the cluster mask once and reuse it for both the coordinates and
    # the province names, so the two stay aligned row for row.
    cluster_mask = kmeans.labels_ == i
    cluster_points = coordinates[cluster_mask]
    cluster_provinces = data.loc[cluster_mask, 'Province']

    # Every point of a cluster is served by the center nearest its centroid.
    center_name = find_nearest_center(centroid, distribution_centers)
    center_location = distribution_centers[center_name]
    color = cluster_colors[i % len(cluster_colors)]

    for point, province_name in zip(cluster_points, cluster_provinces):
        # Without a key every request would be rejected, so skip the call.
        route = get_route(center_location, point, graphhopper_key) if graphhopper_key else None
        if route:
            folium.PolyLine(locations=route, weight=2, color=color).add_to(map_vietnam)
        else:
            # No road route available: connect the two points directly in gray.
            folium.PolyLine(locations=[center_location, point], weight=2, color='gray').add_to(map_vietnam)
        folium.CircleMarker(
            location=point,
            radius=3,
            color=color,
            fill=True,
            fill_color=color,
            popup=f'Demand Point - {province_name}'
        ).add_to(map_vietnam)

map_vietnam


In [19]:
from geopy.distance import geodesic
def calculate_distance(start, end):
    """Return the geodesic distance in kilometres between two (lat, lon) points."""
    return geodesic(start, end).kilometers


# One record per province: its nearest distribution center and the distance to it.
# NOTE(review): the original called find_nearest_warehouse, which is not defined
# in the visible notebook and fails on a fresh kernel; find_nearest_center takes
# the same (point, centers) arguments - confirm no other helper was intended.
results = []
for _row_label, row in data.iterrows():
    point = (row['Latitude'], row['Longitude'])
    province_name = row['Province']
    nearest_warehouse_name = find_nearest_center(point, distribution_centers)
    nearest_warehouse_location = distribution_centers[nearest_warehouse_name]
    distance = calculate_distance(point, nearest_warehouse_location)
    results.append({
        'Province': province_name,
        'Latitude': row['Latitude'],
        'Longitude': row['Longitude'],
        'Nearest Warehouse': nearest_warehouse_name,
        'Distance (km)': distance
    })

# Convert results to a DataFrame and display it (nothing is written to disk here)
results_df = pd.DataFrame(results)
results_df