In [None]:
import json
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Load JSON data from file
def load_data(path='scenario_example_id_2.json'):
    """Load the 'Coordinates' entry of a scenario JSON file.

    Parameters
    ----------
    path : str, optional
        File name inside the './Test instances/' folder (resolved relative to
        the current working directory). Defaults to the scenario that used to
        be hard-coded, so existing ``load_data()`` calls behave as before.

    Returns
    -------
    The value stored under the 'Coordinates' key of the parsed JSON document.

    Raises
    ------
    FileNotFoundError
        If the file does not exist.
    KeyError
        If the parsed document has no 'Coordinates' key.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open('./Test instances/' + path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data['Coordinates']

# Perform KMeans clustering
def cluster_coordinates(coordinates, n_clusters=3, init='k-means++', n_init=10, max_iter=300, random_state=42):
    """Cluster coordinates with KMeans after min-max scaling.

    The input is converted to a NumPy array, rescaled with ``MinMaxScaler`` and
    fitted with a ``KMeans`` model built from the keyword parameters, which are
    passed through unchanged.

    Returns the fitted model's ``labels_`` attribute.
    """
    points = np.array(coordinates)

    # Rescale the data before clustering.
    normalised = MinMaxScaler().fit_transform(points)

    model = KMeans(
        n_clusters=n_clusters,
        init=init,
        n_init=n_init,
        max_iter=max_iter,
        random_state=random_state,
    )

    # fit() returns the estimator itself, so the labels can be read off directly.
    return model.fit(normalised).labels_

# Main function to load data and cluster
def main():
    """Load the scenario coordinates, cluster them and print every assignment."""
    points = load_data()

    # Clustering parameters can be tuned here.
    assignments = cluster_coordinates(
        points,
        n_clusters=4,
        init='random',
        n_init=20,
        max_iter=500,
        random_state=42,
    )

    print("Cluster labels for each coordinate:")
    for point, cluster_id in zip(points, assignments):
        print(f"{point} is in cluster {cluster_id}")

# Entry point: call main() only when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()


In [None]:
import json
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

# Load JSON data from file
def load_data():
    """Parse './Test instances/scenario_example_id_2.json' and return the whole document."""
    with open('./Test instances/scenario_example_id_2.json', 'r') as handle:
        return json.load(handle)

def cluster_coordinates(coordinates, n_clusters=3, init='k-means++', n_init=10, max_iter=300, random_state=42):
    """Return the KMeans ``labels_`` for the min-max scaled coordinates.

    All keyword parameters are forwarded unchanged to the ``KMeans`` constructor.
    """
    scaled_points = MinMaxScaler().fit_transform(np.array(coordinates))
    model = KMeans(
        n_clusters=n_clusters,
        init=init,
        n_init=n_init,
        max_iter=max_iter,
        random_state=random_state,
    )
    model.fit(scaled_points)
    return model.labels_

def calculate_total_distance(distances, labels):
    """Sum the pairwise distances between points that share a cluster label.

    For every distinct value in ``labels``, each unordered pair (a, b) of
    member indices with a before b contributes ``distances[a][b]`` once.
    Pairs from different clusters contribute nothing.
    """
    running_sum = 0
    for cluster_id in np.unique(labels):
        # Indices of the points that carry this label.
        members = np.where(labels == cluster_id)[0]
        # Pair each member with every later member of the same cluster.
        for position, first in enumerate(members):
            for second in members[position + 1:]:
                running_sum += distances[first][second]
    return running_sum

def main():
    """Load the scenario, cluster its coordinates and print the summed distance.

    NOTE(review): the printed wording describes a visiting route, while the
    value comes from calculate_total_distance(distances, labels), which sums
    every pairwise distance inside each cluster. Confirm which one is intended.
    """
    scenario = load_data()
    points, pairwise = scenario['Coordinates'], scenario['Distances']

    cluster_labels = cluster_coordinates(
        points,
        n_clusters=4,
        init='random',
        n_init=20,
        max_iter=500,
        random_state=42,
    )

    result = calculate_total_distance(pairwise, cluster_labels)

    print("Total distance for visiting all points within the same cluster before moving to the next:", result)

# Entry point: call main() only when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()


In [1]:
import json
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import pdist, squareform
from itertools import permutations

# Load JSON data from file
def load_data(path):
    """Return the 'Coordinates' entry of the JSON file ``path`` inside './Test instances/'."""
    with open('./Test instances/'+path, 'r') as handle:
        return json.load(handle)['Coordinates']

# Perform KMeans clustering
def cluster_coordinates(coordinates, n_clusters=3, init='k-means++', n_init=10, max_iter=300, random_state=42):
    """Run KMeans on the min-max scaled coordinates.

    Returns a ``(labels_, cluster_centers_)`` pair taken from the fitted model.
    The centers are those of the model fitted on the scaled data.
    """
    scaled_points = MinMaxScaler().fit_transform(np.array(coordinates))
    model = KMeans(
        n_clusters=n_clusters,
        init=init,
        n_init=n_init,
        max_iter=max_iter,
        random_state=random_state,
    ).fit(scaled_points)  # fit() returns the estimator itself
    return model.labels_, model.cluster_centers_

# Calculate the total distance of the tour
def _shortest_path_within_cluster(cluster):
    """Brute-force the shortest open path that visits every point of one cluster.

    Returns ``(length, ordered_points)``. A single-point cluster has length 0
    and is returned as a one-point path. Every permutation is tried, so the
    cost grows factorially with the cluster size.
    """
    if len(cluster) == 1:
        return 0, list(cluster)

    dist_matrix = squareform(pdist(cluster, 'euclidean'))
    min_distance, min_path = float('inf'), None
    for perm in permutations(range(len(cluster))):
        distance = sum(dist_matrix[perm[i], perm[i+1]] for i in range(len(perm)-1))
        if distance < min_distance:
            min_distance = distance
            min_path = perm
    return min_distance, [cluster[i] for i in min_path]


def calculate_total_distance(coordinates, labels):
    """Estimate the length of a cluster-by-cluster tour over the coordinates.

    Points are grouped by label. Inside each cluster the shortest open path is
    found by brute force. Clusters are then chained greedily, starting from the
    cluster of the first point and always moving to the nearest unvisited
    cluster centroid.

    Note that the legs between clusters are measured between cluster
    centroids, not between the actual end point of one path and the start
    point of the next, so the result is an approximation of a real route.

    Parameters
    ----------
    coordinates : sequence of points
        ``coordinates[i]`` is the point belonging to ``labels[i]``.
    labels : sequence of hashable cluster ids
        One label per coordinate.

    Returns
    -------
    The summed intra-cluster path lengths plus the centroid-to-centroid legs;
    0 when there are no points.

    Side effects
    ------------
    Prints the first point of the first visited cluster and the last point of
    the last visited cluster (nothing is printed for empty input).
    """
    clusters = {}
    for index, label in enumerate(labels):
        clusters.setdefault(label, []).append(coordinates[index])

    # Nothing to visit: avoid indexing into an empty path list below.
    if not clusters:
        return 0

    total_distance = 0
    paths = []
    for cluster in clusters.values():
        length, path = _shortest_path_within_cluster(cluster)
        total_distance += length
        # Every entry is a list of points, including single-point clusters,
        # so centroids and first/last lookups work uniformly.
        paths.append(path)

    # Order in which clusters are visited (indices into `paths`).
    visit_order = [0]
    if len(paths) > 1:
        cluster_centers = [np.mean(np.array(path), axis=0) for path in paths]
        dist_matrix = squareform(pdist(cluster_centers, 'euclidean'))
        remaining = list(range(1, len(paths)))
        current = 0
        while remaining:
            next_cluster = min(remaining, key=lambda x: dist_matrix[current, x])
            total_distance += dist_matrix[current, next_cluster]
            current = next_cluster
            remaining.remove(current)
            visit_order.append(current)

    # Report the end points following the actual visiting order, which may
    # differ from the order in which the clusters were first encountered.
    first_point = paths[visit_order[0]][0]
    last_point = paths[visit_order[-1]][-1]
    print(f"Overall first point visited: {first_point}, Overall last point visited: {last_point}")
    return total_distance

# Main function to load data and cluster
def main(path):
    """Cluster the coordinates of scenario file ``path`` and print the tour length and labels."""
    points = load_data(path)
    labels, _centers = cluster_coordinates(
        points,
        n_clusters=4,
        init='random',
        n_init=20,
        max_iter=500,
        random_state=42,
    )
    total_distance = calculate_total_distance(points, labels)
    print(f"Total distance of the tour: {total_distance} for {path}, {labels}")

#if __name__ == "__main__":
#    main()
# Run the pipeline on scenarios 1-4. A failure in one scenario (for example a
# missing file) is reported and the loop moves on to the next one. Only
# Exception is caught, so KeyboardInterrupt still stops the run.
for i in range(1, 5):
    scenario = f'scenario_example_id_{i}.json'
    try:
        main(scenario)
    except Exception as exc:
        print(f"Skipping {scenario}: {type(exc).__name__}: {exc}")
#main(f'scenario_example_id_9978.json')


Overall first point visited: [51, 36], Overall last point visited: [32, 116]
Total distance of the tour: 445.7724525748613 for scenario_example_id_1.json, [0 2 2 1 0 0 1 0 1 3 2 3 2 0 2 3 2 3 1 1]
Overall first point visited: [130, 125], Overall last point visited: [131, 38]
Total distance of the tour: 575.4860255027211 for scenario_example_id_2.json, [2 3 2 1 2 0 0 3 3 2 1 2 1 0 0 2 3 0 3 2]
Overall first point visited: [37, 76], Overall last point visited: [1, 3]
Total distance of the tour: 227.3232030352641 for scenario_example_id_3.json, [1 3 1 1 3 0 0 1 3 0 0 0 2 0 1 1 1 2 2 3]
Overall first point visited: [99, 83], Overall last point visited: [138, 138]
Total distance of the tour: 585.1914691838206 for scenario_example_id_4.json, [0 2 0 2 1 2 1 0 3 1 2 0 1 3 0 1 0 1 1 1]


In [None]:
# Scratch check: subtract the distances from the origin [0, 0] to the first and
# last visited points reported for scenario 1 ([51, 36] and [32, 116]).
# NOTE(review): 556.67311 looks like a reference tour length - confirm its source.
from scipy.spatial.distance import euclidean  # not imported anywhere else in the notebook

residual = 556.67311 - euclidean([0, 0], [51, 36]) - euclidean([0, 0], [32, 116])
residual