In [2]:
import os
#os.environ["OMP_NUM_THREADS"] = "1"
import sys
import numpy as np
from scipy.spatial.distance import squareform, pdist

In [3]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [4]:
from kneed import KneeLocator

In [83]:
class KMEANS(object):
    """K-means wrapper that chooses the number of clusters with the elbow method.

    The number of clusters ``k`` is estimated once, at construction time, from
    ``node_coord``; :meth:`cluster` then fits a ``KMeans`` model with that ``k``.

    Parameters
    ----------
    dist_matrix : array-like
        Data handed to ``KMeans.fit`` in :meth:`cluster`.
    node_coord : array-like
        Samples used to estimate ``k`` (one sample per row).
    max_k : int, optional
        Largest number of clusters tried during the estimate (default 10, the
        value that used to be hard-coded).  It is capped at the number of
        samples, because k-means cannot build more clusters than samples.
    random_state : int or None, optional
        Forwarded to every ``KMeans`` instance.  ``None`` (default) keeps the
        library's unseeded behaviour; pass an int for reproducible runs.
    """

    # k used when no elbow can be located (a single cluster, i.e. no split).
    FALLBACK_K = 1

    def __init__(self, dist_matrix, node_coord, max_k=10, random_state=None):
        self.node_coord = node_coord
        self.dist_matrix = dist_matrix
        self.max_k = max_k
        self.random_state = random_state
        self.k = self.estimate_k()
        self.kmeans = KMeans(n_clusters=self.k, random_state=self.random_state)

    # Estimate optimal k-value for KMeans
    def estimate_k(self):
        """Return the elbow of the SSE-versus-k curve computed on ``node_coord``.

        Returns
        -------
        int
            The number of clusters at the elbow, or ``FALLBACK_K`` when fewer
            than three candidate values exist or no elbow is detected.

        Raises
        ------
        ValueError
            If ``node_coord`` is ``None`` or empty.
        """
        if self.node_coord is None or len(self.node_coord) == 0:
            raise ValueError("node_coord must contain at least one sample to estimate k.")

        upper_k = min(self.max_k, len(self.node_coord))
        candidate_ks = list(range(1, upper_k + 1))
        if len(candidate_ks) < 3:
            # Too few points on the curve for an elbow to exist.
            return self.FALLBACK_K

        sse_list = []
        for k in candidate_ks:
            model = KMeans(n_clusters=k, random_state=self.random_state).fit(self.node_coord)
            # inertia_ is the sum of squared distances of the samples to their
            # closest cluster centre, i.e. the SSE of this fit.
            sse_list.append(float(model.inertia_))

        # x must hold the real k values (1..upper_k), not list positions
        # (0..upper_k-1); otherwise the reported knee is one cluster too small.
        kneedle = KneeLocator(x=candidate_ks, y=sse_list, curve="convex", direction="decreasing")
        if kneedle.knee is None:
            return self.FALLBACK_K
        return int(kneedle.knee)

    def cluster(self):
        """Fit the k-means model on ``dist_matrix`` and return its ``labels_``.

        NOTE(review): k is estimated from ``node_coord`` but the model is fitted
        on ``dist_matrix`` -- confirm that this asymmetry is intended.
        """
        self.kmeans = self.kmeans.fit(self.dist_matrix)
        return self.kmeans.labels_

In [87]:
#kmeans = KMEANS(distance_matrix, coordinates)
#labels = kmeans.cluster()

In [86]:
#plt.scatter(coordinates[:,0], coordinates[:,1], c=labels)
#plt.title("Simple city map")
#plt.xlabel("x")
#plt.ylabel("y")
#plt.show()
#plt.close()

In [70]:
class FileParser(object):
    """Parse a TSP input file into a distance matrix and, if given, coordinates.

    Parameters
    ----------
    filepath : str
        Path of the input file.
    edge_weight_section : bool, optional
        When true, an ``EDGE_WEIGHT_SECTION`` header is also recognised and
        parsed as explicit edge weights (default ``False``).

    After :meth:`parse`, ``distMatrix`` holds the symmetric distance matrix and
    ``coordinates`` holds the node coordinates (left as ``None`` when the
    matrix came from edge weights or when no section was found).
    """

    def __init__(self, filepath, edge_weight_section=False):
        self.filepath = filepath
        self.distMatrix = None
        self.edge_weight_section = edge_weight_section
        self.coordinates = None

    def _read_lines(self):
        """Return every line of the input file as a list of strings."""
        with open(self.filepath, 'r') as ifile:
            return ifile.readlines()

    # Input given as node coordinates
    def node_coord(self, idx, lines=None):
        """Read ``<id> <x> <y>`` rows after line ``idx`` and build the distances.

        Parameters
        ----------
        idx : int
            Zero-based index of the section header line; data starts at ``idx + 1``.
        lines : list of str, optional
            Already-read file content.  The file is read when omitted.

        Raises
        ------
        ValueError
            If a data row has fewer than three fields or a non-numeric coordinate.
        """
        if lines is None:
            lines = self._read_lines()

        nodes = list()
        for line_no, line in enumerate(lines[idx + 1:], start=idx + 2):
            stripped = line.strip()
            if stripped == "EOF":
                break
            if not stripped:
                continue
            # split() without an argument tolerates tabs and runs of spaces,
            # which column-aligned files contain.
            fields = stripped.split()
            if len(fields) < 3:
                raise ValueError(
                    "Line %d: expected '<id> <x> <y>', got %r" % (line_no, stripped)
                )
            nodes.append([float(fields[1]), float(fields[2])])

        nodes = np.array(nodes)
        self.coordinates = nodes
        self.distMatrix = squareform(pdist(nodes, metric="euclidean"))

    # Input given as edge weights
    def edge_weight(self, idx, lines=None):
        """Read explicit edge weights after line ``idx`` into a symmetric matrix.

        Every 0 entry is treated as the end of a row, so the weights must be
        laid out as a lower triangle whose rows end with the zero diagonal
        (presumably TSPLIB ``LOWER_DIAG_ROW`` -- confirm against the README).
        A zero off-diagonal weight is therefore not supported.

        Parameters
        ----------
        idx : int
            Zero-based index of the section header line; data starts at ``idx + 1``.
        lines : list of str, optional
            Already-read file content.  The file is read when omitted.
        """
        if lines is None:
            lines = self._read_lines()

        weights = list()
        for line in lines[idx + 1:]:
            stripped = line.strip()
            # Stop at the next section or at the end-of-file marker.
            if stripped in ("DISPLAY_DATA_SECTION", "EOF"):
                break
            weights.extend(float(token) for token in stripped.split())

        n = weights.count(0)
        distMatrix = np.zeros((n, n))
        rowIdx = 0
        colIdx = 0
        for w in weights:
            if w == 0:
                rowIdx += 1
                colIdx = 0
                continue
            distMatrix[rowIdx, colIdx] = w
            colIdx += 1
        # Only the lower triangle was filled; mirror it to get a full matrix.
        self.distMatrix = distMatrix + distMatrix.T

    def parse(self):
        """Locate the first recognised data section and parse it.

        ``NODE_COORD_SECTION`` and ``DISPLAY_DATA_SECTION`` are parsed as
        coordinates; ``EDGE_WEIGHT_SECTION`` is parsed as edge weights when
        ``edge_weight_section`` is true.  The first matching header wins.

        Returns
        -------
        FileParser
            ``self``, so the call can be chained.

        Raises
        ------
        SystemExit
            With status 1 when no header was found and the last line starts
            with a number (data without a section header).
        """
        lines = self._read_lines()
        for idx, line in enumerate(lines):
            header = line.strip()
            if header == "NODE_COORD_SECTION" or header == "DISPLAY_DATA_SECTION":
                self.node_coord(idx, lines)
                return self
            if self.edge_weight_section and header == "EDGE_WEIGHT_SECTION":
                self.edge_weight(idx, lines)
                return self

        # No header found.  An empty file simply leaves the parsed data unset.
        last_fields = lines[-1].split() if lines else []
        if last_fields and last_fields[0].isdigit():
            print("The input file is in the wrong format (see README.md for format specs).")
            sys.exit(1)
        return self

    # Return parsed data
    def parsed_data(self):
        """Return the tuple ``(distMatrix, coordinates)``."""
        return self.distMatrix, self.coordinates

In [68]:
class VarNeighbSearch(object):
    """Placeholder class with no behaviour yet.

    NOTE(review): the name suggests a variable neighbourhood search solver is
    planned here -- confirm before building on it.
    """

    def __init__(self):
        """Create an instance; no attributes are set."""

In [80]:
# Parse the pcb442 instance and expose its distance matrix and coordinates.
fileparser = FileParser("../input_data/pcb442.txt")
fileparser.parse()  # parse() returns the parser itself, so the binding is unchanged
distance_matrix, coordinates = fileparser.parsed_data()

In [11]:
#plt.scatter(coordinates[:,0], coordinates[:,1])