In [3]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.datasets import make_blobs

def _fcm_centroids(X, membership_mat, m):
    """Return the (n_clusters, n_features) membership-weighted means of ``X``."""
    weights = membership_mat ** m
    return (weights.T @ X) / weights.sum(axis=0)[:, None]


def fuzzy_c_means_clustering(X, n_clusters, m, max_iter=100, error=1e-5):
    """Cluster the rows of ``X`` with the fuzzy c-means algorithm.

    Memberships are initialised at random (from NumPy's global random state,
    so call ``np.random.seed`` beforehand for reproducible results) and then
    refined by alternating the standard updates

        u[j, a] = 1 / sum_b (d[j, a] / d[j, b]) ** (2 / (m - 1))
        c[a]    = sum_j u[j, a]**m * x[j] / sum_j u[j, a]**m

    where ``d[j, a]`` is the Euclidean distance from sample ``j`` to
    centroid ``a``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to cluster.
    n_clusters : int
        Number of clusters.
    m : float
        Fuzzifier; must be strictly greater than 1.
    max_iter : int, optional
        Maximum number of update iterations.
    error : float, optional
        Iteration stops once the Frobenius norm of the centroid change
        drops below this value.

    Returns
    -------
    centroids : ndarray of shape (n_clusters, n_features)
    membership_mat : ndarray of shape (n_samples, n_clusters)
        Each row sums to 1; a larger value means the sample is closer to
        that cluster's centroid.

    Raises
    ------
    ValueError
        If ``m <= 1`` (the exponent ``2 / (m - 1)`` is undefined).
    """
    if m <= 1:
        raise ValueError("m must be greater than 1, got {}".format(m))

    X = np.asarray(X, dtype=float)

    # Random initial memberships, normalised so every row sums to 1.
    membership_mat = np.random.rand(len(X), n_clusters)
    membership_mat = membership_mat / membership_mat.sum(axis=1, keepdims=True)

    centroids = _fcm_centroids(X, membership_mat, m)
    exponent = 2.0 / (m - 1)
    # Lower bound on distances: keeps the ratio below finite when a sample
    # sits exactly on a centroid (otherwise 0/0 would give NaN).
    tiny = np.finfo(float).eps

    for _ in range(max_iter):
        # distances[j, a] = ||X[j] - centroids[a]||
        distances = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
        distances = np.fmax(distances, tiny)

        # ratios[j, a, b] = d[j, a] / d[j, b]; summing over b gives the
        # denominator of the membership of sample j in cluster a.
        ratios = distances[:, :, None] / distances[:, None, :]
        membership_mat = 1.0 / np.sum(ratios ** exponent, axis=2)

        old_centroids = centroids
        centroids = _fcm_centroids(X, membership_mat, m)

        if np.linalg.norm(centroids - old_centroids) < error:
            break

    return centroids, membership_mat

# Seed NumPy's global random state so the synthetic data and the clustering
# initialisation (fuzzy_c_means_clustering draws from np.random) are
# reproducible under Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)

n_roads = 100
m_vehicles = 50
n_clusters = 2

# Synthetic input: one value drawn from {2, 4, 6} per (road, vehicle) slot,
# generated directly in its final (n_roads, m_vehicles) shape.
X = np.random.choice([2, 4, 6], size=(n_roads, m_vehicles))

# One random flow rate per road, min-max scaled to [0, 1].
flow_rates = np.random.rand(n_roads)
flow_rates = (flow_rates - np.min(flow_rates)) / (np.max(flow_rates) - np.min(flow_rates))

for i in range(n_roads):
    print("Road {}: {} ({}), flow rate: {}".format(i+1, X[i], np.mean(X[i]), flow_rates[i]))

# Two features per road: mean of its row in X, and its scaled flow rate.
X_combined = np.concatenate((np.mean(X, axis=1)[:, None], flow_rates[:, None]), axis=1)
centroids, membership_mat = fuzzy_c_means_clustering(X_combined, n_clusters, m=2)

# Hard assignment from the fuzzy memberships; ties go to cluster 2.
cluster_1 = np.where(membership_mat[:, 0] > membership_mat[:, 1])[0]
cluster_2 = np.where(membership_mat[:, 0] <= membership_mat[:, 1])[0]

print("\nCluster 1: {} roads".format(len(cluster_1)))
print("Cluster 2: {} roads".format(len(cluster_2)))
print("Centroids: {}".format(centroids))
print("Membership matrix:\n{}".format(membership_mat))


Road 1: [2 6 6 4 6 6 6 4 4 2 4 2 6 2 2 6 2 4 6 2 2 4 4 6 4 4 2 6 6 6 4 4 2 2 6 6 6
 4 4 2 4 4 4 4 6 6 4 4 2 4] (4.16), flow rate: 0.27172754752992134
Road 2: [4 4 6 2 2 2 6 4 4 4 2 2 6 2 6 4 4 2 2 6 4 2 6 2 2 6 4 4 6 6 6 6 6 2 4 4 6
 2 6 2 4 2 6 6 2 6 6 6 6 4] (4.16), flow rate: 0.488553795256286
Road 3: [2 6 2 4 6 2 2 4 2 2 4 6 2 4 4 2 2 6 2 4 2 6 4 4 6 4 2 4 2 2 6 4 6 2 6 2 4
 2 6 6 2 4 2 2 6 2 6 6 6 2] (3.72), flow rate: 0.0
Road 4: [2 6 2 2 4 6 2 4 4 4 4 2 2 4 4 2 4 2 4 4 4 4 4 2 4 4 6 6 2 6 6 4 6 6 6 4 4
 6 2 6 2 2 2 6 4 2 6 6 6 6] (4.04), flow rate: 0.836330480880643
Road 5: [4 6 2 4 4 6 2 2 4 4 2 2 4 6 6 4 2 2 4 6 4 2 4 2 6 6 2 2 6 2 6 2 2 2 4 4 4
 2 6 4 4 4 6 4 4 4 2 6 6 6] (3.88), flow rate: 0.4065441387466962
Road 6: [6 2 6 6 6 4 2 4 4 4 4 4 6 4 2 4 6 4 4 6 6 2 4 2 4 6 2 4 6 2 4 4 4 2 2 2 6
 2 2 4 4 4 2 6 2 6 2 4 6 6] (4.0), flow rate: 0.5427581467874526
Road 7: [2 2 6 2 4 2 2 6 6 2 2 4 4 6 6 6 4 6 4 4 6 4 4 6 6 4 4 4 6 4 4 2 2 6 2 4 4
 6 6 2 4 2 6 4 2 6 6 2 6 4] (4.16), flow