In [12]:
import numpy as np
import pandas as pd
import random
import sklearn
from sklearn.cluster import KMeans

def membership_mat(X, centers, K, m, seed):

    membership_mat_matrix = np.zeros((X.shape[0],K))
    for i in range(0, X.shape[0]):
        if X[i].tolist() in centers.tolist():
            membership_mat_matrix[i][np.where((centers == X[i]).all(axis=1))]=1
            continue
        for j in range(0, K):
            numerator = np.linalg.norm(X[i] - centers[j])
            sum=0;
            for k in range(0, K):
                denominator = np.linalg.norm(X[i] - centers[k])
                sum = sum + (numerator/denominator)**(2.0/(m-1))
            membership_mat_matrix[i][j] = 1/sum;

    return membership_mat_matrix


def update_centers(X, u, m):
    col = u.shape[1]
    new_u = np.power(u,m)
    new_u = new_u/np.sum(new_u, axis=0)
    centers = np.ones((col,X.shape[1]))
    centers = np.dot(new_u.T, X)
    return centers

def final_clustering(X, K = 4, m=2, error = 1e-3, seed = 101, MAX_ITER = 1000):
    indices = random.sample(range(1,X.shape[0]), K)
    centers = np.ones((K,X.shape[1]))
    count = 0
    for i in range(0,K):
        centers[i,:] = X[indices[i],:]
    prev = 0
    curr = 0
    for i in range(0,MAX_ITER):
        curr = membership_mat(X, centers, K, m, seed);
        if(i == 0):
            prev = curr
            continue;
        centers = update_centers(X,curr,m)
    return curr,centers,count

si = np.ones((56,9))
db = np.ones((56,9))
for i in range(1,57):
    X = pd.read_csv("1.csv", sep=',',header=None)
    X = X.to_numpy()
    X = X[:,0:-1]
    for K in range(2,11):
        u, centers, count = final_clustering(X, K = K, m=2, error = 1e-6, seed = 101, MAX_ITER = 50)
        clusters = [[] for i in range(0,K+1)]
        map = []
        for i in range(X.shape[0]):
            map.append(np.where(u[i]==np.max(u[i]))[0][0])
            clusters[map[i]+1].append(i+1)
        clusters.sort(key=len)

        si[i-1, K-2] = sklearn.metrics.silhouette_score(X, map, metric='euclidean')
        db[i-1, K-2] = sklearn.metrics.davies_bouldin_score(X, map)
        print(si[i-1, K-2], db[i-1, K-2])


0.8676572035835833
0.6368228410706144
0.5433478849623528
0.570011650439244
0.4520422094175051
0.42049386389531834
0.3909703134596366
0.4198052454245894
0.41356271460859284
0.6648551882775534
0.6232999133149253
0.5529596848702256
0.4826353535607278
0.47823286861094244
0.42906815839618195
0.43631520688477
0.40053243303613134
0.3621422769113913
0.7177377911925032
0.7039131255210155
0.6565746638338869
0.5717680300313321
0.5409684379401373
0.47137185370144674
0.43707271534487085
0.4318066673537763
0.41763252656969435
0.7272801085947528
0.6630482431283107
0.5877719749807695
0.5318902512128814
0.5023187982003281
0.4561218397480851
0.4482824282900118
0.44202922951423484
0.4377926430310143
0.7685768686796232
0.7097582267423312
0.574237126581949
0.5434340122288671
0.49667748882098894
0.46299811929653
0.4305510491856749
0.4321829900300619
0.39565444952223056
0.7972953347569972
0.7460201434066862
0.524042755579474
0.5347734897978823
0.4995956666696988
0.48481447703933284
0.5228786010665323
0.46704

KeyboardInterrupt: 

In [2]:
def membership(data, centers, num_clusters, m, seed):

    membership_matrix = np.zeros((data.shape[0],num_clusters))
    for i in range(0, data.shape[0]):
        if data[i].tolist() in centers.tolist():
            membership_matrix[i][np.where((centers == data[i]).all(axis=1))]=1
            continue
        for j in range(0, num_clusters):
            numerator = np.linalg.norm(data[i] - centers[j])
            sum=0;
            for k in range(0, num_clusters):
                denominator = np.linalg.norm(data[i] - centers[k])
                sum = sum + (numerator/denominator)**(2.0/(m-1))
            membership_matrix[i][j] = 1/sum;

    return membership_matrix

In [3]:
def find_new_centers(data, u, m):
    col = u.shape[1]
    new_u = np.power(u,m)
    new_u = new_u/np.sum(new_u, axis=0)
    centers = np.zeros((col,data.shape[1]))
    centers = np.dot(new_u.T, data)
    return centers

In [4]:
def finalc(data, num_clusters = 4, m=2, error = 1e-3, seed = 101, max_iterations = 1000):
    indices = random.sample(range(1,data.shape[0]), num_clusters)
    centers = np.zeros((num_clusters,data.shape[1]))
    i_count = 0
    for i in range(0,num_clusters):
        centers[i,:] = data[indices[i],:]
    membership_matrix_prev = 0
    membership_matrix_curr = 0
    for i in range(0,max_iterations):
        membership_matrix_curr = membership(data, centers, num_clusters, m, seed);
        if(i == 0):
            membership_matrix_prev = membership_matrix_curr
            continue;
        centers = find_new_centers(data,membership_matrix_curr,m)
    return membership_matrix_curr,centers,i_count

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [5]:

my_si = np.zeros((56,9))
my_db = np.zeros((56,9))
for file_number in range(1,57):
    data = pd.read_csv("", csv", sep=',',header=None)
    data = data.to_numpy()
    data = data[:,0:-1]
    for num_clusters in range(2,11):
        u, centers, i_count = finalc(data, num_clusters = num_clusters, m=2, error = 1e-6, seed = 101, max_iterations = 50)
        my_clusters = [[] for i in range(0,num_clusters+1)]
        my_map = []
        for i in range(data.shape[0]):
            my_map.append(np.where(u[i]==np.max(u[i]))[0][0])
            my_clusters[my_map[i]+1].append(i+1)
        my_clusters.sort(key=len)

        my_si[file_number-1, num_clusters-2] = sklearn.metrics.silhouette_score(data, my_map, metric='euclidean')
        my_db[file_number-1, num_clusters-2] = sklearn.metrics.davies_bouldin_score(data, my_map)
        print("Si AND DB for ",num_clusters,my_si[file_number-1, num_clusters-2], my_db[file_number-1, num_clusters-2])
        
    print('done file '+str(file_number))

# from google.colab import files
# fname = 'cmeans_shilv.csv'
# np.savetxt(fname, my_si, delimiter=',', fmt='%f')
# print('done')
# files.download(fname)
# fname2 = 'cmeans_db.csv'
# np.savetxt(fname2, my_db, delimiter=',', fmt='%f')
# print('done')
# files.download(fname2)

Si AND DB for  2 0.6516667938820518 0.7791321097824433
Si AND DB for  3 0.6368228410706144 0.5015883474334912
Si AND DB for  4 0.5433478849623528 0.5912392527019108
Si AND DB for  5 0.570011650439244 0.5458216844466486
Si AND DB for  6 0.5453234410811388 0.651175976151891
Si AND DB for  7 0.4560300811374435 0.6848884335180184
Si AND DB for  8 0.3909703134596366 0.7559947124255489
Si AND DB for  9 0.4198052454245894 0.7207446051543
Si AND DB for  10 0.41379406034828603 0.7447494616446708
done file 1
Si AND DB for  2 0.6516667938820518 0.7791321097824433
Si AND DB for  3 0.6368228410706144 0.5015883474334912
Si AND DB for  4 0.5433478849623528 0.5912392527019108
Si AND DB for  5 0.570011650439244 0.5458216844466486
Si AND DB for  6 0.4520422094175051 0.6275708625417369
Si AND DB for  7 0.42049386389531834 0.7203553270107987
Si AND DB for  8 0.438105968277005 0.7041665380392407
Si AND DB for  9 0.4198052454245894 0.7207446051543
Si AND DB for  10 0.41356271460859284 0.7167554693600218
don

Si AND DB for  4 0.5433478849623528 0.5912392527019108
Si AND DB for  5 0.570011650439244 0.5458216844466486
Si AND DB for  6 0.4520422094175051 0.627570862541737
Si AND DB for  7 0.42049386389531834 0.7203553270107991
Si AND DB for  8 0.3909703134596366 0.7559947124255489
Si AND DB for  9 0.4198052454245894 0.7207446051543003
Si AND DB for  10 0.41356271460859284 0.7167554693600218
done file 17
Si AND DB for  2 0.6516667938820518 0.7791321097824433
Si AND DB for  3 0.6368228410706144 0.5015883474334912
Si AND DB for  4 0.5433478849623528 0.5912392527019108
Si AND DB for  5 0.570011650439244 0.5458216844466486
Si AND DB for  6 0.4520422094175051 0.627570862541737
Si AND DB for  7 0.42049386389531834 0.7203553270107991
Si AND DB for  8 0.3909703134596366 0.7559947124255489
Si AND DB for  9 0.4198052454245894 0.7207446051543002
Si AND DB for  10 0.41356271460859284 0.7167554693600217
done file 18
Si AND DB for  2 0.6516667938820518 0.7791321097824433
Si AND DB for  3 0.6368228410706144 0

KeyboardInterrupt: 