In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import silhouette_samples, silhouette_score

In [2]:
from sklearn.metrics.pairwise import euclidean_distances

def delta(ck, cl):
    values = np.ones([len(ck), len(cl)])*10000
    
    for i in range(0, len(ck)):
        for j in range(0, len(cl)):
            values[i, j] = np.linalg.norm(ck[i]-cl[j])
            
    return np.min(values)
    
def big_delta(ci):
    values = np.zeros([len(ci), len(ci)])
    
    for i in range(0, len(ci)):
        for j in range(0, len(ci)):
            values[i, j] = np.linalg.norm(ci[i]-ci[j])
            
    return np.max(values)
    
def dunn(k_list):
    
    deltas = np.ones([len(k_list), len(k_list)])*1000000
    big_deltas = np.zeros([len(k_list), 1])
    l_range = list(range(0, len(k_list)))
    
    for k in l_range:
        for l in (l_range[0:k]+l_range[k+1:]):
            deltas[k, l] = delta(k_list[k], k_list[l])
        
        big_deltas[k] = big_delta(k_list[k])

    di = np.min(deltas)/np.max(big_deltas)
    return di

In [46]:
import numpy as np
from scipy.linalg import norm
from scipy.spatial.distance import cdist

class FCM:
    """Fuzzy C-means
    
    m: float, optional (default=2.0)
        Exponent for the fuzzy partition matrix, specified as a
        scalar greater than 1.0. This option controls the amount of
        fuzzy overlap between clusters, with larger values indicating
        a greater degree of overlap.
    
    u: array, shape = [n_samples, n_clusters]
        Fuzzy partition array, returned as an array with n_samples rows
        and n_clusters columns. Element u[i,j] indicates the degree of
        membership of the jth data point in the ith cluster. For a given
        data point, the sum of the membership values for all clusters is one.
    
    """
    def __init__(self, n_clusters=10, max_iter=150, m=2, error=1e-5, random_state=0):
        assert m > 1
        self.u, self.centers = None, None
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.m = m
        self.error = error
        self.random_state = random_state

    def fit(self, X):
        
        self.n_samples = X.shape[0]
        r = np.random.RandomState(self.random_state)
        u = r.rand(self.n_samples, self.n_clusters)
        u = u / np.tile(u.sum(axis=1)[np.newaxis].T, self.n_clusters)

        r = np.random.RandomState(self.random_state)
        self.u = r.rand(self.n_samples,self.n_clusters)
        self.u = self.u / np.tile(self.u.sum(axis=1)[np.newaxis].T, self.n_clusters)

        for iteration in range(self.max_iter):
            u_old = self.u.copy()

            self.centers = self.next_centers(X)
            self.u = self._predict(X)

            if norm(self.u - u_old) < self.error:
                break


    def next_centers(self, X):
        
        um = self.u ** self.m
        return (X.T @ um / np.sum(um, axis=0)).T

    def _predict(self, X):
        
        power = float(2 / (self.m - 1))
        temp = cdist(X, self.centers) ** power
        denominator_ = temp.reshape((X.shape[0], 1, -1)).repeat(temp.shape[-1], axis=1)
        denominator_ = temp[:, :, np.newaxis] / denominator_

        return 1 / denominator_.sum(2)

    def predict(self, X):

        if len(X.shape) == 1:
            X = np.expand_dims(X, axis=0)

        u = self._predict(X)
        return np.argmax(u, axis=-1)

In [4]:
class KMeans:

    def __init__(
            self,
            n_cluster: int,
            init_pp: bool = True,
            max_iter: int = 300,
            tolerance: float = 1e-4,
            seed: int = None):
        
        self.n_cluster = n_cluster
        self.max_iter = max_iter
        self.tolerance = tolerance
        self.init_pp = init_pp
        self.seed = seed
        self.centroid = None
        self.mse = None

    def fit(self, data: np.ndarray):
        
        self.centroid = self._init_centroid(data)
        for _ in range(self.max_iter):
            distance = self._calc_distance(data)
            cluster = self._assign_cluster(distance)
            new_centroid = self._update_centroid(data, cluster)
            diff = np.abs(self.centroid - new_centroid).mean()
            self.centroid = new_centroid

            if diff <= self.tolerance:
                break

        self.mse = calc_mse(self.centroid, cluster, data)

    def predict(self, data: np.ndarray):

        distance = self._calc_distance(data)
        cluster = self._assign_cluster(distance)
        return cluster

    def _init_centroid(self, data: np.ndarray):
        
        if self.init_pp:
            np.random.seed(self.seed)
            centroid = [int(np.random.uniform()*len(data))]
            for _ in range(1, self.n_cluster):
                dist = []
                dist = [min([np.inner(data[c]-x, data[c]-x) for c in centroid])
                        for i, x in enumerate(data)]
                dist = np.array(dist)
                dist = dist / dist.sum()
                cumdist = np.cumsum(dist)

                prob = np.random.rand()
                for i, c in enumerate(cumdist):
                    if prob > c and i not in centroid:
                        centroid.append(i)
                        break
            centroid = np.array([data[c] for c in centroid])
        else:
            np.random.seed(self.seed)
            idx = np.random.choice(range(len(data)), size=(self.n_cluster))
            centroid = data[idx]
        return centroid

    def _calc_distance(self, data: np.ndarray):
        distances = []
        for c in self.centroid:
            distance = np.mean((data - c) * (data - c), axis=1)
            distances.append(distance)

        distances = np.array(distances)
        distances = distances.T
        return distances

    def _assign_cluster(self, distance: np.ndarray):
        cluster = np.argmin(distance, axis=1)
        return cluster

    def _update_centroid(self, data: np.ndarray, cluster: np.ndarray):
        centroids = []
        for i in range(self.n_cluster):
            idx = np.where(cluster == i)
            centroid = np.mean(data[idx], axis=0)
            centroids.append(centroid)
        centroids = np.array(centroids)
        return centroids

In [5]:
class sw(object):

    def __init__(self):

        self.__Positions = []
        self.__Gbest = []

    def _set_Gbest(self, Gbest):
        self.__Gbest = Gbest

    def _points(self, agents):
        self.__Positions.append([list(i) for i in agents])

    def get_agents(self):
        """Returns a history of all agents of the algorithm (return type:
        list)"""

        return self.__Positions

    def get_Gbest(self):
        """Return the best position of algorithm (return type: list)"""

        return list(self.__Gbest)

In [38]:
from math import gamma, pi, sin
import numpy as np
from random import normalvariate, randint, random,seed
seed(0)
np.random.seed(0)

class cso(sw):
    """
    Cuckoo Search Optimization
    """

    def __init__(self, n, function, lb, ub, dimension, iteration,values, pa=0.25,
                 nest=156):
        """
        :param n: number of agents
        :param function: test function
        :param lb: lower limits for plot axes
        :param ub: upper limits for plot axes
        :param dimension: space dimension
        :param iteration: number of iterations
        :param pa: probability of cuckoo's egg detection (default value is 0.25)
        :param nest: number of nests (default value is 100)
        """

        super(cso, self).__init__()

        self.__Nests = []

        beta = 3 / 2
        sigma = (gamma(1 + beta) * sin(pi * beta / 2) / (
            gamma((1 + beta) / 2) * beta *
            2 ** ((beta - 1) / 2))) ** (1 / beta)
        u = np.array([normalvariate(0, 1) for k in range(dimension)]) * sigma
        v = np.array([normalvariate(0, 1) for k in range(dimension)])
        step = u / abs(v) ** (1 / beta)

        self.__agents = np.random.uniform(lb, ub, (n, dimension))
        self.__nests = values
        Pbest = self.__nests[np.array([function(x)
                                       for x in self.__nests]).argmin()]
        Gbest = Pbest
        self._points(self.__agents)

        for t in range(iteration):

            for i in self.__agents:
                val = randint(0, nest - 1)
                if function(i) < function(self.__nests[val]):
                    self.__nests[val] = i

            fnests = [(function(self.__nests[i]), i) for i in range(nest)]
            fnests.sort()
            fcuckoos = [(function(self.__agents[i]), i) for i in range(n)]
            fcuckoos.sort(reverse=True)

            nworst = nest // 2
            worst_nests = [fnests[-i - 1][1] for i in range(nworst)]

            for i in worst_nests:
                if random() < pa:
                    self.__nests[i] = np.random.uniform(lb, ub, (1, dimension))

            if nest > n:
                mworst = n
            else:
                mworst = nest

            for i in range(mworst):

                if fnests[i][0] < fcuckoos[i][0]:
                    self.__agents[fcuckoos[i][1]] = self.__nests[fnests[i][1]]

            self.__nests = np.clip(self.__nests, lb, ub)
            self.__Levyfly(step, Pbest, n, dimension)
            self.__agents = np.clip(self.__agents, lb, ub)
            self._points(self.__agents)
            self.__nest()

            Pbest = self.__nests[np.array([function(x)
                                        for x in self.__nests]).argmin()]

            if function(Pbest) < function(Gbest):
                Gbest = Pbest

        self._set_Gbest(Gbest)

    def __nest(self):
        self.__Nests.append([list(i) for i in self.__nests])

    def __Levyfly(self, step, Pbest, n, dimension):

        for i in range(n):
            stepsize = 0.2 * step * (self.__agents[i] - Pbest)
            self.__agents[i] += stepsize * np.array([normalvariate(0, 1)
                                                    for k in range(dimension)])

    def get_nests(self):
        """Return a history of cuckoos nests (return type: list)"""

        return self.__Nests

In [39]:
from math import *
def ackley_function(x):
    return -exp(-sqrt(0.5*sum([i**2 for i in x]))) - \
           exp(0.5*sum([cos(i) for i in x])) + 1 + exp(1)


def bukin_function(x):
    return 100*sqrt(abs(x[1]-0.01*x[0]**2)) + 0.01*abs(x[0] + 10)


def cross_in_tray_function(x):
    return round(-0.0001*(abs(sin(x[0])*sin(x[1])*exp(abs(100 -
                            sqrt(sum([i**2 for i in x]))/pi))) + 1)**0.1, 7)


def sphere_function(x):
    return sum([i**2 for i in x])


def bohachevsky_function(x):
    return x[0]**2 + 2*x[1]**2 - 0.3*cos(3*pi*x[0]) - 0.4*cos(4*pi*x[1]) + 0.7


def sum_squares_function(x):
    return sum([(i+1)*x[i]**2 for i in range(len(x))])


def sum_of_different_powers_function(x):
    return sum([abs(x[i])**(i+2) for i in range(len(x))])


def booth_function(x):
    return (x[0] + 2*x[1] - 7)**2 + (2*x[0] + x[1] - 5)**2


def matyas_function(x):
    return 0.26*sphere_function(x) - 0.48*x[0]*x[1]


def mccormick_function(x):
    return sin(x[0] + x[1]) + (x[0] - x[1])**2 - 1.5*x[0] + 2.5*x[1] + 1


def dixon_price_function(x):
    return (x[0] - 1)**2 + sum([(i+1)*(2*x[i]**2 - x[i-1])**2
                                for i in range(1, len(x))])


def six_hump_camel_function(x):
    return (4 - 2.1*x[0]**2 + x[0]**4/3)*x[0]**2 + x[0]*x[1]\
           + (-4 + 4*x[1]**2)*x[1]**2


def three_hump_camel_function(x):
    return 2*x[0]**2 - 1.05*x[0]**4 + x[0]**6/6 + x[0]*x[1] + x[1]**2


def easom_function(x):
    return -cos(x[0])*cos(x[1])*exp(-(x[0] - pi)**2 - (x[1] - pi)**2)


def michalewicz_function(x):
    return -sum([sin(x[i])*sin((i+1)*x[i]**2/pi)**20 for i in range(len(x))])


def beale_function(x):
    return (1.5 - x[0] + x[0]*x[1])**2 + (2.25 - x[0] + x[0]*x[1]**2)**2 + \
           (2.625 - x[0] + x[0]*x[1]**3)**2


def drop_wave_function(x):
    return -(1 + cos(12*sqrt(sphere_function(x))))/(0.5*sphere_function(x) + 2)
w = [ackley_function,bukin_function,
     cross_in_tray_function,sphere_function,
     bohachevsky_function,sum_squares_function,
     sum_of_different_powers_function,booth_function,matyas_function,
     mccormick_function,dixon_price_function,
     three_hump_camel_function,six_hump_camel_function,
    easom_function,michalewicz_function,beale_function]

In [40]:
def calc_mse(centroids: np.ndarray, labels: np.ndarray, data: np.ndarray):
    distances = []
    for i, c in enumerate(centroids):
        idx = np.where(labels == i)
        dist = np.mean((data[idx] - c)**2)
        distances.append(dist)
    return np.mean(distances)
from sklearn import metrics
def purity_score(y_true, y_pred):
    # compute contingency matrix (also called confusion matrix)
    contingency_matrix = metrics.cluster.contingency_matrix(y_true, y_pred)
    # return purity
    return np.sum(np.amax(contingency_matrix, axis=0)) / np.sum(contingency_matrix) 

In [41]:
from sklearn.preprocessing import StandardScaler
df=pd.read_csv('norm.csv')
df_req = df[['Stride Length (m)', 'Cadence(steps/min)', 'Leg Length (m)',
       'Age(years)','classs']]

In [65]:
X  = df_req[['Stride Length (m)', 'Cadence(steps/min)', 'Leg Length (m)',
       'Age(years)']].values
labels = df.classs.values
X = StandardScaler().fit_transform(X)
for i in w:
    a = cso(2,i,X.max(),X.min(),4,25,values = X)
    kmeans = KMeans(n_cluster=2, init_pp=False, seed=0)
    kmeans.centroid = a.get_agents()[0]
    predicted = kmeans.predict(X)
    print('<------------------',i,'--------------->')
    print("Silhouette Coefficient : %0.3f"
      % silhouette_score(predicted.reshape(-1,1),labels, metric='euclidean'))
    print('mse : %0.3f' % calc_mse(centroids=a.get_agents()[0], data=X, labels=labels))
    print('purity : %0.3f' % purity_score(labels,predicted))
    print('dunn : %0.3f' % dunn(a.get_agents()[0]))
    print('accuracy : %0.3f' % metrics.accuracy_score(labels,predicted))

<------------------ <function ackley_function at 0x0000009A6C468510> --------------->
Silhouette Coefficient : -0.077
mse : 5.775
purity : 0.564
dunn : 0.015
accuracy : 0.532
<------------------ <function bukin_function at 0x0000009A6C4682F0> --------------->
Silhouette Coefficient : -0.004
mse : 8.145
purity : 0.564
dunn : 0.003
accuracy : 0.449
<------------------ <function cross_in_tray_function at 0x0000009A6C468598> --------------->
Silhouette Coefficient : -0.020
mse : 5.488
purity : 0.564
dunn : 0.039
accuracy : 0.481
<------------------ <function sphere_function at 0x0000009A6C468620> --------------->
Silhouette Coefficient : 0.391
mse : 6.046
purity : 0.776
dunn : 0.003
accuracy : 0.776
<------------------ <function bohachevsky_function at 0x0000009A6C001F28> --------------->
Silhouette Coefficient : -0.029
mse : 6.011
purity : 0.564
dunn : 0.020
accuracy : 0.506
<------------------ <function sum_squares_function at 0x0000009A6C4687B8> --------------->
Silhouette Coefficient :

In [64]:
X  = df_req[['Stride Length (m)', 'Cadence(steps/min)', 'Leg Length (m)',
       'Age(years)']].values
labels = df.classs.values
X = StandardScaler().fit_transform(X)
for i in w:
    a = cso(2,i,X.max(),X.min(),4,25,values = X)
    fcm = FCM(n_clusters=2)
    fcm.centers = a.get_agents()[0]
    predicted = fcm.predict(X)
    print('<------------------',i,'--------------->')
    print("Silhouette Coefficient : %0.3f"
      % silhouette_score(predicted.reshape(-1,1),labels, metric='euclidean'))
    print('mse : %0.3f' % calc_mse(centroids=a.get_agents()[0], data=X, labels=labels))
    print('purity : %0.3f' % purity_score(labels,predicted))
    print('dunn : %0.3f' % dunn(a.get_agents()[0]))
    print('accuracy : %0.3f' % metrics.accuracy_score(labels,predicted))

<------------------ <function ackley_function at 0x0000009A6C468510> --------------->
Silhouette Coefficient : 0.303
mse : 4.769
purity : 0.737
dunn : 0.002
accuracy : 0.737
<------------------ <function bukin_function at 0x0000009A6C4682F0> --------------->
Silhouette Coefficient : 0.071
mse : 5.952
purity : 0.609
dunn : 0.002
accuracy : 0.391
<------------------ <function cross_in_tray_function at 0x0000009A6C468598> --------------->
Silhouette Coefficient : 0.025
mse : 6.287
purity : 0.571
dunn : 0.037
accuracy : 0.429
<------------------ <function sphere_function at 0x0000009A6C468620> --------------->
Silhouette Coefficient : 0.034
mse : 5.107
purity : 0.577
dunn : 0.009
accuracy : 0.577
<------------------ <function bohachevsky_function at 0x0000009A6C001F28> --------------->
Silhouette Coefficient : 0.045
mse : 4.594
purity : 0.583
dunn : 0.010
accuracy : 0.417




<------------------ <function sum_squares_function at 0x0000009A6C4687B8> --------------->
Silhouette Coefficient : 0.077
mse : 5.400
purity : 0.603
dunn : 0.007
accuracy : 0.397
<------------------ <function sum_of_different_powers_function at 0x0000009A688B3510> --------------->
Silhouette Coefficient : 0.023
mse : 6.546
purity : 0.564
dunn : 0.006
accuracy : 0.436
<------------------ <function booth_function at 0x0000009A6C468730> --------------->
Silhouette Coefficient : -0.032
mse : 7.981
purity : 0.564
dunn : 0.568
accuracy : 0.487
<------------------ <function matyas_function at 0x0000009A6C4686A8> --------------->
Silhouette Coefficient : 0.139
mse : 6.483
purity : 0.654
dunn : 0.063
accuracy : 0.346
<------------------ <function mccormick_function at 0x0000009A6C468840> --------------->
Silhouette Coefficient : 0.028
mse : 7.403
purity : 0.571
dunn : 0.002
accuracy : 0.429




<------------------ <function dixon_price_function at 0x0000009A6C4688C8> --------------->
Silhouette Coefficient : -0.033
mse : 7.396
purity : 0.564
dunn : 0.026
accuracy : 0.500
<------------------ <function three_hump_camel_function at 0x0000009A6C468D90> --------------->
Silhouette Coefficient : -0.014
mse : 7.169
purity : 0.564
dunn : 0.024
accuracy : 0.506
<------------------ <function six_hump_camel_function at 0x0000009A6C468D08> --------------->
Silhouette Coefficient : 0.000
mse : 5.228
purity : 0.564
dunn : 0.039
accuracy : 0.449
<------------------ <function easom_function at 0x0000009A6C468E18> --------------->
Silhouette Coefficient : 0.003
mse : 3.911
purity : 0.564
dunn : 0.051
accuracy : 0.545
<------------------ <function michalewicz_function at 0x0000009A6C468EA0> --------------->
Silhouette Coefficient : -0.004
mse : 5.583
purity : 0.564
dunn : 0.008
accuracy : 0.532
<------------------ <function beale_function at 0x0000009A6C468F28> --------------->
Silhouette Coef

