In [48]:
import numpy as np
import os
from sklearn import preprocessing
from sklearn.model_selection import cross_validate
import pandas as pd
import random
import pickle

class hybrid:
    def __init__(self,k,tol,max_iter):
        self.k=k
        self.tol=tol
        self.max_iter=max_iter
   
    def fit(self,data):
       
        self.centroids={};
       
        for i in range(self.k):
            self.centroids[i]=data[i]
                     
        for i in range(self.max_iter):
            self.classifications = {}

            for i in range(self.k):
                self.classifications[i] = []

            for featureset in data:
                distances = [np.linalg.norm(featureset-self.centroids[centroid]) for centroid in self.centroids]
                classification = distances.index(min(distances))
                self.classifications[classification].append(featureset)

            prev_centroids = dict(self.centroids)

            for classification in self.classifications:
                self.centroids[classification] = np.average(self.classifications[classification],axis=0)
            optimized = True

            for c in self.centroids:
                original_centroid = prev_centroids[c]
                current_centroid = self.centroids[c]
                if np.sum((current_centroid-original_centroid)/original_centroid*100.0) > self.tol:
                    #print(np.sum((current_centroid-original_centroid)/original_centroid*100.0))
                    optimized = False

            if optimized:
                break
        return (self.classifications,self.centroids)
            #print(self.classifications[1])
           
    #def predict(self,data):
     #   distances = [np.linalg.norm(data-self.centroids[centroid]) for centroid in self.centroids]
      #  classification = distances.index(min(distances))
       # return classification

df = pd.read_csv('cleveland_1.csv')
df.replace('?',0,inplace = True)
X = np.array(df.drop(['num'], 1))
X = preprocessing.scale(X)


k = 2
max_iter = 5
kmeans = hybrid(k,0.02,max_iter)

classifications,centroids = kmeans.fit(X)
centroids = np.array(list(centroids.values()))

#print('CLASSIFICATIONS:',classifications)
#print('CENTROIDS',centroids)

#DEF ROULETTE
def Roulette(antlion_fitness):
    totalsum = 0
    for x in range(len(antlion_fitness)-1,0,-1):
        totalsum += antlion_fitness[x]
    rand = random.uniform(0, totalsum)
    partialsum = 0
    for x in range(len(antlion_fitness)-1,0,-1):
        partialsum += antlion_fitness[x]
        if(partialsum >= rand):
            return x
    return -1

#DEF RANDOM WALK
def random_walk(max_iter):
    x_random_walk = [0]*(max_iter + 1)
    x_random_walk[0] = 0
    for k in range(1, len( x_random_walk)):
        randm = random.random()
        if randm > 0.5:
            rt = 1
        else:
            rt = 0
        x_random_walk[k] = x_random_walk[k-1] + (2*rt - 1)  
    return(x_random_walk)

def fitness_function(antlion,cluster): 
    antlion_fitness = []
    for al in antlion:
        fit_al = np.linalg.norm(al-centroids[cluster])
        antlion_fitness.append(fit_al)    
    return antlion_fitness
   
def update_ants(population, antlions, count, iterations, cluster, antlion_fitness):
    i_ratio = 1
    minimum_c_i   = np.zeros((1, population.shape[1]))
    maximum_d_i   = np.zeros((1, population.shape[1]))
    minimum_c_e   = np.zeros((1, population.shape[1]))
    maximum_d_e   = np.zeros((1, population.shape[1]))
    elite_antlion = np.zeros((1, population.shape[1]))
    if  (count > 0.10*iterations):
        w_exploration = 2
        i_ratio = (10**w_exploration)*(count/iterations)  
    elif(count > 0.50*iterations):
        w_exploration = 3
        i_ratio = (10**w_exploration)*(count/iterations)  
    elif(count > 0.75*iterations):
        w_exploration = 4
        i_ratio = (10**w_exploration)*(count/iterations)    
    elif(count > 0.90*iterations):
        w_exploration = 5
        i_ratio = (10**w_exploration)*(count/iterations)  
    elif(count > 0.95*iterations):
        w_exploration = 6
        i_ratio = (10**w_exploration)*(count/iterations)
    for i in range (0, population.shape[0]):
        fitness = fitness_function(antlions, cluster)
        #print(fitness)
        ant_lion = Roulette(fitness)
        for j in range (0, population.shape[1]-1):  
            minimum_c_i[0,j]   = antlions[antlions[:,-1].argsort()][0,j]/i_ratio
            maximum_d_i[0,j]   = antlions[antlions[:,-1].argsort()][-1,j]/i_ratio
            elite_antlion[0,j] = antlions[antlions[:,-1].argsort()][0,j]
            minimum_c_e[0,j]   = antlions[antlions[:,-1].argsort()][0,j]/i_ratio
            maximum_d_e[0,j]   = antlions[antlions[:,-1].argsort()][-1,j]/i_ratio  
            rand = int.from_bytes(os.urandom(8), byteorder = "big") / ((1 << 64) - 1)
            if (rand < 0.5):
                minimum_c_i[0,j] =   minimum_c_i[0,j] + antlions[ant_lion,j]
                minimum_c_e[0,j] =   minimum_c_e[0,j] + elite_antlion[0,j]
            else:
                minimum_c_i[0,j] = - minimum_c_i[0,j] + antlions[ant_lion,j]
                minimum_c_e[0,j] = - minimum_c_e[0,j] + elite_antlion[0,j]
               
            rand = int.from_bytes(os.urandom(8), byteorder = "big") / ((1 << 64) - 1)
            if (rand >= 0.5):
                maximum_d_i[0,j] =   maximum_d_i[0,j] + antlions[ant_lion,j]
                maximum_d_e[0,j] =   maximum_d_e[0,j] + elite_antlion[0,j]
            else:
                maximum_d_i[0,j] = - maximum_d_i[0,j] + antlions[ant_lion,j]
                maximum_d_e[0,j] = - maximum_d_e[0,j] + elite_antlion[0,j]  
            x_random_walk = random_walk(iterations)
            e_random_walk = random_walk(iterations)    
            min_x, max_x = min(x_random_walk), max(x_random_walk)
            x_random_walk[count] = (((x_random_walk[count] - min_x)*(maximum_d_i[0,j] - minimum_c_i[0,j]))/(max_x - min_x)) + minimum_c_i[0,j]  
            min_e, max_e = min(e_random_walk), max(e_random_walk)
            e_random_walk[count] = (((e_random_walk[count] - min_e)*(maximum_d_e[0,j] - minimum_c_e[0,j]))/(max_e - min_e)) + minimum_c_e[0,j]    
            population[i,j] = (x_random_walk[count] + e_random_walk[count])/2
            #, min_values[j], max_values[j])        
    return population, antlions

cent = {}
for cluster in range(0,k):
    print('CLUSTER',cluster,': ')
    itr = 0
    ant={}
    antlion={}
    ant_length = int(len(classifications[cluster])/2)
    for i in range(0,ant_length):
        ant[i] = random.choice(classifications[cluster])
    count=0
    lion_length = int(len(classifications[cluster]))-ant_length
    for j in classifications[cluster]:
        if j not in ant.items() and count<lion_length:
            antlion[count] = j
        count = count+1
   
    antlion = np.array(list(antlion.values()))
    ant = np.array(list(ant.values()))
    #print('ANT',cluster,ant)
    #print('ANTLION',cluster,antlion)
   
    ant_fitness = []
    for a in ant:
        fit_a = np.linalg.norm(a-centroids[cluster])
        ant_fitness.append(fit_a)

    antlion_fitness = []
    for al in antlion:
        fit_al = np.linalg.norm(al-centroids[cluster])
        antlion_fitness.append(fit_al)

   
    #print(len(ant_fitness))
    #print('ANT FITNESS',cluster,ant_fitness)
   
    #print(len(antlion_fitness))
    #print('ANTLION FITNESS',cluster,antlion_fitness)
    minimum = antlion_fitness.index(min(antlion_fitness))
    #print(minimum)
    elite = antlion[minimum]
    #print('ELITE',cluster,elite)  
    elite_fitness= antlion_fitness[minimum] #fitness_function(elite,cluster)
    
    while(itr<max_iter):
            #call update_ants function here
        #print("Iteration = ", itr)  
        ants, antlions = update_ants(ant, antlion, count = itr, iterations = max_iter, cluster = cluster, antlion_fitness = antlion_fitness)
     
        ant_fitness = fitness_function(ants, cluster)
        antlion_fitness= fitness_function(antlions,cluster)
        #print(antlion_fitness)
        for a in range(0,len(ant_fitness)):
            for al in range(0,len(antlion_fitness)):
                if ant_fitness[a]<antlion_fitness[al]:
                    antlions[al]=ants[a]
                else:
                    pass
        for al in range(0,len(antlion_fitness)):
            if antlion_fitness[al]<elite_fitness:
                elite = antlion[al]
                elite_fitness = antlion_fitness[al]
            else:
                pass
            #population, antlions = combine(population, antlions)    
            #value = np.copy(antlion[antlion[:,-1].argsort()][0,:])
            #if(elite[-1] > value[-1]):
             #   elite = np.copy(value)
            #else:
             #   antlion[antlion[:,-1].argsort()][0,:] = np.copy(elite)  
        itr = itr + 1
    print("ELITE",cluster,elite)
    cent[cluster] = elite 


with open('antlionmodel.pickle','wb') as f:
    pickle.dump('Updated project code',f)
    
def pred(data):
        distances = [np.linalg.norm(data-cent[centroid]) for centroid in cent]
        classification = distances.index(min(distances))
        return classification
        
y = np.array(df['num'])
correct = 0

for i in range(len(X)):

    pred_me = X[i]
    pred_me = pred_me.reshape(-1, len(pred_me))
    predn = pred(pred_me)
    
    if predn == y[i]:
        correct += 1
print('\nAccuracy - ')
print((correct/len(X))*100)



CLUSTER 0 : 
ELITE 0 [-0.16340252 -0.6363297  -0.50824399 -0.67942694 -0.21618052 -0.23616435
  0.29488196  0.04960146 -0.5447477  -0.71584031 -0.50797508 -1.03451862
 -0.86542592]
CLUSTER 1 : 
ELITE 1 [ 0.55993519  0.34749275  1.21406282  0.32289204  0.6777733   0.05441335
  0.50153761 -1.82628982  0.86869147 -0.23980967  0.30332947  0.59681293
  1.16731868]

Accuracy - 
84.15841584158416
