In [None]:
def pso_iris(particles, ds, n_iters):
    """
    
    particles: list of particles and their centroids
    ds: dataset to apply pso on
    n_iters: nr of iterations
    returns: global best error and global best position

    """
    
    ds = ds.to_numpy()
    
    omega = 0.72 # from paper
    alpha = 1.49 # from paper
    
    global_best_fitness = float("-inf")
    global_best_error = float("inf")
    global_best_pos = []
    personal_best_error = np.full((len(particles)), np.inf)
    personal_best_pos = np.zeros((len(particles), len(particles[0]), len(ds[0])))
    velocity = np.zeros((len(particles), len(particles[0]), len(ds[0])))
    
    min_quant_error = []
    
    for i in range(0, n_iters):
        # for each iteration, assign datapoints to cluster based on particle centroids 
        # and compute quantization error
        
        quantization_errors = []
        
        for particle in particles:
    
            cluster_0 = []
            cluster_1 = []
            cluster_2 = []
                        
            for datapoint in ds:
                # calculate euclidean distance particle centroid and datapoint
                
                dis0 = scipy.spatial.distance.euclidean(datapoint, particle[0])
                dis1 = scipy.spatial.distance.euclidean(datapoint, particle[1])
                dis2 = scipy.spatial.distance.euclidean(datapoint, particle[2])

                # assigning datapoints to clusters
                if dis0 < dis1 and dis0 < dis2:
                    cluster_0.append(datapoint)
                    
                if dis1 < dis0 and dis1 < dis2:
                    cluster_1.append(datapoint)
                    
                if dis2 < dis0 and dis2 < dis1:
                    cluster_2.append(datapoint)
                    
            clusters = [cluster_0, cluster_1, cluster_2]
            error = quantization_error(clusters, particle) 
            quantization_errors.append(error)
        
        # keeping track of min quantization error per iteration 
        min_quant_error.append(np.min(quantization_errors))
        
        # updating best error and position for update rule
        for idx, q_error in enumerate(quantization_errors):
            
            if q_error < personal_best_error[idx]:
                personal_best_error[idx] = q_error
                personal_best_pos[idx] = particles[idx]
            
            if personal_best_error[idx] < global_best_error:
                global_best_error = personal_best_error[idx]
                global_best_pos = np.array(particles[idx])
                
        # updating the velocity and particle position based on update rule from slides
        for idx, particle in enumerate(particles):
            velocity[idx], particles[idx] = update_velocity(particle, velocity[idx], personal_best_pos[idx], global_best_pos)
            
            
    return global_best_error, global_best_pos

In [None]:
def pso_ds1(particles, ds, n_iters):
    """
    particles: list of particles and their centroids
    ds: dataset to apply pso on
    n_iters: nr of iterations
    returns: global best error and global best position
    """
    ds = ds.to_numpy()
    
    global_best_fitness = float("-inf")
    global_best_error = float("inf")
    global_best_pos = []
    personal_best_error = np.ones(len(particles))
    personal_best_pos = np.zeros((len(particles), len(particles[0]), len(ds[0])))
    velocity = np.zeros((len(particles), len(particles[0]), len(ds[0])))
    
    min_quant_error = []
    
    for i in range(0, n_iters):
        # for each iteration, assign datapoints to cluster based on particle centroids 
        # and compute quantization error
        
        quantization_errors = []
        
        for particle in particles:
    
            cluster_0 = []
            cluster_1 = []
                        
            for datapoint in ds:
                # calculate euclidean distance particle centroid and datapoint
                dis0 = scipy.spatial.distance.euclidean(datapoint, particle[0])
                dis1 = scipy.spatial.distance.euclidean(datapoint, particle[1])

                # assigning datapoints to clusters
                if dis0 < dis1:
                    cluster_0.append(datapoint)
                else:
                    cluster_1.append(datapoint)
                    
            clusters = [cluster_0, cluster_1]
            error = quantization_error(clusters, particle)        
            quantization_errors.append(error)
            
        # keeping track of min quantization error per iteration 
        min_quant_error.append(np.min(quantization_errors))
        
        # updating best error and position for update rule
        for idx, q_error in enumerate(quantization_errors):
            
            if q_error < personal_best_error[idx]:
                personal_best_error[idx] = q_error
                personal_best_pos[idx] = particles[idx]
            
            if personal_best_error[idx] < global_best_error:
                global_best_error = personal_best_error[idx]
                global_best_pos = particles[idx]
                
        
        # updating the velocity and particle position based on update rule from slides
        for idx, particle in enumerate(particles):
            velocity[idx], particles[idx] = update_velocity(particle, velocity[idx], personal_best_pos[idx], global_best_pos)
            
    return global_best_error, global_best_pos

In [1]:
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import scipy

In [2]:
z1=np.random.uniform(-1,1,400)
z2=np.random.uniform(-1,1,400)

In [3]:
artificial_df=pd.DataFrame({"Z1" : z1,"Z2" : z2})

In [4]:
kmeans1 = KMeans(n_clusters=2, random_state=0).fit(artificial_df)

In [5]:
prediction_artificial_df=kmeans1.predict(artificial_df)

In [6]:
centroid1_A_df=kmeans1.cluster_centers_[0]
centroid2_A_df=kmeans1.cluster_centers_[1]


In [7]:
cluster1_A=[]
cluster2_A=[]
for i in range(0,len(artificial_df)):
    if prediction_artificial_df[i]==0:
        
        cluster1_A.append(artificial_df.loc[i])
        
    if prediction_artificial_df[i]==1:
        cluster2_A.append(artificial_df.loc[i])


In [8]:
def quantization_error2(cluster1,cluster2,centroid1,centroid2):
            
    distance1=0
    distance2=0


    for i in range(0,len(cluster1)):

        dist1=scipy.spatial.distance.euclidean(np.asarray(cluster1[i]), centroid1)
        distance1=distance1+dist1
    
    if len(cluster1)>0:
        distance1=distance1/len(cluster1)
    else:
        distance1=0

    for i in range(0,len(cluster2)):

        dist2=scipy.spatial.distance.euclidean(np.asarray(cluster2[i]), centroid2)
        distance2=distance2+dist2
    
    if len(cluster2)>0:
        distance2=distance2/len(cluster2)
    else:
        distance2=0


    QE=(distance1+distance2)/2
    ##print("Quantization error: ",QE)
    return QE

In [9]:
print ("The quantization error for the KMEANS algorithm for the artificial dataset is  ", quantization_error2(cluster1_A,cluster2_A,centroid1_A_df,centroid2_A_df))


The quantization error for the KMEANS algorithm for the artificial dataset is   0.5666574240638876


In [22]:
particles_list=[]
for i in range(0,10):
    particles=np.random.uniform(-1,1,4).reshape(2,2)
    particles_list.append(particles)

In [23]:
personal_best_error=np.ones(10)
global_best_position=[]
global_best_error=1
personal_best_position=np.zeros(40).reshape(10,2,2)

alpha=1.49618
w=0.7298
velocity=np.zeros(4).reshape(2,2) 
min_quant_error=[]
##for each iteration
for iteration in range(0,10):
    
    quantization_error=[] 
    count=0
    
    ##for each particles 
    for i in range(0,len(particles_list)):

        
        current_particle=particles_list[i]
        cluster1=[]
        cluster2=[]
        
       ##for each data point in the Artificial dataset
        for a in range(0,len(artificial_df)):
            
            
            ##we measure the euclidean distance from each centroid
            dist1=scipy.spatial.distance.euclidean(np.asarray(artificial_df.loc[a]), current_particle[0])
            dist2=scipy.spatial.distance.euclidean(np.asarray(artificial_df.loc[a]), current_particle[1])

            ##we assign each datapoint to a cluster based on the closest centroid
            if dist1<dist2:
                cluster1.append(artificial_df.loc[a])


            elif dist1>dist2:
                cluster2.append(artificial_df.loc[a])


        
        ##Once we have our cluster we compute the quantization error and we store it
        ## we will have a list with all our quantization error
        quantization_error.append(quantization_error2(cluster1,cluster2,current_particle[0],current_particle[1]))
#         print(quantization_error)

    min_quant_error.append(min(quantization_error))
    
    ##Then for each quantization error in the list
    for z in range(0,len(quantization_error)):
        count=count+1    
        
        ##if the quantization error of the current iteration is smaller than the personal best , we store it as new personal best
        if quantization_error[z]<personal_best_error[z]:
            personal_best_error[z]=quantization_error[z]
            ##and we store the corresponding particle as new best position
            personal_best_position[z]=particles_list[z]

        ##we also update the best global error if we find a new best 
        if personal_best_error[z]<global_best_error:
            global_best_error=personal_best_error[z]
            global_best_position=particles_list[z]
            
    ###print ("Particle list BEFORE, ", particles_list)
    
    
    
    
    ## then for each particle we update the position of the centroids based on the formula
    
    for i in range(0,len(particles_list)):
        
        ##I've disassembled the formula for clarity.

        

        ##we set the random r
        r1=np.random.uniform(0,1,1)
        r2=np.random.uniform(0,1,1)

        ##we compute the first term of the formula
        first_term=np.multiply(w,velocity)
        
        ##we compute the two multiplication of the alpha and the r
        alphaR1=alpha*r1
        alphaR2=alpha*r2
        
        #we copmute the 2 subtractions
        first_subtraction=np.subtract(personal_best_position[i],particles_list[i])
        second_subtraction=np.subtract(global_best_position,particles_list[i])

        #we compute the second term
        second_term=np.multiply(alphaR1,first_subtraction)
        
        #we compute the second term
        third_term=np.multiply(alphaR2,second_subtraction)
        
        ##we compute the velocity
        velocity=np.add(first_term,second_term)
        velocity=np.add(velocity,third_term)

        #we update the position of the centroids of the particles
        particles_list[i]=np.add(particles_list[i],velocity)
        
    ##print ("global best error is: ", global_best_error)
    ##print ("global best position: ", global_best_position)
    ##print ("personal best error: ", personal_best_error)
    ##print ("Particle list AFTER", particles_list)
    ##print("personal best position  ",personal_best_position )

    
print("After ",iteration," iteration , the global best error is: ", global_best_error, "best  swarn has positions: ", global_best_position)
print(min_quant_error)

After  9  iteration , the global best error is:  0.40708035270694104 best  swarn has positions:  [[1.3130555  2.27214181]
 [0.1950484  0.31202336]]
[0.6049142047129293, 0.40708035270694104, 0.42496899891610723, 0.47749938471237924, 1.181324312959327, 3.733493605510646, 5.066579945080508, 17.300274202699264, 49.09387909738464, 172.65453170890152]
