# Particle Swarm Optimization (PSO) implementation for training the neural network

In [411]:
# We calculate the constants related to the quantizer and the distortions.

# 79-character line limit
######### ######### ######### ######### ######### ######### ######### #########

%reset -f

import numpy as np # The NumPy library
#import math # The math module, np includes it
from scipy.integrate import quad # Method for integration in scipy.integrate sub-package


# Number of quantizer cells. The width table below only covers the sizes
# 4, 8, 16, 32, 64, 128 and 256.
cells_num = 128

# Optimized cell widths of a uniform quantizer (UQ) for a Gaussian
# distribution, listed for 4, 8, 16, 32, 64, 128 and 256 cells in that order.
all_size_gaus = np.array([0.9957, 0.5860, 0.3352, 0.1881, 0.1041, 0.0569, 0.0308])

# The table starts at 4 = 2**2 cells, hence the offset of 2 on the exponent.
cell_size = all_size_gaus[int(np.log2(cells_num))-2]

n_inf = float("-inf")
p_inf = float("inf")

# Cell boundaries for a symmetric distribution: cells_num-1 equally spaced
# finite thresholds centred on zero, with -inf / +inf closing the two outer
# cells (cells_num+1 values in total). np.linspace is used instead of
# np.arange because arange suffers from rounding error with this step.
boundaries_symm = np.concatenate((
    [n_inf],
    np.linspace(-(cells_num/2-1) * cell_size,
                (cells_num/2-1) * cell_size,
                cells_num-1),
    [p_inf],
))

def pdf(x):
    """Return the Gaussian density (mean 0, standard deviation 1) at x.

    x may be a scalar or a NumPy array; the result has the same shape.
    """
    gaus_mean, gaus_std = 0, 1
    normalizer = 1/(gaus_std*np.sqrt(2*np.pi))
    standardized = (x-gaus_mean)/gaus_std
    return normalizer * np.exp(-0.5*standardized**2)

def xpdf(x):
    """Return x * pdf(x), the integrand of the first moment."""
    return x * pdf(x)

def x2pdf(x):
    """Return x * xpdf(x), i.e. x**2 * pdf(x), the second-moment integrand."""
    return x * xpdf(x)

# Per-cell partial moments of the distribution, one entry per quantizer cell:
#   prbs[i]      = integral of pdf(x)      over cell i (cell probability)
#   xprbs[i]     = integral of x*pdf(x)    over cell i
#   x2prbs[i]    = integral of x**2*pdf(x) over cell i
#   cell_reps[i] = xprbs[i] / prbs[i]      (representative point of cell i)
# quad also returns an error estimate, which is not used.
prb_list, xprb_list, x2prb_list, rep_list = [], [], [], []
for lower, upper in zip(boundaries_symm[:-1], boundaries_symm[1:]):
    cell_mass = quad(pdf, lower, upper)[0]
    cell_first = quad(xpdf, lower, upper)[0]
    cell_second = quad(x2pdf, lower, upper)[0]

    prb_list.append(cell_mass)
    xprb_list.append(cell_first)
    x2prb_list.append(cell_second)
    rep_list.append(cell_first / cell_mass)

prbs = np.array(prb_list)
xprbs = np.array(xprb_list)
x2prbs = np.array(x2prb_list)
cell_reps = np.array(rep_list)

In [416]:
# Swarm dimensions: l1_size is the second axis of W1 / the length of each
# bias vector in B1, particles_num is the number of particles in the swarm.
l1_size = 128
particles_num = 12800

# Initial particle positions, drawn from a standard normal distribution.
# Axis 0 always indexes the particle. Note that W1 and W2 each hold
# particles_num * l1_size * cells_num float64 values.
W1 = np.random.standard_normal((particles_num, l1_size, cells_num))
B1 = np.random.standard_normal((particles_num, l1_size))
W2 = np.random.standard_normal((particles_num, cells_num, l1_size))

In [None]:
# We define the activation functions.
def relu(x):
    """Rectified linear unit: elementwise maximum of 0 and x."""
    rectified = np.maximum(0, x)
    return rectified

def lrelu(x):
    """Leaky ReLU: keep entries greater than 0, scale all others by 0.1."""
    negative_slope = 0.1
    return np.where(x > 0, x, x * negative_slope)

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied elementwise."""
    denominator = 1 + np.exp(-x)
    return 1/denominator

# Evaluate every initial particle and set up the swarm state.
#
# For each particle, the network W2 @ relu(W1 @ cell_reps + B1) produces one
# output per quantizer cell. The rank of a cell's output is the index
# assigned to that cell; the cell with index i is then merged with the cell
# with index i + cells_num/2, and the distortion of the merged cells is
# accumulated. The swarm looks for the particle with the LARGEST distortion,
# implemented as minimizing the negated distortion.
half_cells = cells_num // 2

distortions = np.empty(particles_num)
for particle in range(particles_num):
    l1_outs = relu(np.dot(W1[particle], cell_reps)+B1[particle])
    l2_outs = np.dot(W2[particle], l1_outs)
    l2_outs = np.round(l2_outs, 16)

    # order[k] is the cell whose output has rank k, so the cell assigned
    # index i is order[i]. A stable sort always yields a valid permutation,
    # even when several outputs are equal (looking each value up in the
    # sorted array returns several positions in that case).
    order = np.argsort(l2_outs, kind="stable")
    celli = order[:half_cells]               # cells with indexes 0 .. half-1
    cellj = order[half_cells:2*half_cells]   # their partners, index + half

    # Yijs: common reconstruction point of each merged pair of cells.
    yijs = (xprbs[celli] + xprbs[cellj])/(prbs[celli] + prbs[cellj])

    # Distortion contributed by each merged pair around its Yij.
    dijs = x2prbs[celli] + yijs**2 * prbs[celli] - yijs *2 * xprbs[celli]\
        + x2prbs[cellj] + yijs**2 * prbs[cellj] - yijs *2 * xprbs[cellj]

    # The Eve's distortion based on the assigned indexes. The built-in sum
    # keeps the left-to-right accumulation order.
    distortions[particle] = np.round(sum(dijs), 16)

print(np.max(distortions))

gb_pos = np.argmin(-distortions) # Global best particle
gb_value = -distortions[gb_pos]

# Copies, not views: the best positions must not change when W1/B1/W2 or
# the personal-best arrays are written to later.
gbW1 = W1[gb_pos].copy()
gbB1 = B1[gb_pos].copy()
gbW2 = W2[gb_pos].copy()

# Personal bests start at the initial positions. They are independent
# copies so that updating a personal best never modifies W1/B1/W2.
pbW1 = W1.copy()
pbB1 = B1.copy()
pbW2 = W2.copy()

pb_value = -distortions

# Velocities from the previous step (zero before the first iteration).
last_VW1 = np.zeros([particles_num, l1_size, cells_num])
last_VB1 = np.zeros([particles_num, l1_size])
last_VW2 = np.zeros([particles_num, cells_num, l1_size])

# Current (moving) particle positions.
tW1 = W1.copy()
tB1 = B1.copy()
tW2 = W2.copy()

0.6302091486928542


In [410]:
######### ######### ######### ######### ######### ######### ######### #########
# PSO main loop: move every particle, re-evaluate its distortion and keep
# track of the personal and global bests. The objective is the negated
# distortion, so "better" means a smaller (more negative) value.
iterations = 1000

VW1 = np.zeros([particles_num, l1_size, cells_num])
VB1 = np.zeros([particles_num, l1_size])
VW2 = np.zeros([particles_num, cells_num, l1_size])

gb_values =[]
# Index assignment of the best particle found by this loop; stays None when
# no particle ever improves on the initial global best.
best_indexes = None

# Parameters of the time-varying acceleration coefficients c1 and c2.
rond = 1.5
sigma = 4
delta1 = 2.5
delta2 = 0.5

# Parameters of the time-varying inertia weight.
phi = 0.666
tau = 0.6

half_cells = cells_num // 2

for m in range(0, iterations):
    # c1 (cognitive) shrinks and c2 (global) grows as m increases.
    c1 = -rond * np.arctan(m/iterations*sigma)+delta1
    c2 =  rond * np.arctan(m/iterations*sigma)+delta2

    # Inertia starts at phi+tau and follows a cosine down towards tau-phi.
    inertia_weight = phi * np.cos((m/iterations)*np.pi)+tau

    for n in range(0, particles_num):
        # One pair of random factors per particle, shared by W1, B1 and W2.
        r1 = np.random.random()
        r2 = np.random.random()

        # We compute the new velocity and position of each parameter group.
        VW1[n] = inertia_weight * last_VW1[n] + (c1 * r1 * (pbW1[n]-tW1[n]))\
             + (c2 * r2 * (gbW1-tW1[n]))
        tW1[n] = tW1[n] + VW1[n]
        last_VW1[n] = VW1[n]

        VB1[n] = (inertia_weight * last_VB1[n]) + (c1 * r1 * (pbB1[n]-tB1[n]))\
              + (c2 * r2 * (gbB1 - tB1[n]))
        tB1[n] = tB1[n] + VB1[n]
        last_VB1[n] = VB1[n]

        VW2[n] = (inertia_weight * last_VW2[n]) + (c1 * r1 * (pbW2[n]-tW2[n]))\
              + (c2 * r2 * (gbW2 - tW2[n]))
        tW2[n] = tW2[n] + VW2[n]
        last_VW2[n] = VW2[n]

        # Now we compute the new distortion.
        l1_outs = relu(np.dot(tW1[n], cell_reps)+tB1[n])
        l2_outs = np.dot(tW2[n], l1_outs)
        l2_outs = np.round(l2_outs, 16)

        # order[k] is the cell whose output has rank k, so the cell assigned
        # index i is order[i]. The stable sort breaks ties by cell position,
        # which always gives a valid permutation; no random jitter and no
        # pairwise equality scan are needed.
        order = np.argsort(l2_outs, kind="stable")
        celli = order[:half_cells]               # indexes 0 .. half-1
        cellj = order[half_cells:2*half_cells]   # partners, index + half

        # Yijs: common reconstruction point of each merged pair of cells.
        yijs = (xprbs[celli] + xprbs[cellj])/(prbs[celli] + prbs[cellj])

        dijs = x2prbs[celli] + yijs**2 * prbs[celli] - yijs *2 * xprbs[celli]\
            + x2prbs[cellj] + yijs**2 * prbs[cellj] - yijs *2 * xprbs[cellj]

        # The Eve's distortion based on the assigned indexes.
        distortion = np.round(sum(dijs), 16)

        # Bests are taken from the CURRENT position (tW1/tB1/tW2), which is
        # the position that produced this distortion.
        if -distortion < pb_value[n]:
            pbW1[n] = tW1[n]
            pbB1[n] = tB1[n]
            pbW2[n] = tW2[n]
            pb_value[n] = -distortion

        if -distortion < gb_value:
            # Copy: tW1[n] keeps moving in later iterations.
            gbW1 = tW1[n].copy()
            gbB1 = tB1[n].copy()
            gbW2 = tW2[n].copy()
            gb_value = -distortion
            gb_values = np.append(gb_values, gb_value)
            # Inverse permutation of order: best_indexes[c] is the index
            # assigned to cell c.
            best_indexes = np.argsort(order)

            print(gb_value)

print(best_indexes)

[15. 12.  3.  9.  2.  8. 13.  6.  7.  1.  0. 10.  5. 11. 14.  4.]
