## Import Libraries

In [94]:
import pandas as pd
from sklearn.model_selection import train_test_split 

## Load Dataset

In [95]:
data = pd.read_csv('data/Swarm_Behaviour.csv')

In [96]:
data.isnull().sum()

x1                 0
y1                 0
xVel1              0
yVel1              0
xA1                0
                  ..
xC200              0
yC200              0
nAC200             0
nS200              0
Swarm_Behaviour    0
Length: 2401, dtype: int64

In [93]:
data.fillna(0, inplace = True)

In [101]:
data.head()

Unnamed: 0,x1,y1,xVel1,yVel1,xA1,yA1,xS1,yS1,xC1,yC1,...,yVel200,xA200,yA200,xS200,yS200,xC200,yC200,nAC200,nS200,Swarm_Behaviour
0,562.05,-0.62,-10.7,-4.33,0.0,0.0,0.0,0.0,0.0,0.0,...,-15.15,0.0,0.0,0.0,0.0,0.0,0.0,28,0,0.0
1,175.66,-57.09,2.31,-2.67,0.0,0.0,0.0,0.0,0.0,0.0,...,-3.48,0.0,0.0,0.0,0.0,0.0,0.0,4,0,0.0
2,200.16,-320.07,4.01,-6.37,0.0,0.0,0.0,0.0,0.18,-0.26,...,-9.38,0.0,0.0,0.0,0.0,-0.11,-0.3,15,1,0.0
3,316.99,-906.84,0.85,9.17,-0.17,1.03,0.0,0.0,0.0,0.0,...,10.39,-0.26,1.01,0.0,0.0,0.0,0.0,16,0,0.0
4,1277.68,908.54,-2.02,8.23,-1.0,1.0,0.0,0.0,0.0,0.0,...,13.91,-1.0,0.0,3.21,15.67,0.0,0.0,12,0,0.0


In [99]:
data['Swarm_Behaviour'].describe()

count    23309.000000
mean         0.341242
std          0.474136
min          0.000000
25%          0.000000
50%          0.000000
75%          1.000000
max          1.000000
Name: Swarm_Behaviour, dtype: float64

## Define Target and Feature Variables

In [103]:
#feature variables
X = data.drop(['Swarm_Behaviour'], axis=1).values
X

sc = StandardScaler()
X = sc.fit_transform(X)

#target variable
y = data['Swarm_Behaviour'].values
y

array([0., 0., 0., ..., 0., 0., 0.])

## Standard Scaler

In [104]:
from sklearn.preprocessing import StandardScaler

## Train-Test Split

In [105]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [106]:
X_train.shape

(18647, 2400)

## Logistic Regression

In [108]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


lr = LogisticRegression(solver='lbfgs', max_iter=10000)
lr.fit(X_train, y_train)
LogisticRegression()
y_pred = lr.predict(X_test)

accuracy_score(y_pred, y_test)

0.868940368940369

## DFO Algorithm

In [None]:
import numpy as np

# FITNESS FUNCTION (SPHERE FUNCTION)
def f(x): # x IS A VECTOR REPRESENTING ONE FLY
    sum = 0.0
    for i in range(len(x)):
        sum = sum + np.power(x[i],2)
    return sum

N = X_train.shape[0] # POPULATION SIZE
D = X_train.shape[1] # DIMENSIONALITY 
delta = 0.001 # DISTURBANCE THRESHOLD 
maxIterations = 100 # ITERATIONS ALLOWED
lowerB = [-5.12]*D # LOWER BOUND (IN ALL DIMENSIONS)
upperB = [ 5.12]*D # UPPER BOUND (IN ALL DIMENSIONS)

# INITIALISATION PHASE
A = X_train # EMPTY FLIES ARRAY OF SIZE: (N,D)
fitness = [None]*N  # EMPTY FITNESS ARRAY OF SIZE N

# INITIALISE FLIES WITHIN BOUNDS
# for i in range(N):
#     for d in range(D):
#         X[i,d] = np.random.uniform(lowerB[d], upperB[d])

# MAIN DFO LOOP
for itr in range (maxIterations):
    for i in range(N): # EVALUATION
        fitness[i] = f(A[i,])
    s = np.argmin(fitness) # FIND BEST FLY

    if (itr%10 == 0): # PRINT BEST FLY EVERY 100 ITERATIONS
        print ("Iteration:", itr, "\tBest fly index:", s, 
               "\tFitness value:", fitness[s])

    # TAKE EACH FLY INDIVIDUALLY 
    for i in range(N): 
        if i == s: continue # ELITIST STRATEGY

        # FIND BEST NEIGHBOUR
        left = (i-1)%N
        right = (i+1)%N
        bNeighbour = right if fitness[right]<fitness[left] else left

        for d in range(D): # UPDATE EACH DIMENSION SEPARATELY 
            if (np.random.rand() < delta):
                A[i,d] = np.random.uniform(lowerB[d], upperB[d])
                continue;

            u = np.random.rand()
            A[i,d] = A[bNeighbour,d] + u*(A[s,d] - A[i,d])

            # OUT OF BOUND CONTROL
            if A[i,d] < lowerB[d] or A[i,d] > upperB[d]:
                A[i,d] = np.random.uniform(lowerB[d], upperB[d])

for i in range(N): fitness[i] = f(A[i,]) # EVALUATION
s = np.argmin(fitness) # FIND BEST FLY

print("\nFinal best fitness:\t", fitness[s])
print("\nBest fly position:\n",  A[s,])

Iteration: 0 	Best fly index: 10207 	Fitness value: 719.007938324525
Iteration: 10 	Best fly index: 7335 	Fitness value: 501.9478362488423
Iteration: 20 	Best fly index: 15337 	Fitness value: 492.2800668729308
Iteration: 30 	Best fly index: 163 	Fitness value: 489.19188242871184
Iteration: 40 	Best fly index: 162 	Fitness value: 489.14886684187337
Iteration: 50 	Best fly index: 163 	Fitness value: 488.7702118640765
Iteration: 60 	Best fly index: 12723 	Fitness value: 485.1317763568461
Iteration: 70 	Best fly index: 12723 	Fitness value: 485.1317763568461


In [66]:
fitness[s]

55.33182535516758

In [67]:
len(fitness)

1253

In [68]:
X_train.shape[0]

1253

In [69]:
A[s,].shape

(590,)

In [70]:
dim = np.size(X_train, 1)

In [71]:
dim

590

In [72]:
def binary_conversion(X, thres, N, dim):
    Xbin = np.zeros([N, dim], dtype='int')
    for i in range(N):
        for d in range(dim):
            if X[i,d] > thres:
                Xbin[i,d] = 1
            else:
                Xbin[i,d] = 0
    
    return Xbin

In [73]:
def Fun(xtrain, ytrain, x):
    # parameters
    cost = 0
    alpha = 0.99
    beta = 1 - alpha
    # original feature size
    max_feat = len(x)
    # Number of selected features
    num_feat = np.sum(x == 1)
    # Solve if no feature selected
    if num_feat == 0:
        cost = 1
    else:
        cost = 0
    
    return cost


In [74]:

fit   = np.zeros([N, 1], dtype='float')
Xgb   = np.zeros([1, dim], dtype='float')
fitG  = float('inf')
Xpb   = np.zeros([N, dim], dtype='float')
fitP  = float('inf') * np.ones([N, 1], dtype='float')
curve = np.zeros([1, maxIterations], dtype='float') 
t     = 0

In [75]:
new_arr = []
for i in range(len(fitness)):
    new_arr.append([fitness[i]])

z_fitness = np.array(new_arr)
z_fitness.shape

(1253, 1)

In [76]:
thres = 0.5

In [77]:
while t < maxIterations:
    # Binary conversion
    Xbin = binary_conversion(X, thres, N, dim)

    # Fitness
    for i in range(N):
        fit[i,0] = z_fitness[i,0]
        if fit[i,0] < fitP[i,0]:
            Xpb[i,:]  = X[i,:]
            fitP[i,0] = fit[i,0]
        if fitP[i,0] < fitG:
            Xgb[0,:]  = Xpb[i,:]
            fitG      = fitP[i,0]
#     for d in range(D): # UPDATE EACH DIMENSION SEPARATELY 
#         if (np.random.rand() < delta):
#             X[i,d] = np.random.uniform(lowerB[d], upperB[d])
#             continue;

#         u = np.random.rand()
#         X[i,d] = X[bNeighbour,d] + u*(X[s,d] - X[i,d])

#         # OUT OF BOUND CONTROL
#         if X[i,d] < lowerB[d] or X[i,d] > upperB[d]:
#             X[i,d] = np.random.uniform(lowerB[d], upperB[d])
    t += 1

In [78]:
len(fitness)

1253

In [79]:
fit[i,0]

120.37401821820235

In [80]:
Xgb

array([[ 0.13020517, -0.3725489 , -0.21266106, -0.4606115 , -0.06023932,
         0.09494635,  0.56809714,  0.26335882, -0.41903682, -1.24224413,
        -0.45684648,  0.07315341, -0.18449544,  0.        ,  0.88929482,
        -0.21331804,  0.43523039,  0.27448503, -0.27168226,  0.11679263,
        -0.80412219, -0.20762563, -0.23807269, -0.05074505, -0.2958806 ,
         0.67205534,  0.20091175,  0.57160983,  1.15658569, -0.49784617,
         0.07973527, -0.37519268, -0.58298538, -0.04819046, -0.18718787,
        -0.17034609,  0.22665083, -0.15832674,  0.0967582 , -0.24605976,
         0.53174385,  0.03923908,  0.02526993, -0.44754238,  0.7274168 ,
        -1.40004547, -0.93609401,  1.05711763,  1.29601137,  0.02526993,
        -0.52068902,  1.05102779,  0.        , -0.07661556, -0.10016968,
         0.04862659,  0.16726831,  0.07296464,  0.55989579,  0.05279572,
        -0.23774758,  0.30893176, -0.7085948 ,  0.1209418 , -0.75713361,
        -0.46099197, -0.33518447, -0.05109127,  0.4

In [81]:
fitness[s]

55.33182535516758

In [82]:
# Best feature subset
Gbin       = binary_conversion(Xgb, thres, 1, dim) 
Gbin       = Gbin.reshape(dim)
pos        = np.asarray(range(0, dim))    
sel_index  = pos[Gbin == 1]
num_feat   = len(sel_index)
# Create dictionary
opt_data = {'sf': sel_index, 'nf': num_feat}

In [83]:
print(Gbin)
print(pos)
print(sel_index)
print(num_feat)

[0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 0
 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 0 1 1 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1
 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 1 0 0 0 1 1 1 0 

In [84]:
opt_data['sf']

array([  6,  14,  25,  27,  28,  40,  44,  47,  48,  51,  58,  72,  73,
        77,  80,  81,  88, 100, 102, 105, 106, 115, 119, 126, 128, 142,
       143, 150, 159, 166, 167, 170, 172, 174, 175, 176, 212, 215, 218,
       219, 225, 239, 250, 255, 269, 278, 294, 301, 302, 303, 305, 307,
       309, 310, 311, 350, 353, 356, 357, 363, 365, 366, 377, 388, 393,
       416, 437, 438, 441, 443, 445, 446, 447, 480, 485, 489, 490, 491,
       497, 499, 500, 522, 527, 541, 550, 553, 556, 559, 560, 561, 563,
       564, 565, 566, 567, 568, 569, 571, 578, 579, 580])

In [85]:
new_data = data.iloc[:,opt_data['sf']]

In [86]:
new_data

Unnamed: 0,5,13,24,26,27,39,43,46,47,50,...,563,564,565,566,567,568,570,577,578,579
0,100.0,0.0,751.00,1.7730,3.0490,117.5132,352.7173,723.3092,1.3072,624.3145,...,0.0000,0.00,0.0000,0.0000,0.0000,0.0000,533.8500,14.9509,0.0000,0.0000
1,100.0,0.0,-1640.25,2.0143,7.3900,118.1188,352.2445,724.8264,1.2887,631.2618,...,0.0000,0.00,0.0000,0.0000,0.0000,0.0000,535.0164,10.9003,0.0096,0.0201
2,100.0,0.0,-1916.50,2.0295,7.5788,118.6128,364.3782,734.7924,1.2992,637.2655,...,0.9032,1.10,0.6219,0.4122,0.2562,0.4119,535.0245,9.2721,0.0584,0.0484
3,100.0,0.0,-1657.25,2.0038,7.3145,117.0442,363.0273,733.8778,1.3027,637.3727,...,0.6511,7.32,0.1630,3.5611,0.0670,2.7290,530.5682,8.5831,0.0202,0.0149
4,100.0,0.0,117.00,1.9912,7.2748,121.4364,353.3400,789.7523,1.0341,667.7418,...,0.0000,0.00,0.0000,0.0000,0.0000,0.0000,532.0155,10.9698,0.0000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,100.0,0.0,356.00,1.9540,7.0793,120.5156,369.9009,739.6150,1.0736,636.6018,...,0.5671,4.98,0.0877,2.0902,0.0382,1.8844,536.3418,11.7256,0.0068,0.0138
1563,100.0,0.0,339.00,1.8023,5.1515,120.2552,351.4055,745.4058,0.8566,635.3809,...,0.6254,4.56,0.1308,1.7420,0.0495,1.7089,537.9264,17.8379,0.0000,0.0000
1564,100.0,0.0,-1226.00,1.9435,7.2315,117.8912,350.3145,724.8053,1.3337,628.0573,...,0.8209,11.09,0.2388,4.4128,0.0965,4.3197,530.3709,17.7267,0.0197,0.0086
1565,100.0,0.0,394.75,1.9880,7.3255,118.9288,370.5845,738.7740,1.0855,639.9136,...,0.5671,4.98,0.0877,2.0902,0.0382,1.8844,534.3936,19.2104,0.0262,0.0245


## Logistic Regression after Optimisation

In [89]:
#feature variables
X = new_data.values #.drop(['Time'], axis=1)
X

#target variable
y = data['Swarm_Behaviour'].values
y

sc = StandardScaler()
X = sc.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

print("Shape of X_train:", X.shape)
print("Shape of y_train:", y.shape)

lr1 = LogisticRegression(solver='lbfgs', max_iter=1000)
lr1.fit(X_train, y_train)
LogisticRegression()
y_pred = lr1.predict(X_test)

accuracy_score(y_pred, y_test)

Shape of X_train: (1567, 101)
Shape of y_train: (1567,)


0.9331210191082803