In [3]:
import numpy as np
import pandas as pd
import pyswarms as ps
from sklearn.metrics import mean_squared_error
from sklearn import neighbors
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import multilabel_confusion_matrix, precision_recall_fscore_support, recall_score, precision_score, f1_score, confusion_matrix, accuracy_score
from sklearn import preprocessing

np.random.seed(42)

## Load data and labels ###

label = pd.read_csv('labels.csv')
data = pd.read_csv('data.csv')

#y =  label.Class.values
X = data.values[:,1:]

#Encode the variable : 

encode = preprocessing.LabelEncoder()
encode.fit(label.Class.unique())
y = encode.transform(label.Class.values)


### PSO function ###

def pso_feature_selection(X,y):

    ### Change this to NN here ####
    classifier = neighbors.KNeighborsClassifier(n_neighbors = 3)
    total_feat = X.shape[1]
    
    # Define objective function
    def f_per_particle(m):

        #total_features = total_feat
        # Get the subset of the features from the binary mask
        if np.count_nonzero(m) == 0:
            X_subset = X
        else:
            X_subset = X[:,m==1]
    # Perform classification and store performance in P
        classifier.fit(X_subset, y)

        # Compute for the objective function
        P = (classifier.score(X_subset,y))
        #j = (alpha * (1.0 - P)+ (1.0 - alpha) * (1 - (X_subset.shape[1] / total_features)))

        return P


    def f(x):
        n_particles = x.shape[0]
        j = [f_per_particle(x[i]) for i in range(n_particles)]
        return np.array(j)


    options = {'c1': 0.5, 'c2': 0.5, 'w':0.9, 'k': 5, 'p':2}

    # Call instance of PSO
    dimensions = total_feat # dimensions should be the number of features
    optimizer = ps.discrete.BinaryPSO(n_particles=10, dimensions=dimensions, options=options)

    # Perform optimization
    cost, pos = optimizer.optimize(f,iters=5,verbose=True)

    return pos


## Cross validation ###

KF = KFold(n_splits=5,shuffle=True)

for train_index, test_index in KF.split(X):
    # Split train-test
    x_train, x_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    #### Feature Selection

    pos = pso_feature_selection(x_test,y_test)

    x1_train = np.array(x_train[:,pos==1]) 
    x1_test = np.array(x_test[:,pos==1]) 

    ### Classification model (ADD NN to be fit here)###

    classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 42)
    classifier.fit(x1_train, y_train)

    y_pred = classifier.predict(x1_test)

    ##### Scoring #########

    print ("\nAccuracy on Training Set :")
    print (classifier.score(x1_train, y_train))

    #y_test = encode.inverse_transform(y_test)
    #y_pred = encode.inverse_transform(y_pred)

    print ("Checking on Test Set")
    print ("\nAccuracy on Testing Set :")
    print (accuracy_score(y_test,y_pred))

    print ("\nPrecision Score")
    print (precision_score(y_test, y_pred,average=None))
    print ("\nRecall Score")
    print (recall_score(y_test, y_pred,average=None))
    print ("\nF1 Score")
    print (f1_score(y_test, y_pred,average=None))


    ####### Confusion matrix ###########

    print(encode.transform(label.Class.unique()))

    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion matrix :\n")
    print(conf_matrix)

    conf_matrix = multilabel_confusion_matrix(y_test, y_pred)
    print("Confusion matrix :\n")
    print(conf_matrix)

2021-01-06 05:44:53,456 - pyswarms.discrete.binary - INFO - Optimize for 5 iters with {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 5, 'p': 2}
pyswarms.discrete.binary: 100%|██████████|5/5, best_cost=1
2021-01-06 05:45:30,219 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 1.0, best pos: [1 0 0 ... 0 0 0]



Accuracy on Training Set :
1.0
Checking on Test Set

Accuracy on Testing Set :
0.9937888198757764

Precision Score
[1.         1.         1.         1.         0.96666667]

Recall Score
[1.         1.         1.         0.96551724 1.        ]

F1 Score
[1.         1.         1.         0.98245614 0.98305085]
[4 3 0 2 1]
Confusion matrix :

[[61  0  0  0  0]
 [ 0 17  0  0  0]
 [ 0  0 25  0  0]
 [ 0  0  0 28  1]
 [ 0  0  0  0 29]]
Confusion matrix :

[[[100   0]
  [  0  61]]

 [[144   0]
  [  0  17]]

 [[136   0]
  [  0  25]]

 [[132   0]
  [  1  28]]

 [[131   1]
  [  0  29]]]


2021-01-06 05:45:33,711 - pyswarms.discrete.binary - INFO - Optimize for 5 iters with {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 5, 'p': 2}
pyswarms.discrete.binary: 100%|██████████|5/5, best_cost=0.994
2021-01-06 05:46:10,964 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.99375, best pos: [0 1 1 ... 1 1 1]



Accuracy on Training Set :
1.0
Checking on Test Set

Accuracy on Testing Set :
0.9875

Precision Score
[1.         1.         1.         0.91666667 1.        ]

Recall Score
[0.98387097 1.         0.97142857 1.         1.        ]

F1 Score
[0.99186992 1.         0.98550725 0.95652174 1.        ]
[4 3 0 2 1]
Confusion matrix :

[[61  0  0  1  0]
 [ 0 17  0  0  0]
 [ 0  0 34  1  0]
 [ 0  0  0 22  0]
 [ 0  0  0  0 24]]
Confusion matrix :

[[[ 98   0]
  [  1  61]]

 [[143   0]
  [  0  17]]

 [[125   0]
  [  1  34]]

 [[136   2]
  [  0  22]]

 [[136   0]
  [  0  24]]]


2021-01-06 05:46:14,511 - pyswarms.discrete.binary - INFO - Optimize for 5 iters with {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 5, 'p': 2}
pyswarms.discrete.binary: 100%|██████████|5/5, best_cost=1
2021-01-06 05:46:51,340 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 1.0, best pos: [1 1 1 ... 0 1 1]



Accuracy on Training Set :
1.0
Checking on Test Set

Accuracy on Testing Set :
0.98125

Precision Score
[0.95238095 1.         1.         1.         1.        ]

Recall Score
[1.  1.  1.  0.9 1. ]

F1 Score
[0.97560976 1.         1.         0.94736842 1.        ]
[4 3 0 2 1]
Confusion matrix :

[[60  0  0  0  0]
 [ 0 15  0  0  0]
 [ 0  0 25  0  0]
 [ 3  0  0 27  0]
 [ 0  0  0  0 30]]
Confusion matrix :

[[[ 97   3]
  [  0  60]]

 [[145   0]
  [  0  15]]

 [[135   0]
  [  0  25]]

 [[130   0]
  [  3  27]]

 [[130   0]
  [  0  30]]]


2021-01-06 05:46:54,812 - pyswarms.discrete.binary - INFO - Optimize for 5 iters with {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 5, 'p': 2}
pyswarms.discrete.binary: 100%|██████████|5/5, best_cost=0.994
2021-01-06 05:47:31,628 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.99375, best pos: [0 1 1 ... 1 0 1]



Accuracy on Training Set :
1.0
Checking on Test Set

Accuracy on Testing Set :
0.98125

Precision Score
[0.96774194 0.94117647 1.         1.         1.        ]

Recall Score
[1.         1.         1.         0.90909091 1.        ]

F1 Score
[0.98360656 0.96969697 1.         0.95238095 1.        ]
[4 3 0 2 1]
Confusion matrix :

[[60  0  0  0  0]
 [ 0 16  0  0  0]
 [ 0  0 29  0  0]
 [ 2  1  0 30  0]
 [ 0  0  0  0 22]]
Confusion matrix :

[[[ 98   2]
  [  0  60]]

 [[143   1]
  [  0  16]]

 [[131   0]
  [  0  29]]

 [[127   0]
  [  3  30]]

 [[138   0]
  [  0  22]]]


2021-01-06 05:47:35,091 - pyswarms.discrete.binary - INFO - Optimize for 5 iters with {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 5, 'p': 2}
pyswarms.discrete.binary: 100%|██████████|5/5, best_cost=1
2021-01-06 05:48:12,223 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 1.0, best pos: [0 0 1 ... 0 0 0]



Accuracy on Training Set :
1.0
Checking on Test Set

Accuracy on Testing Set :
0.975

Precision Score
[0.95       0.92307692 1.         1.         1.        ]

Recall Score
[1.         0.92307692 1.         0.92592593 0.96774194]

F1 Score
[0.97435897 0.92307692 1.         0.96153846 0.98360656]
[4 3 0 2 1]
Confusion matrix :

[[57  0  0  0  0]
 [ 1 12  0  0  0]
 [ 0  0 32  0  0]
 [ 1  1  0 25  0]
 [ 1  0  0  0 30]]
Confusion matrix :

[[[100   3]
  [  0  57]]

 [[146   1]
  [  1  12]]

 [[128   0]
  [  0  32]]

 [[133   0]
  [  2  25]]

 [[129   0]
  [  1  30]]]
