Importing relevant libraries

In [3]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from SCA.sca import jfs
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, roc_auc_score,f1_score


# Load 'pc1_scaled.csv' as a plain NumPy array: every column except the last
# is used as the feature matrix, and the last column is used as the label.
data = pd.read_csv('pc1_scaled.csv').values
feat, label = data[:, :-1], data[:, -1]

# Per-run accumulators: the experiment loop appends one entry to each list
# on every repetition, and the lists are exported to Excel afterwards.
acc_results: list = []          # validation accuracy
feature_results: list = []      # number of selected features (fmdl['nf'])
feature_results_col: list = []  # selected-feature selector (fmdl['sf'])
roc_all: list = []              # ROC AUC on the validation split
f1_scores: list = []            # F1 score on the validation split
t_elapsed: list = []            # elapsed seconds per run

import time  # imported once here instead of on every loop iteration

# Settings are identical for every repetition, so they are defined once.
k = 5    # k-value in KNN (forwarded to jfs through opts)
N = 10   # number of particles
T = 150  # maximum number of iterations

# Repeat the whole experiment 30 times, each on a fresh random 70/30 split,
# recording accuracy, ROC AUC, F1, selected features and elapsed time per run.
for _ in range(30):
    t1 = time.perf_counter()

    # split data into train & validation (70 -- 30), stratified on the label
    x_train, x_test, y_train, y_test = train_test_split(
        feat, label, test_size=0.3, stratify=label
    )
    fold = {'xt': x_train, 'yt': y_train, 'xv': x_test, 'yv': y_test}
    opts = {'k': k, 'fold': fold, 'N': N, 'T': T}

    # perform feature selection
    fmdl = jfs(feat, label, opts)
    sf = fmdl['sf']

    # model with selected features
    num_train = np.size(x_train, 0)
    num_valid = np.size(x_test, 0)
    x_train = x_train[:, sf]
    y_train = y_train.reshape(num_train)  # force 1-D targets
    x_valid = x_test[:, sf]
    y_valid = y_test.reshape(num_valid)   # force 1-D targets

    # NOTE(review): this classifier uses 7 neighbours while opts['k'] is 5 --
    # confirm the mismatch is intentional.
    mdl = KNeighborsClassifier(n_neighbors=7)
    mdl.fit(x_train, y_train)

    # evaluation metrics on the validation split
    y_pred = mdl.predict(x_valid)
    # second probability column; presumably the positive class -- verify
    # against mdl.classes_
    y_pred_proba = mdl.predict_proba(x_valid)[:, 1]
    roc = roc_auc_score(y_valid, y_pred_proba)
    Acc = np.sum(y_valid == y_pred) / num_valid
    f1 = f1_score(y_valid, y_pred)

    # Stop the clock here so the recorded time covers only the split, feature
    # selection, fit and scoring -- not the plot display or the pause below.
    t2 = time.perf_counter()
    t_elap = t2 - t1

    print("Accuracy:", 100 * Acc)
    acc_results.append(Acc)
    roc_all.append(roc)
    f1_scores.append(f1)

    # number of selected features
    num_feat = fmdl['nf']
    print("Feature Size:", num_feat)
    feature_results.append(num_feat)
    print(sf)
    feature_results_col.append(sf)
    t_elapsed.append(t_elap)

    # plot convergence
    curve = fmdl['c']
    curve = curve.reshape(np.size(curve, 1))
    x = np.arange(0, opts['T'], 1.0) + 1.0

    fig, ax = plt.subplots()
    ax.plot(x, curve, 'o-')
    ax.set_xlabel('Number of Iterations')
    ax.set_ylabel('Fitness')
    ax.set_title('SCA')  # was 'PSO', but the optimiser in use is SCA.sca.jfs
    ax.grid()
    plt.show()
    time.sleep(1)  # short pause between runs; excluded from the timing above
# Turn each per-run result list into a DataFrame (one row per run).
df_acc = pd.DataFrame(acc_results)
df_feature = pd.DataFrame(feature_results)
df_cols = pd.DataFrame(feature_results_col)
df_roc = pd.DataFrame(roc_all)
df_f1 = pd.DataFrame(f1_scores)
df_time = pd.DataFrame(t_elapsed)

# Sheet name -> frame, in the order the sheets are written to the workbook.
sheets = {
    'acc': df_acc,
    'fe': df_feature,
    'co': df_cols,
    'roc': df_roc,
    'f1': df_f1,
    'time': df_time,
}

# Use ExcelWriter as a context manager: the workbook is saved and closed on
# exit (even if a write fails). The original writer.save() call no longer
# exists in pandas >= 2.0.
with pd.ExcelWriter('SCA15030.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

FileNotFoundError: [Errno 2] No such file or directory: 'pc1_scaled.csv'

In [None]:
# Load 'pc1_scaled.csv' as a plain NumPy array: every column except the last
# is used as the feature matrix, and the last column is used as the label.
data = pd.read_csv('pc1_scaled.csv').values
feat, label = data[:, :-1], data[:, -1]
