In [1]:
import pandas as pd
from scipy.io import arff
import matplotlib.pyplot as plt 
import numpy as np 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn import tree
from sklearn.metrics import confusion_matrix
import seaborn as sn
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
from collections import Counter
from sklearn.metrics import f1_score
from sklearn.cluster import KMeans
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.xmeans import xmeans, splitting_type
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.feature_selection import SelectFromModel
from ensemble_models import adaBoost
from ensemble_models import bagging
from implicitModel import Implicit_ME
from explicitModel import Explicit_ME

In [2]:
def DecisionTree(X_train,y_train,X_test,y_test):
    clf = DecisionTreeClassifier(criterion = "entropy", splitter = "random").fit(X_train,y_train)
    y_pred_test = clf.predict(X_test)
    
    #matrices
    fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_test)
    auc = metrics.roc_auc_score(y_test,  y_pred_test)
    result = {
      "Accuracy": metrics.accuracy_score(y_test, y_pred_test),
      "F1_score": f1_score(y_test, y_pred_test, average='macro'),
      "AUC_score": auc,
      "Prediction": y_pred_test,
      "MCC" : metrics.matthews_corrcoef(y_test,  y_pred_test)
    }
    return result

In [3]:
def SVM(X_train,y_train,X_test,y_test):
    clf = svm.SVC().fit(X_train,y_train)
    y_pred_test = clf.predict(X_test)
    
    #matrices
    fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_test)
    auc = metrics.roc_auc_score(y_test,  y_pred_test)
    result = {
      "Accuracy": metrics.accuracy_score(y_test, y_pred_test),
      "F1_score": f1_score(y_test, y_pred_test, average='macro'),
      "AUC_score": auc,
      "Prediction": y_pred_test,
      "MCC" : metrics.matthews_corrcoef(y_test,  y_pred_test)
      
    }
    return result

In [4]:
def KNN(X_train,y_train,X_test,y_test):
    clf = KNeighborsClassifier(n_neighbors=3).fit(X_train,y_train)
    y_pred_test = clf.predict(X_test)
    
    #matrices
    fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_test)
    auc = metrics.roc_auc_score(y_test,  y_pred_test)
    result = {
      "Accuracy": metrics.accuracy_score(y_test, y_pred_test),
      "F1_score": f1_score(y_test, y_pred_test, average='macro'),
      "AUC_score": auc,
      "Prediction": y_pred_test,
      "MCC" : metrics.matthews_corrcoef(y_test,  y_pred_test)
    }
    return result

In [5]:
def Logistic(X_train,y_train,X_test,y_test):
    clf = LogisticRegression(max_iter=100,penalty = 'none').fit(X_train,y_train)
    y_pred_test = clf.predict(X_test)
    
    #matrices
    fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_test)
    auc = metrics.roc_auc_score(y_test,  y_pred_test)
    result = {
      "Accuracy": metrics.accuracy_score(y_test, y_pred_test),
      "F1_score": f1_score(y_test, y_pred_test, average='macro'),
      "AUC_score": auc,
      "Prediction": y_pred_test,
      "MCC" : metrics.matthews_corrcoef(y_test,  y_pred_test)
      
    }
    return result

In [6]:
def gate_ntw(X_train,y_train,X_test,y_test):
    gateDT = DecisionTreeClassifier(criterion = "entropy", splitter = "best").fit(X_train,y_train)
    gateSVM = svm.SVC(probability=True).fit(X_train,y_train)
    gateKNN = KNeighborsClassifier(n_neighbors=3).fit(X_train,y_train)
    gateLOG = LogisticRegression(max_iter=10000).fit(X_train,y_train)
    voting_clf = VotingClassifier(
    estimators=[('DecisionTree',gateDT), ('SVM',gateSVM),('KNN',gateKNN),('Logistic',gateLOG)],voting='soft')
    voting_clf.fit(X_train, y_train)
    final_predictions = voting_clf.predict(X_test)
    result = {
      "Accuracy": metrics.accuracy_score(y_test, final_predictions),
      "F1_score": f1_score(y_test, final_predictions, average='macro'),
      "AUC_score": metrics.roc_auc_score(y_test,  final_predictions),
      "Prediction": final_predictions,
      "MCC" : metrics.matthews_corrcoef(y_test,  final_predictions)
    }
    return result
    

In [7]:
def main(data,name):
    
    #Data Preprocessing
    df = pd.DataFrame(data[0])
    X= df.iloc[ : , :-1].values
    y= df['defects'].apply(lambda x : 1 if(x > 0) else 0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    st_x= StandardScaler()  
    X_train= st_x.fit_transform(X_train) 
    X_test= st_x.transform(X_test) 
    sel_ = SelectFromModel(LogisticRegression(C=1, penalty='l1', solver='liblinear'))
    sel_.fit(X_train, y_train)
    sel_.get_support()
    X_train = sel_.transform(X_train)
    X_test = sel_.transform(X_test)
    oversample = SMOTE()
    X_train, y_train = oversample.fit_resample(X_train, y_train)
    
    #Models
    experts = [DecisionTree,KNN,SVM,Logistic]
    impME = Implicit_ME(X_train,y_train,X_test,y_test,2,experts,gate_ntw)
    expME = Explicit_ME(X_train,y_train,X_test,y_test,experts,gate_ntw)
    dt = DecisionTree(X_train,y_train,X_test,y_test)
    svm = SVM(X_train,y_train,X_test,y_test)
    knn = KNN(X_train,y_train,X_test,y_test)
    log = Logistic(X_train,y_train,X_test,y_test)
    bag = bagging(X_train,y_train,X_test,y_test)
    ada = adaBoost(X_train,y_train,X_test,y_test)
    
    #results
    acc = [name,dt["Accuracy"],svm["Accuracy"],knn["Accuracy"],log["Accuracy"],bag["Accuracy"],ada["Accuracy"],impME["Accuracy"],expME["Accuracy"]]
    f1 = [name,dt["F1_score"],svm["F1_score"],knn["F1_score"],log["F1_score"],bag["F1_score"],ada["F1_score"],impME["F1_score"],expME["F1_score"]]
    auc = [name,dt["AUC_score"],svm["AUC_score"],knn["AUC_score"],log["AUC_score"],bag["AUC_score"],ada["AUC_score"],impME["AUC_score"],expME["AUC_score"]]
    mcc = [name,dt["MCC"],svm["MCC"],knn["MCC"],log["MCC"],bag["MCC"],ada["MCC"],impME["MCC"],expME["MCC"]]
    res = {
        "acc" : acc,
        "f1" : f1,
        "auc" : auc,
        "mcc" : mcc
    }
    return res

In [8]:
acc = []
f1 = []
auc = []
mcc = []

data = arff.loadarff('../dataSet/PROMISE/ivy-2.0.arff')
res = main(data,"PROMISE-IVY")
acc.append(res["acc"])
f1.append(res["f1"])
auc.append(res["auc"])
mcc.append(res["mcc"])
print("acc", acc)
print("f1", f1)
print("auc", auc)
print("mcc", mcc)
DF = pd.DataFrame(acc)
DF.to_csv("Results/accuracy.csv")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = co

acc [['PROMISE-IVY', 0.8169014084507042, 0.8309859154929577, 0.7183098591549296, 0.8309859154929577, 0.8450704225352113, 0.8450704225352113, 0.8309859154929577, 0.8309859154929577]]
f1 [['PROMISE-IVY', 0.5154855643044619, 0.5773809523809523, 0.49858757062146897, 0.6182795698924731, 0.5342874180083482, 0.6324705882352941, 0.5245535714285714, 0.5245535714285714]]
auc [['PROMISE-IVY', 0.5152329749103942, 0.5707885304659499, 0.5062724014336918, 0.618279569892473, 0.5313620071684588, 0.6263440860215053, 0.5232974910394266, 0.5232974910394266]]
mcc [['PROMISE-IVY', 0.036441774667353495, 0.15800505624270095, 0.010224481595409255, 0.23655913978494625, 0.09050728720129525, 0.2658806580433345, 0.06058976646391747, 0.06058976646391747]]
