In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, label_binarize
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, roc_curve, auc
from sklearn.multiclass import OneVsRestClassifier

In [2]:
# Relative path of the CSV to analyse; the preview below shows one row per
# feature model with its metric columns and a Maintainability label.
DATASET_PATH = './files/dataset.csv'
dataset = pd.read_csv(DATASET_PATH)

# Preview the first rows as a quick sanity check of the load.
dataset.head()

Unnamed: 0,Feature Model,NF,NM,NTop,NLeaf,DTMax,CogC,FEX,FoC,SCDF,MCDF,RDen,RoV,NVC,NGOr,NGXOr,Maintainability
0,RaaS,32,2,19,28,3,3,28,0.53125,0,0,0.0,4.857143,125829120.0,3,0,moderate
1,AvionFEatures,10,4,5,7,3,2,7,0.1,0,0,0.0,2.2,12.0,2,0,verygood
2,Bike Shop,21,3,5,15,5,5,27,0.095238,9,3,0.0,2.272727,70.0,1,4,good
3,Jetbrains,33,13,3,25,4,2,28,0.272727,3,0,2.0,3.4,4884.0,0,2,moderate
4,Ubuntu,11,3,4,7,3,3,7,0.090909,0,0,1.0,1.857143,12.0,0,3,verygood


In [3]:
# Target vector: the last column of the dataset.
y = dataset.iloc[:, -1]

# Feature matrix: every column except the first and the last.
# Exactly one selection should be active; the alternatives are the feature
# subsets tried in other experiments.
X = dataset.iloc[:, 1:-1] # Full Dataset
# X = dataset.iloc[:, [1, 4, 5, 6, 7, 13]] # Correlation Spearman Mean
# X = dataset.iloc[:, [1, 2, 3, 4, 5, 6, 7]] # Select From Model - Logistic Regression
# X = dataset.iloc[:, [1, 2, 3, 4, 5, 6, 8, 9, 11]] # Select From Model - Decision Tree

X.head()

Unnamed: 0,NF,NM,NTop,NLeaf,DTMax,CogC,FEX,FoC,SCDF,MCDF,RDen,RoV,NVC,NGOr,NGXOr
0,32,2,19,28,3,3,28,0.53125,0,0,0.0,4.857143,125829120.0,3,0
1,10,4,5,7,3,2,7,0.1,0,0,0.0,2.2,12.0,2,0
2,21,3,5,15,5,5,27,0.095238,9,3,0.0,2.272727,70.0,1,4
3,33,13,3,25,4,2,28,0.272727,3,0,2.0,3.4,4884.0,0,2
4,11,3,4,7,3,3,7,0.090909,0,0,1.0,1.857143,12.0,0,3


In [4]:
# Classifier under evaluation; the fixed seed keeps runs repeatable.
clf = LogisticRegression(random_state=0)
# Min-max scaling is the pipeline's first step, so it is fitted together with
# the classifier whenever `pipeline.fit` is called.
scaler = MinMaxScaler()
pipeline = make_pipeline(scaler, clf)

In [5]:
# Maintainability labels, in the order used for every per-class report below.
maintainability_classes = ["verybad", "bad", "moderate", "good", "verygood"]

# Label -> numeric code: the 1-based position in the list above
# ("verybad" -> 1, ..., "verygood" -> 5).
maintainability_classes_num = {
    name: rank for rank, name in enumerate(maintainability_classes, start=1)
}


def maintainability_converter(x):
    """Return the numeric code of maintainability label `x`.

    Raises KeyError when `x` is not one of `maintainability_classes`.
    """
    return maintainability_classes_num[x]


# Plain numpy views of the features and the numerically encoded target.
X_ = X.to_numpy()
y_ = np.array(list(map(maintainability_converter, y.to_numpy())))

# 10-fold stratified cross-validation splitter.
kf = StratifiedKFold(n_splits=10)

# One overall score per fold for each metric.
scores_accuracy, scores_precision, scores_recall, scores_f1, scores_roc_auc = (
    [] for _ in range(5)
)

# One score per fold, kept separately for each class and each metric.
scores_precision_by_class = {name: [] for name in maintainability_classes}
scores_recall_by_class = {name: [] for name in maintainability_classes}
scores_f1_by_class = {name: [] for name in maintainability_classes}
scores_roc_auc_by_class = {name: [] for name in maintainability_classes}

# Numeric class codes in the same order as `maintainability_classes`.
# Computed once: it does not change between folds.
class_labels = [maintainability_classes_num[c] for c in maintainability_classes]

# Evaluate the pipeline on every stratified fold and collect the scores.
for train_index, test_index in kf.split(X_, y_):
    X_train, X_test = X_[train_index], X_[test_index]
    y_train, y_test = y_[train_index], y_[test_index]

    # Refit the whole pipeline on the training part of this fold only.
    pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)
    y_pred_proba = pipeline.predict_proba(X_test)

    # `labels` is passed explicitly so the report always has one entry per
    # class, in the order of `target_names`. Without it, a fold in which some
    # class appears in neither y_test nor y_pred makes classification_report
    # raise ValueError because the class count no longer matches target_names.
    # Such a class is reported with 0 precision/recall/F1 (zero_division=0).
    cr = classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=maintainability_classes,
        output_dict=True,
        zero_division=0,
    )

    # Per-class precision / recall / F1 for this fold.
    for key in maintainability_classes:
        scores_precision_by_class[key].append(cr[key]["precision"])
        scores_recall_by_class[key].append(cr[key]["recall"])
        scores_f1_by_class[key].append(cr[key]["f1-score"])

    # Overall scores for this fold (support-weighted averages across classes).
    scores_accuracy.append(accuracy_score(y_test, y_pred))
    scores_precision.append(precision_score(y_test, y_pred, average="weighted", zero_division=0))
    scores_recall.append(recall_score(y_test, y_pred, average="weighted", zero_division=0))
    scores_f1.append(f1_score(y_test, y_pred, average="weighted", zero_division=0))
    scores_roc_auc.append(roc_auc_score(y_test, y_pred_proba, average="weighted", multi_class="ovo"))

    # Per-class ROC AUC: a one-vs-rest wrapper around the pipeline is fitted on
    # the same training fold, and each class column is scored against the
    # binarized ground truth.
    oneVsRestClf = OneVsRestClassifier(pipeline)
    y_pred_proba_binarized = oneVsRestClf.fit(X_train, y_train).predict_proba(X_test)
    y_test_binarized = label_binarize(y_test, classes=class_labels)

    # Kept as dicts keyed by class name; after the loop they hold the curves
    # of the last fold.
    fpr = dict()
    tpr = dict()
    for i, key in enumerate(maintainability_classes):
        fpr[key], tpr[key], _ = roc_curve(y_test_binarized[:, i], y_pred_proba_binarized[:, i])
        scores_roc_auc_by_class[key].append(auc(fpr[key], tpr[key]))
        
def print_metric_summary(label, fold_scores, per_class_scores=None):
    """Print the per-fold scores of one metric with basic statistics.

    label: metric name used in the section headings.
    fold_scores: sequence with one overall score per fold.
    per_class_scores: optional dict mapping class name -> list of per-fold
        scores; when given, the mean of each class is printed as well.
    """
    print("\n>> " + label)
    print("Scores:", list(fold_scores))
    print("Min:", np.min(fold_scores))
    print("Max:", np.max(fold_scores))
    print("Mean:", np.mean(fold_scores))
    print("Standard Deviation:", np.std(fold_scores))

    if per_class_scores is None:
        return

    print("\n>>> Mean " + label + " By Class")

    for class_name, class_scores in per_class_scores.items():
        print("Class:", class_name, "| Value", np.mean(class_scores))


# Accuracy has no per-class breakdown; the other metrics do.
print_metric_summary("Accuracy", scores_accuracy)
print_metric_summary("Precision", scores_precision, scores_precision_by_class)
print_metric_summary("Recall", scores_recall, scores_recall_by_class)
print_metric_summary("F1", scores_f1, scores_f1_by_class)
print_metric_summary("ROC_AUC", scores_roc_auc, scores_roc_auc_by_class)


>> Accuracy
Scores: [0.6, 0.6285714285714286, 0.5882352941176471, 0.6176470588235294, 0.5882352941176471, 0.5294117647058824, 0.5882352941176471, 0.6470588235294118, 0.5588235294117647, 0.6176470588235294]
Min: 0.5294117647058824
Max: 0.6470588235294118
Mean: 0.5963865546218488
Standard Deviation: 0.032583051581081664

>> Precision
Scores: [0.5083505866114562, 0.6823747680890537, 0.5441176470588235, 0.5147058823529411, 0.6862745098039216, 0.37516339869281046, 0.48670076726342704, 0.6073529411764705, 0.4330242966751918, 0.6126050420168067]
Min: 0.37516339869281046
Max: 0.6862745098039216
Mean: 0.5450669839740901
Standard Deviation: 0.09712472474481933

>>> Mean Precision By Class
Class: verybad | Value 0.0
Class: bad | Value 0.6373484848484849
Class: moderate | Value 0.55
Class: good | Value 0.5833333333333333
Class: verygood | Value 0.5846085074345944

>> Recall
Scores: [0.6, 0.6285714285714286, 0.5882352941176471, 0.6176470588235294, 0.5882352941176471, 0.5294117647058824, 0.58823529