In [1]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Read the metrics table from disk and preview its first rows.
DATASET_PATH = './files/dataset.csv'
dataset = pd.read_csv(DATASET_PATH)

dataset.head()

Unnamed: 0,Feature Model,NF,NM,NTop,NLeaf,DTMax,CogC,FEX,FoC,SCDF,MCDF,RDen,RoV,NVC,NGOr,NGXOr,Maintainability
0,RaaS,32,2,19,28,3,3,28,0.53125,0,0,0.0,4.857143,125829120.0,3,0,moderate
1,AvionFEatures,10,4,5,7,3,2,7,0.1,0,0,0.0,2.2,12.0,2,0,verygood
2,Bike Shop,21,3,5,15,5,5,27,0.095238,9,3,0.0,2.272727,70.0,1,4,good
3,Jetbrains,33,13,3,25,4,2,28,0.272727,3,0,2.0,3.4,4884.0,0,2,moderate
4,Ubuntu,11,3,4,7,3,3,7,0.090909,0,0,1.0,1.857143,12.0,0,3,verygood


In [3]:
# Alternative feature subsets kept for reference (enable one instead of the full slice):
# X = dataset.iloc[:, [1, 2, 3, 4, 5, 6, 7, 14]]  # Select From Model
# X = dataset.iloc[:, [1, 4, 5, 6, 7, 13]]  # Correlation Spearman Mean

# Full dataset: features are every column except the first and the last;
# the last column is used as the target.
n_columns = dataset.shape[1]
X = dataset.iloc[:, 1:n_columns - 1]
y = dataset.iloc[:, n_columns - 1]

X.head()

Unnamed: 0,NF,NM,NTop,NLeaf,DTMax,CogC,FEX,FoC,SCDF,MCDF,RDen,RoV,NVC,NGOr,NGXOr
0,32,2,19,28,3,3,28,0.53125,0,0,0.0,4.857143,125829120.0,3,0
1,10,4,5,7,3,2,7,0.1,0,0,0.0,2.2,12.0,2,0
2,21,3,5,15,5,5,27,0.095238,9,3,0.0,2.272727,70.0,1,4
3,33,13,3,25,4,2,28,0.272727,3,0,2.0,3.4,4884.0,0,2
4,11,3,4,7,3,3,7,0.090909,0,0,1.0,1.857143,12.0,0,3


In [4]:
# NOTE(review): this silences *every* warning for the rest of the kernel session,
# which can hide scikit-learn diagnostics. Kept as-is to preserve the notebook's
# output; consider narrowing it to specific warning categories.
warnings.filterwarnings('ignore')

# Min-max scaling followed by a logistic-regression classifier.
scaler = MinMaxScaler()
clf = LogisticRegression(random_state=0)
pipeline = make_pipeline(scaler, clf)
# Fit on the full data so `pipeline` is a fitted estimator after this cell.
# The cross-validation below works on clones and does not depend on this fit.
pipeline.fit(X, y)

CV_FOLDS = 10


def print_score_summary(title, scores):
    """Print per-fold scores for one metric plus their min, max, mean and std.

    Parameters
    ----------
    title : str
        Heading printed after the ">> " marker.
    scores : array-like of float
        One score per cross-validation fold.
    """
    print(f"\n>> {title}")
    print("Scores:", list(scores))
    print("Min:", np.min(scores))
    print("Max:", np.max(scores))
    print("Mean:", np.mean(scores))
    print("Standard Deviation:", np.std(scores))


# Evaluate all metrics in a single cross-validation pass (one fit per fold)
# instead of re-running the whole cross-validation once per metric.
# "accuracy" matches the classifier's default `score`, which is what
# cross_val_score used when no `scoring` argument was given.
cv_results = cross_validate(
    pipeline,
    X,
    y,
    cv=CV_FOLDS,
    scoring=[
        "accuracy",
        "precision_weighted",
        "recall_weighted",
        "f1_weighted",
        "roc_auc_ovo_weighted",
    ],
)

# Keep the per-metric arrays under their original names.
scores_accuracy = cv_results["test_accuracy"]
scores_precision = cv_results["test_precision_weighted"]
scores_recall = cv_results["test_recall_weighted"]
scores_f1 = cv_results["test_f1_weighted"]
scores_roc_auc = cv_results["test_roc_auc_ovo_weighted"]

for title, scores in (
    ("Accuracy", scores_accuracy),
    ("Precision", scores_precision),
    ("Recall", scores_recall),
    ("F1", scores_f1),
    ("ROC_AUC", scores_roc_auc),
):
    print_score_summary(title, scores)


>> Accuracy
Scores: [0.6, 0.6285714285714286, 0.5882352941176471, 0.6176470588235294, 0.5882352941176471, 0.5294117647058824, 0.5882352941176471, 0.6470588235294118, 0.5588235294117647, 0.6176470588235294]
Min: 0.5294117647058824
Max: 0.6470588235294118
Mean: 0.5963865546218488
Standard Deviation: 0.032583051581081664

>> Precision
Scores: [0.5083505866114562, 0.6823747680890537, 0.5441176470588235, 0.5147058823529411, 0.6862745098039216, 0.37516339869281046, 0.48670076726342704, 0.6073529411764705, 0.4330242966751918, 0.6126050420168067]
Min: 0.37516339869281046
Max: 0.6862745098039216
Mean: 0.5450669839740901
Standard Deviation: 0.09712472474481933

>> Recall
Scores: [0.6, 0.6285714285714286, 0.5882352941176471, 0.6176470588235294, 0.5882352941176471, 0.5294117647058824, 0.5882352941176471, 0.6470588235294118, 0.5588235294117647, 0.6176470588235294]
Min: 0.5294117647058824
Max: 0.6470588235294118
Mean: 0.5963865546218488
Standard Deviation: 0.032583051581081664

>> F1
Scores: [0.490