In [1]:
import numpy as np
import pandas as pd
from main import tree_grow, tree_grow_b, tree_pred, tree_pred_b
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from IPython.display import display, Markdown, Latex
from mlxtend.evaluate import mcnemar
from mlxtend.evaluate import mcnemar_table
from time import time

In [2]:
# Fit on the Eclipse 2.0 package metrics and evaluate on the 3.0 release.
train = pd.read_csv("eclipse-metrics-packages-2.0.csv", delimiter=";")
test = pd.read_csv("eclipse-metrics-packages-3.0.csv", delimiter=";")

# Predictor columns: `pre` plus 40 metric columns (41 in total). The order
# matters because later cells map a node's feature index back to a name
# through this list.
feats = [
    'pre',
    'ACD_avg', 'ACD_max', 'ACD_sum',
    'FOUT_avg', 'FOUT_max', 'FOUT_sum',
    'MLOC_avg', 'MLOC_max', 'MLOC_sum',
    'NBD_avg', 'NBD_max', 'NBD_sum',
    'NOCU',
    'NOF_avg', 'NOF_max', 'NOF_sum',
    'NOI_avg', 'NOI_max', 'NOI_sum',
    'NOM_avg', 'NOM_max', 'NOM_sum',
    'NOT_avg', 'NOT_max', 'NOT_sum',
    'NSF_avg', 'NSF_max', 'NSF_sum',
    'NSM_avg', 'NSM_max', 'NSM_sum',
    'PAR_avg', 'PAR_max', 'PAR_sum',
    'TLOC_avg', 'TLOC_max', 'TLOC_sum',
    'VG_avg', 'VG_max', 'VG_sum',
]

# Feature frames and raw `post` targets for both releases.
train_x = train[feats]
train_y = train["post"]
test_x = test[feats]
test_y = test["post"]

In [3]:
# Convert both feature frames to plain NumPy matrices, which is what the
# tree routines are called with below.
train_x_np, test_x_np = (frame.to_numpy() for frame in (train_x, test_x))

In [4]:
# Binarise the targets: 1 where `post` is positive, 0 otherwise.
train_y_np = np.where(train_y.to_numpy() > 0, 1, 0)
test_y_np = np.where(test_y.to_numpy() > 0, 1, 0)

In [5]:
# Test-set predictions of every fitted model, keyed by a display name.
preds_model = dict()

In [6]:
# Fit a single classification tree, report the wall-clock fitting time and
# store its test-set predictions.
# NOTE(review): 15, 5 and 41 are passed positionally; presumably they are
# nmin, minleaf and nfeat (41 == len(feats)) -- confirm against main.py.
start = time()
single_tree = tree_grow(train_x_np, train_y_np, 15, 5, 41)
end = time()
elapsed = end - start
print(f"Training single tree took {elapsed} seconds")
preds_model["single tree"] = test_y_np_pred_st = tree_pred(test_x_np, single_tree)

Training single tree took 1.0645403861999512 seconds


In [7]:
# len(single_tree.y) == (len(single_tree.l.y) + len(single_tree.r.y))

In [8]:
def report_scores(y_true,y_pred):
    """Display the confusion matrix and basic scores of a binary classifier.

    The confusion matrix is rendered as a Markdown table and additionally
    printed as a LaTeX ``tabular`` snippet (rows: true class, columns:
    predicted class), followed by accuracy, precision and recall.

    Args:
        y_true: Ground-truth labels, encoded as 0 (negative) / 1 (positive).
        y_pred: Predicted labels with the same encoding and length.

    Returns:
        None. Everything is written to the cell output.
    """
    # Pin the label order so the matrix is always 2x2 -- without `labels`, an
    # input containing a single class yields a 1x1 matrix and the unpacking
    # below would fail. ravel() on the 2x2 matrix yields tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    print("Confusion matrix:")
    display(Markdown(f'''| True/Pred |Pos|Neg|
|-----------|---|---|
|       Pos |{tp}|{fn}|
|       Neg |{fp}|{tn}|
    '''))
    print("---------------------------------")
    # Raw strings: sequences such as "\h" and "\e" are invalid escapes in
    # ordinary literals and trigger a SyntaxWarning on current Python.
    print("LaTex formatting:", r'''
\begin{table}[]
\begin{tabular}{|r|l|l|}
\hline
True/Pred & Pos & Neg \\ \hline''')
    print(rf"Pos                             &  {tp}   &  {fn}   \\ \hline")
    print(rf"Neg                             & {fp}    &  {tn}   \\ \hline")
    print(r'''\end{tabular}
\end{table}''')
    print("------------------------------------")
    print("accuracy:", accuracy_score(y_true,y_pred))
    print("precision:", precision_score(y_true,y_pred))
    print("recall:", recall_score(y_true,y_pred))

In [9]:
# Scores of the single tree on the held-out release.
report_scores(y_true=test_y_np, y_pred=test_y_np_pred_st)

Confusion matrix:


| True/Pred |Pos|Neg|
|-----------|---|---|
|       Pos |185|128|
|       Neg |82|266|
    

---------------------------------
LaTex formatting: 
\begin{table}[]
\begin{tabular}{|r|l|l|}
\hline
True/Pred & Pos & Neg \\ \hline
Pos                             &  185   &  128   \\ \hline
Neg                             & 82    &  266   \\ \hline
\end{tabular}
\end{table}
------------------------------------
accuracy: 0.6822995461422088
precision: 0.6928838951310862
recall: 0.5910543130990416


In [10]:
# Inspect the top of the fitted tree, starting with the root node: the
# feature it splits on, the split value, and the share of class-1 labels
# among the samples stored on the node.
root = single_tree
split_feature, split_value = feats[root.f], root.s
y_values = root.y
y_values_1_ratio = sum(y_values) / len(y_values)
print(
    "Root values: "
    f"split_feature = {split_feature}, "
    f"split_value = {split_value}, "
    f"y_values_1_ratio = {y_values_1_ratio}"
)

Root values: split_feature = pre, split_value = 4.5, y_values_1_ratio = 0.5039787798408488


In [11]:
# Root node: label counts per class, split value and split feature name.
root_node = single_tree
print(np.bincount(root_node.y))
print(root_node.s)
print(feats[root_node.f])

[187 190]
4.5
pre


In [12]:
# `l` child of the root: label counts per class, split value, split feature.
left_child = single_tree.l
print(np.bincount(left_child.y))
print(left_child.s)
print(feats[left_child.f])

[156  58]
26.5
VG_max


In [13]:
# `r` child of the root: label counts per class, split value, split feature.
right_child = single_tree.r
print(np.bincount(right_child.y))
print(right_child.s)
print(feats[right_child.f])

[ 31 132]
0.1583333333333333
NOI_avg


In [14]:
# Fit the bagged ensemble, report the wall-clock fitting time and store its
# test-set predictions.
# NOTE(review): 15, 5, 41 and 100 are passed positionally; presumably nmin,
# minleaf, nfeat (all 41 features) and the number of trees -- confirm
# against main.py.
# NOTE(review): no random seed is set in this cell; if tree_grow_b resamples
# randomly, the recorded scores will not reproduce exactly -- confirm.
start = time()
bagging_tree = tree_grow_b(train_x_np, train_y_np, 15, 5, 41, 100)
end = time()
elapsed = end - start
print(f"Training bagging tree took {elapsed} seconds")
preds_model["bagging tree"] = test_y_np_pred_bt = tree_pred_b(test_x_np, bagging_tree)

Training bagging tree took 72.33398222923279 seconds


In [15]:
# Scores of the bagged ensemble on the held-out release.
report_scores(y_true=test_y_np, y_pred=test_y_np_pred_bt)

Confusion matrix:


| True/Pred |Pos|Neg|
|-----------|---|---|
|       Pos |207|106|
|       Neg |39|309|
    

---------------------------------
LaTex formatting: 
\begin{table}[]
\begin{tabular}{|r|l|l|}
\hline
True/Pred & Pos & Neg \\ \hline
Pos                             &  207   &  106   \\ \hline
Neg                             & 39    &  309   \\ \hline
\end{tabular}
\end{table}
------------------------------------
accuracy: 0.7806354009077155
precision: 0.8414634146341463
recall: 0.6613418530351438


In [16]:
# Fit the random forest, report the wall-clock fitting time and store its
# test-set predictions. The call matches the bagging cell except that the
# fifth argument is 6 instead of 41.
# NOTE(review): presumably that argument is nfeat, the number of features
# considered per split -- confirm against main.py.
# NOTE(review): no random seed is set in this cell; if tree_grow_b samples
# randomly, the recorded scores will not reproduce exactly -- confirm.
start = time()
random_tree = tree_grow_b(train_x_np, train_y_np, 15, 5, 6, 100)
end = time()
elapsed = end - start
print(f"Training random forest took {elapsed} seconds")
preds_model["random forest"] = test_y_np_pred_rt = tree_pred_b(test_x_np, random_tree)

Training random forest took 12.569201946258545 seconds


In [17]:
# Scores of the random forest on the held-out release.
report_scores(y_true=test_y_np, y_pred=test_y_np_pred_rt)

Confusion matrix:


| True/Pred |Pos|Neg|
|-----------|---|---|
|       Pos |217|96|
|       Neg |58|290|
    

---------------------------------
LaTex formatting: 
\begin{table}[]
\begin{tabular}{|r|l|l|}
\hline
True/Pred & Pos & Neg \\ \hline
Pos                             &  217   &  96   \\ \hline
Neg                             & 58    &  290   \\ \hline
\end{tabular}
\end{table}
------------------------------------
accuracy: 0.7670196671709532
precision: 0.7890909090909091
recall: 0.6932907348242812


In [19]:
# Pairwise McNemar tests: compare every unordered pair of fitted models once.
# Each model is paired only with the models that come after it in the list.
# Removing items from `models` while iterating over it skips elements, so
# with four or more models some pairs would never be tested.
models = list(preds_model.keys())
for idx, model1 in enumerate(models):
    for model2 in models[idx + 1:]:
        print(f"McNemar's test {model1} vs {model2}")
        # The code in the following three lines was adopted from [1].
        table = mcnemar_table(y_target=test_y_np, y_model1=preds_model[model1], y_model2=preds_model[model2])
        chi2_, p = mcnemar(ary=table, corrected=True)
        print(f"chi² statistic: {chi2_}, p-value: {p}\n")

McNemar's test single tree vs bagging tree
chi² statistic: 32.25196850393701, p-value: 1.3541982115265086e-08

McNemar's test single tree vs random forest
chi² statistic: 25.208333333333332, p-value: 5.14593654465032e-07

McNemar's test random forest vs bagging tree
chi² statistic: 0.9846153846153847, p-value: 0.3210619922539037



# References
[1] T. Toledo Jr, “Statistical tests for comparing classification algorithms,” Medium, Jan. 04, 2022. [Online]. Available: https://towardsdatascience.com/statistical-tests-for-comparing-classification-algorithms-ac1804e79bb7