In [1]:
import sys
!{sys.executable} -m pip install chefboost



In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from chefboost import Chefboost as chef

In [2]:
data = pd.read_csv('sampled.csv')
data = data.drop(columns=['commit', 'testClass', 'testMethod', 'testFilePath', 'productionFilePath', 'relativeTestFilePath', 'relativeProductionFilePath', 'tsTestClass', 'tsTestMethod', 'is_sampled', 'dependentTest', 'exceptionCatchingThrowing', 'vocabulary', 'tokens_parser', 'keywords_parser', 'strings_parser', 'string_type_parser', 'anotations_parser'], axis=1)
data = data.reset_index()

data = data.replace(np.nan, 0)
data = data.replace(True, 1)
data = data.replace(False, 0)

data = data.replace('flaky', 1)
data = data.replace('nonflaky', 0)

data.head(5)

Unnamed: 0,index,project,loc,smellsCount,assertionRoulette,conditionalTestLogic,constructorInitialization,defaultTest,duplicateAssert,eagerTest,...,mysteryGuest,printStatement,redundantAssertion,resourceOptimism,sensitiveEquality,sleepyTest,unknownTest,verboseTest,klass,dataset
0,0,oozie,85,4,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,msr4flakiness
1,1,hadoop,18,4,1,1,0,0,0,0,...,0,0,0,0,0,1,0,0,1,msr4flakiness
2,2,oozie,25,2,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,msr4flakiness
3,3,okhttp,21,4,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,msr4flakiness
4,4,oozie,40,3,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,msr4flakiness


In [3]:
data = data.astype({
    'loc': 'int32',
    'smellsCount': 'int32',
    'assertionRoulette': 'int32',
    'conditionalTestLogic': 'int32',
    'constructorInitialization': 'int32',
    'defaultTest': 'int32',
    'duplicateAssert': 'int32',
    'eagerTest': 'int32',
    'emptyTest': 'int32',
    'generalFixture': 'int32',
    'ignoredTest': 'int32',
    'lazyTest': 'int32',
    'magicNumberTest': 'int32',
    'mysteryGuest': 'int32',
    'printStatement': 'int32',
    'redundantAssertion': 'int32',
    'resourceOptimism': 'int32',
    'sensitiveEquality': 'int32',
    'sleepyTest': 'int32',
    'unknownTest': 'int32',
    'verboseTest': 'int32',
})

In [4]:
data = data.rename(columns={"klass":"Decision"})

In [5]:
idFlakiesProjects = ['redpipe', 'vertexium', 'javaCasClient', 'c2mon', 'vertx', 'excelastic', 'rxjava2', 'tyrus', 'esper', 'yawp', 'luwak', 'fluentLoggerJava', 'delightNashornSandbox', 'dbScheduler', 'one', 'sawmill', 'springCloudZuulRatelimit', 'timely', 'sos', 'openpojo', 'ociJavaSdk', 'aletheia', 'pippo', 'recast4j', 'noxy', 'springCloudAws', 'vertxMqtt', 'vertxRabbitmqClient', 'admiral', 'carbonApimgt', 'riptide', 'fastjson', 'dubbo', 'webcollector', 'doanduyhaiAchilles', 'elasticjoblite', 'disconf', 'hutool', 'oryx', 'querydsl', 'helios', 'retrofit', 'javaWebsocket', 'undertow', 'alien4cloud', 'cukes', 'hsac', 'googdDataCl', 'springDataBean', 'jhispster', 'marineApi', 'junitQuickcheck', 'nexus', 'springDataEnvers', 'springWs', 'aismessages', 'unix4j', 'wikidata', 'activiti', 'jackrabbit', 'struts', 'jfreechart', 'httpRequest', 'wildflymaven', 'nifiRegistry', 'arangoddb', 'dnsjava', 'as2lib', 'whois', 'dbean', 'searchHighlighter', 'wildflymavenplugin', 'balana', 'limfs', 'jodatime', 'otto', 'dropwizard']
msr4FlakinessProjects = ['Achilles', 'ambari', 'assertj-core', 'checkstyle', 'commons-exec', 'dropwizard', 'hadoop', 'handlebars', 'hbase', 'hector', 'httpcore', 'jackrabbit-oak', 'jimfs', 'logback', 'ninja', 'okhttp', 'oozie', 'orbit', 'oryx', 'spring-boot', 'alluxio', 'togglz', 'undertow', 'wro4j', 'zxing']

In [6]:
# filtering by dataset
allMsr4flakiness = data.loc[data.dataset == 'msr4flakiness']
before = len(allMsr4flakiness)
allMsr4flakiness

Unnamed: 0,index,project,loc,smellsCount,assertionRoulette,conditionalTestLogic,constructorInitialization,defaultTest,duplicateAssert,eagerTest,...,mysteryGuest,printStatement,redundantAssertion,resourceOptimism,sensitiveEquality,sleepyTest,unknownTest,verboseTest,Decision,dataset
0,0,oozie,85,4,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,msr4flakiness
1,1,hadoop,18,4,1,1,0,0,0,0,...,0,0,0,0,0,1,0,0,1,msr4flakiness
2,2,oozie,25,2,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,msr4flakiness
3,3,okhttp,21,4,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,msr4flakiness
4,4,oozie,40,3,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,msr4flakiness
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,2795,assertj-core,4,2,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,msr4flakiness
2796,2796,jackrabbit-oak,13,4,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,msr4flakiness
2797,2797,jackrabbit-oak,25,3,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,msr4flakiness
2798,2798,jackrabbit-oak,13,3,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,msr4flakiness


In [7]:
# filtering by projects
smells = allMsr4flakiness.loc[allMsr4flakiness.project.isin(msr4FlakinessProjects)]
after = len(smells)
smells = smells.reset_index()
smells

Unnamed: 0,level_0,index,project,loc,smellsCount,assertionRoulette,conditionalTestLogic,constructorInitialization,defaultTest,duplicateAssert,...,mysteryGuest,printStatement,redundantAssertion,resourceOptimism,sensitiveEquality,sleepyTest,unknownTest,verboseTest,Decision,dataset
0,0,0,oozie,85,4,1,1,0,0,0,...,0,0,0,0,0,0,0,0,1,msr4flakiness
1,1,1,hadoop,18,4,1,1,0,0,0,...,0,0,0,0,0,1,0,0,1,msr4flakiness
2,2,2,oozie,25,2,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,msr4flakiness
3,3,3,okhttp,21,4,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,msr4flakiness
4,4,4,oozie,40,3,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,msr4flakiness
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2772,2795,2795,assertj-core,4,2,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,msr4flakiness
2773,2796,2796,jackrabbit-oak,13,4,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,msr4flakiness
2774,2797,2797,jackrabbit-oak,25,3,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,msr4flakiness
2775,2798,2798,jackrabbit-oak,13,3,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,msr4flakiness


In [8]:
print(before, after)

2800 2777


# First training with msr4flakiness smells dataset

In [10]:
train = smells

### Dataset Split

In [11]:
labels=train['Decision']
features = train.drop(['project', 'Decision', 'dataset', 'level_0', 'index'], axis=1)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=1) #, random_state=0


### Regular Decision Tree

In [50]:
def print_intra_metrics(y_test, y_pred): 
    print("Intra-project metrics")
    print("Accuracy : ",metrics.accuracy_score(y_test, y_pred))
    print("Precision : ",metrics.precision_score(y_test, y_pred))
    print("Recall : ",metrics.recall_score(y_test, y_pred))
    print("F1 Score : ",metrics.f1_score(y_test, y_pred))

In [51]:
# metrics for regular decision tree
np.random.seed(1)
clf = DecisionTreeClassifier(criterion="gini", max_depth=5)
clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
print_intra_metrics(y_test, y_pred)


Intra-project metrics
Accuracy :  0.7751798561151079
Precision :  0.7526881720430108
Recall :  0.7894736842105263
F1 Score :  0.7706422018348624


### Regular Decision Tree without LOC

In [55]:
# metrics for decision tree MINUS LOC FEATURE
#features_noloc = features.loc[:, features.columns != 'loc']
#X_train, X_test, y_train, y_test = train_test_split(features_noloc, labels, test_size=0.1, random_state=1)

X_train_noloc = X_train.loc[:, X_train.columns != 'loc']
X_test_noloc = X_test.loc[:, X_test.columns != 'loc']

np.random.seed(1)
clf_noloc = DecisionTreeClassifier(criterion="gini", max_depth=5)
clf_noloc = clf_noloc.fit(X_train_noloc,y_train)
y_pred = clf_noloc.predict(X_test_noloc)
print_intra_metrics(y_test, y_pred)


Intra-project metrics
Accuracy :  0.7428057553956835
Precision :  0.7029702970297029
Recall :  0.8007518796992481
F1 Score :  0.7486818980667838


### metrics for regular decision tree, minus seven metrics where mattew correlation coef was less than 0.1


In [54]:
# metrics for regular decision tree, minus seven metrics where mattew correlation coef was less than 0.1
np.random.seed(1)

X_train_reduced=X_train.drop(columns=['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest'])
X_test_reduced=X_test.drop(columns=['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest'])

clf_reduced = DecisionTreeClassifier()
clf_reduced = clf_reduced.fit(X_train_reduced,y_train)
y_pred_reduced = clf_reduced.predict(X_test_reduced)
print_intra_metrics(y_test, y_pred)

Intra-project metrics
Accuracy :  0.7428057553956835
Precision :  0.7029702970297029
Recall :  0.8007518796992481
F1 Score :  0.7486818980667838


# Using ChefBoost DT Algos

### Combining X and Y sets

In chefboost, the target variable must be stored in the same df as the features, it must be called Decision, and it must be the last column of the dataframe (for a workaround just set target_label=klass instead of having to create a new column)

source: https://towardsdatascience.com/chefboost-an-alternative-python-library-for-tree-based-models-f46af028a348


In [27]:
#labels=train['Decision']
features = train.drop(['project', 'dataset', 'level_0', 'index'], axis=1)
features['Decision'] = features['Decision'].astype(str)
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0


### Training each model with all features selected for intra-project context with msr4flakiness

In [66]:
#models = {'ID3', 'C4.5', 'CART', 'CHAID'}
#trained_models = {}

#for model in models:
#    chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0
#    fitted_model = chef.fit(chef_train, {'algorithm': model})
#    chef.evaluate(fitted_model, chef_test, task="test")
#    trained_models.add(model, fitted_model)
#    print('\n\n')

#classifiers_new = {
#    'ID3 (all)': 
#    'c4.5 (all)': chef.fit(chef_train, {'algorithm': 'C4.5'}),
#    'CART (all)': chef.fit(chef_train, {'algorithm': 'CART'}),
#    'CHAID (all)': chef.fit(chef_train, {'algorithm': 'CHAID'})
   # 'ID3 (reduced)': chef.fit(X_train_reduced, {'algorithm': 'ID3'}),
   # 'c4.5 (reduced)': chef.fit(X_train_reduced, {'algorithm': 'C4.5'}),
   # 'CART (reduced)': chef.fit(X_train_reduced, {'algorithm': 'CART'}),
   # 'CHAID (reduced)': chef.fit(X_train_reduced, {'algorithm': 'CHAID'})
#}

In [87]:
inter_test = data.loc[data.dataset == 'idFlakies']
inter_test = inter_test.reset_index()
inter_test['Decision'] = inter_test['Decision'].astype(str)
inter_test = inter_test.drop(['index', 'project', 'dataset', 'level_0'], axis=1)
#idflakies = idflakies.drop(['index', 'project', 'klass', 'dataset', 'level_0'], axis=1)
inter_test


Unnamed: 0,loc,smellsCount,assertionRoulette,conditionalTestLogic,constructorInitialization,defaultTest,duplicateAssert,eagerTest,emptyTest,generalFixture,...,magicNumberTest,mysteryGuest,printStatement,redundantAssertion,resourceOptimism,sensitiveEquality,sleepyTest,unknownTest,verboseTest,Decision
0,8,3,0,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1,27,4,1,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,1
2,41,5,1,0,0,0,1,1,0,0,...,1,0,0,0,0,0,0,0,0,1
3,104,3,0,1,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,1
4,4,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148,11,3,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
149,11,3,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
150,11,3,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
151,9,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [88]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0

id3_model = chef.fit(chef_train, config={'algorithm': 'ID3'})
chef.evaluate(id3_model, chef_test, task="test")
print('\n\n')
chef.evaluate(id3_model, inter_test, task="test")

[INFO]:  4 CPU cores will be allocated in parallel running
ID3  tree is going to be built...
-------------------------
finished in  123.56367087364197  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  82.62044124268347 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[875, 150], [236, 960]]
Precision:  85.3659 %, Recall:  78.7579 %, F1:  81.9289 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  81.4748201438849 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[245, 58], [45, 208]]
Precision:  80.8581 %, Recall:  84.4828 %, F1:  82.6307 %



-------------------------
Evaluate  test set
-------------------------
Accuracy:  54.90196078431372 % on  153  instances
Labels:  ['1']
Confusion matrix:  [[84]]
Precision:  100.0 %, Recall:  54.902 %, F1:  70.8861 %


In [89]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0

c45_model = chef.fit(chef_train, config={'algorithm': 'C4.5'})
chef.evaluate(c45_model, chef_test, task="test")
print('\n\n')
chef.evaluate(c45_model, inter_test, task="test")

[INFO]:  4 CPU cores will be allocated in parallel running
C4.5  tree is going to be built...
-------------------------
finished in  92.60029220581055  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  82.62044124268347 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[875, 150], [236, 960]]
Precision:  85.3659 %, Recall:  78.7579 %, F1:  81.9289 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  81.29496402877697 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[245, 59], [45, 207]]
Precision:  80.5921 %, Recall:  84.4828 %, F1:  82.4916 %



['loc', 'smellsCount', 'assertionRoulette', 'conditionalTestLogic', 'constructorInitialization', 'defaultTest', 'duplicateAssert', 'eagerTest', 'emptyTest', 'generalFixture', 'ignoredTest', 'lazyTest', 'magicNumberTest', 'mysteryGuest', 'printStatement', 'redundantAssertion', 'resourceOptimism', 'sensitiveEquality', 'sleepyTest', 'unknownTest', 'verbo

In [92]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0

cart_model = chef.fit(chef_train, config={'algorithm': 'CART'})
chef.evaluate(cart_model, chef_test, task="test")
print('\n\n')
chef.evaluate(cart_model, inter_test, task="test")

[INFO]:  4 CPU cores will be allocated in parallel running
CART  tree is going to be built...
-------------------------
finished in  125.42141604423523  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  82.62044124268347 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[875, 150], [236, 960]]
Precision:  85.3659 %, Recall:  78.7579 %, F1:  81.9289 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  81.4748201438849 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[245, 58], [45, 208]]
Precision:  80.8581 %, Recall:  84.4828 %, F1:  82.6307 %



['loc', 'smellsCount', 'assertionRoulette', 'conditionalTestLogic', 'constructorInitialization', 'defaultTest', 'duplicateAssert', 'eagerTest', 'emptyTest', 'generalFixture', 'ignoredTest', 'lazyTest', 'magicNumberTest', 'mysteryGuest', 'printStatement', 'redundantAssertion', 'resourceOptimism', 'sensitiveEquality', 'sleepyTest', 'unknownTest', 'verbo

In [93]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0

chaid_model = chef.fit(chef_train, config={'algorithm': 'CHAID'})
chef.evaluate(chaid_model, chef_test, task="test")
print('\n\n')
chef.evaluate(chaid_model, inter_test, task="test")

[INFO]:  4 CPU cores will be allocated in parallel running
CHAID  tree is going to be built...
-------------------------
finished in  123.21616196632385  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  82.62044124268347 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[875, 150], [236, 960]]
Precision:  85.3659 %, Recall:  78.7579 %, F1:  81.9289 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  81.65467625899281 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[244, 56], [46, 210]]
Precision:  81.3333 %, Recall:  84.1379 %, F1:  82.7118 %



['loc', 'smellsCount', 'assertionRoulette', 'conditionalTestLogic', 'constructorInitialization', 'defaultTest', 'duplicateAssert', 'eagerTest', 'emptyTest', 'generalFixture', 'ignoredTest', 'lazyTest', 'magicNumberTest', 'mysteryGuest', 'printStatement', 'redundantAssertion', 'resourceOptimism', 'sensitiveEquality', 'sleepyTest', 'unknownTest', 'ver

## reduced features


In [94]:
#features = train.drop(['project', 'dataset', 'level_0', 'index'], axis=1)
#features['Decision'] = features['Decision'].astype(str)
#chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0

chef_train_reduced=chef_train.drop(columns=['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest'])
chef_test_reduced=chef_test.drop(columns=['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest'])

#inter_test = data.loc[data.dataset == 'idFlakies']
#inter_test = inter_test.reset_index()
#inter_test['Decision'] = inter_test['Decision'].astype(str)
#inter_test = inter_test.drop(['index', 'project', 'dataset', 'level_0'], axis=1)
inter_test_reduced = inter_test.drop(columns=['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest'])


In [97]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0
dropped_cols = ['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest']
chef_train_red=chef_train.drop(columns=dropped_cols)
chef_test_red=chef_test.drop(columns=dropped_cols)
inter_test_red = inter_test.drop(columns=dropped_cols)

id3_model_red = chef.fit(chef_train_red, config={'algorithm': 'ID3'})
chef.evaluate(id3_model_red, chef_test_red, task="test")
print('\n\n')
chef.evaluate(id3_model_red, inter_test_red, task="test")

[INFO]:  4 CPU cores will be allocated in parallel running
ID3  tree is going to be built...
-------------------------
finished in  46.815394163131714  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  79.15353444394417 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[799, 151], [312, 959]]
Precision:  84.1053 %, Recall:  71.9172 %, F1:  77.5352 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  79.31654676258992 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[243, 68], [47, 198]]
Precision:  78.135 %, Recall:  83.7931 %, F1:  80.8652 %



['loc', 'smellsCount', 'assertionRoulette', 'constructorInitialization', 'defaultTest', 'duplicateAssert', 'emptyTest', 'generalFixture', 'ignoredTest', 'lazyTest', 'mysteryGuest', 'printStatement', 'sensitiveEquality', 'sleepyTest', 'Prediction', 'Decision']
-------------------------
Evaluate  test set
-------------------------
Accuracy:  50.326797385

In [98]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0
dropped_cols = ['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest']
chef_train_red=chef_train.drop(columns=dropped_cols)
chef_test_red=chef_test.drop(columns=dropped_cols)
inter_test_red = inter_test.drop(columns=dropped_cols)

c45_model_red = chef.fit(chef_train_red, config={'algorithm': 'C4.5'})
chef.evaluate(c45_model_red, chef_test_red, task="test")
print('\n\n')
chef.evaluate(c45_model_red, inter_test_red, task="test")


[INFO]:  4 CPU cores will be allocated in parallel running
C4.5  tree is going to be built...
-------------------------
finished in  30.647319078445435  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  78.83836109860424 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[802, 161], [309, 949]]
Precision:  83.2814 %, Recall:  72.1872 %, F1:  77.3385 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  78.77697841726619 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[240, 68], [50, 198]]
Precision:  77.9221 %, Recall:  82.7586 %, F1:  80.2676 %



['loc', 'smellsCount', 'assertionRoulette', 'constructorInitialization', 'defaultTest', 'duplicateAssert', 'emptyTest', 'generalFixture', 'ignoredTest', 'lazyTest', 'mysteryGuest', 'printStatement', 'sensitiveEquality', 'sleepyTest', 'Prediction', 'Decision']
-------------------------
Evaluate  test set
-------------------------
Accuracy:  55.5555555

In [99]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0
dropped_cols = ['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest']
chef_train_red=chef_train.drop(columns=dropped_cols)
chef_test_red=chef_test.drop(columns=dropped_cols)
inter_test_red = inter_test.drop(columns=dropped_cols)

cart_model_red = chef.fit(chef_train_red, config={'algorithm': 'CART'})
chef.evaluate(cart_model_red, chef_test_red, task="test")
print('\n\n')
chef.evaluate(cart_model_red, inter_test_red, task="test")


[INFO]:  4 CPU cores will be allocated in parallel running
CART  tree is going to be built...
-------------------------
finished in  42.936814069747925  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  79.24358397118415 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[797, 147], [314, 963]]
Precision:  84.428 %, Recall:  71.7372 %, F1:  77.5669 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  79.85611510791367 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[247, 69], [43, 197]]
Precision:  78.1646 %, Recall:  85.1724 %, F1:  81.5182 %



['loc', 'smellsCount', 'assertionRoulette', 'constructorInitialization', 'defaultTest', 'duplicateAssert', 'emptyTest', 'generalFixture', 'ignoredTest', 'lazyTest', 'mysteryGuest', 'printStatement', 'sensitiveEquality', 'sleepyTest', 'Prediction', 'Decision']
-------------------------
Evaluate  test set
-------------------------
Accuracy:  48.36601307

In [100]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0
dropped_cols = ['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest']
chef_train_red=chef_train.drop(columns=dropped_cols)
chef_test_red=chef_test.drop(columns=dropped_cols)
inter_test_red = inter_test.drop(columns=dropped_cols)

chaid_model_red = chef.fit(chef_train_red, config={'algorithm': 'CHAID'})
chef.evaluate(chaid_model_red, chef_test_red, task="test")
print('\n\n')
chef.evaluate(chaid_model_red, inter_test_red, task="test")


[INFO]:  4 CPU cores will be allocated in parallel running
CHAID  tree is going to be built...
-------------------------
finished in  43.63708806037903  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  78.79333633498425 % on  2221  instances
Labels:  ['1' '0']
Confusion matrix:  [[803, 163], [308, 947]]
Precision:  83.1263 %, Recall:  72.2772 %, F1:  77.323 %
-------------------------
Evaluate  test set
-------------------------
Accuracy:  79.31654676258992 % on  556  instances
Labels:  ['0' '1']
Confusion matrix:  [[240, 65], [50, 201]]
Precision:  78.6885 %, Recall:  82.7586 %, F1:  80.6722 %



['loc', 'smellsCount', 'assertionRoulette', 'constructorInitialization', 'defaultTest', 'duplicateAssert', 'emptyTest', 'generalFixture', 'ignoredTest', 'lazyTest', 'mysteryGuest', 'printStatement', 'sensitiveEquality', 'sleepyTest', 'Prediction', 'Decision']
-------------------------
Evaluate  test set
-------------------------
Accuracy:  56.86274509

# Finetuning best model in intra-project context

#### Random Forest

In [None]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0

chaid_model = chef.fit(chef_train, config={'algorithm': 'CHAID', 'enableRandomForest': True})
chef.evaluate(chaid_model, chef_test, task="test")
print('\n\n')
chef.evaluate(chaid_model, inter_test, task="test")

In [None]:
# chaid_4_config = {'algorithm': 'CHAID', 'num_of_trees': 4}


In [15]:
# chaid_gb_config = {'algorithm': 'CHAID', 'enableGBM': True}


In [None]:
# chaid_multi_config = {'algorithm': 'CHAID', 'enableRandomForest': True,'enableMultitasking': True,'enableGBM': True,}


# Finetuning best model in inter-project context

In [None]:
chef_train, chef_test = train_test_split(features, test_size=0.2, random_state=1) #, random_state=0
dropped_cols = ['conditionalTestLogic', 'eagerTest', 'magicNumberTest', 'redundantAssertion', 'resourceOptimism', 'unknownTest', 'verboseTest']
chef_train_red=chef_train.drop(columns=dropped_cols)
chef_test_red=chef_test.drop(columns=dropped_cols)
inter_test_red = inter_test.drop(columns=dropped_cols)

chaid_model_red = chef.fit(chef_train_red, config={'algorithm': 'CHAID'})
chef.evaluate(chaid_model_red, chef_test_red, task="test")
print('\n\n')
chef.evaluate(chaid_model_red, inter_test_red, task="test")


#### Future/other types of trees to test out

C5.0
- https://topepo.github.io/C5.0/reference/C5.0.html
- not fully implemented yet in python, will need to figure this out

Supervised Learning in Ques (SLIQ)
- https://datascience.stackexchange.com/questions/109450/decision-tree-sprint-vs-sliq
- https://www.quora.com/What-is-the-best-way-to-implement-C4-5-or-C5-0-algorithm-using-Python
- https://kandi.openweaver.com/python/wilsonlj/SLIQ
- doesn't have a great implementation in python. Can use other people's code from github

Decision stump
- hard to find implementation

In [32]:
# Adaboost
# need to convert Object data type for 'Decision' category into numeric, still has errors



#np.random.seed(1)
#features_numeric = features
#features_numeric['Decision'] = pd.to_numeric(features_numeric['Decision'])
#features_numeric.dtypes

#X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.1, random_state=1)

#chaid_ada_config = {'algorithm': 'CHAID', 'enableAdaboost':True}
#chaid_ada_model = chef.fit(X_train, chaid_ada_config)
#chef.evaluate(chaid_ada_model, X_test, task="test")


Also tried, but got test/train accuracy same as what I already previously got:
- Multitasking: True
- max_depth: 5
- num_of_trees: 4
- num_of_trees: 2
- learning_rate: 2
- enableRandomForest: True + enableMultitasking: True + enableGBM: True,

In [63]:
#chaid_acc = pd.DataFrame({'test type': ['CHAID', 'CHAID + RandomForest',
#                                        'CHAID (3 trees)', 'CHAID (6 trees)',
#                                        'CHAID + Gradient Boosting', ],
#                          'train accuracy': [81.37, 76.89, 81.37, 81.37, 74.86],
#                          'test accuracy': [80.41, 75.00, 80.41, 80.41, 75.00]})
#model_acc = pd.concat([model_acc, entry_1], axis=0, ignore_index=True)
#chaid_acc.sort_values(by=['test accuracy','train accuracy'], ascending=False)

Unnamed: 0,test type,train accuracy,test accuracy
0,CHAID,81.37,80.41
2,CHAID (3 trees),81.37,80.41
3,CHAID (6 trees),81.37,80.41
1,CHAID + RandomForest,76.89,75.0
4,CHAID + Gradient Boosting,74.86,75.0


In [64]:
# CHAID without random forest or gradient boosting still performs the best