In [1]:
!git clone https://github.com/jsaj/MDS_CPDP.git

Cloning into 'MDS_CPDP'...
remote: Enumerating objects: 952, done.[K
remote: Counting objects: 100% (439/439), done.[K
remote: Compressing objects: 100% (285/285), done.[K
remote: Total 952 (delta 204), reused 315 (delta 136), pack-reused 513[K
Receiving objects: 100% (952/952), 7.73 MiB | 17.78 MiB/s, done.
Resolving deltas: 100% (404/404), done.


In [2]:
!pip install deslib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
from MDS_CPDP.mdscpdp import MDSCPDP

from deslib.des import KNORAU, KNORAE
from deslib.dcs import LCA, OLA

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

import pandas as pd
from glob import glob

import warnings
warnings.filterwarnings("ignore")

In [4]:

# path of datasets to predict
path = '/content/MDS_CPDP/benchmark-execution/benchmarks/datasets/RELINK/*'

# read and create dataframe (dataset) with all projects for predict
dataset = []
for project_url in glob(path):
  productName = project_url.split('/')[len(project_url.split('/'))-1].split('.csv')[0]
  df = pd.read_csv(project_url)
  df.insert(0, 'productName', productName)
  dataset.append(df)
dataset = pd.concat(dataset).reset_index(drop=True)

dataset

Unnamed: 0,productName,isDefective,CountLineCodeExe,AvgCyclomatic,AvgCyclomaticModified,AvgCyclomaticStrict,AvgEssential,AvgLine,AvgLineBlank,AvgLineCode,...,CountStmtDecl,CountStmtExe,MaxCyclomatic,MaxCyclomaticModified,MaxCyclomaticStrict,RatioCommentToCode,SumCyclomatic,SumCyclomaticModified,SumCyclomaticStrict,SumEssential
0,Apache2.0,1,907,12,11,14,4,82,5,61,...,178,738,51,51,56,0.28,221,197,246,81
1,Apache2.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0.00,0,0,0,0
2,Apache2.0,1,1096,8,7,9,4,54,4,49,...,172,877,45,26,45,0.04,256,217,277,128
3,Apache2.0,0,522,5,5,6,2,37,5,27,...,112,414,25,25,30,0.32,134,122,146,63
4,Apache2.0,0,26,2,2,2,1,21,1,9,...,10,20,4,5,4,0.73,9,10,9,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
644,openintents,0,18,1,1,1,1,41,4,29,...,13,14,1,1,1,0.25,1,1,1,1
645,openintents,0,22,7,7,7,1,50,10,38,...,13,18,7,7,7,0.27,7,7,7,1
646,openintents,0,64,1,1,1,1,5,0,5,...,40,66,6,6,7,0.01,41,41,42,23
647,openintents,0,85,7,7,11,1,21,3,17,...,27,90,19,19,31,0.16,47,47,71,6


In [5]:
ds_algorithms = [KNORAU(), KNORAE(), OLA(), LCA()]
clf_estimators = [DecisionTreeClassifier(), LogisticRegression()]
size_pools = [25, 50]

#create MDSCPDP object to predict dataset
obj = MDSCPDP(dataset,
              dynamic_algorithm=ds_algorithms,
              base_estimator=clf_estimators,
              size_pool=size_pools)

#get MDSCPDP performance after predict the dataset. Return a pandas dataframe
obj.performances

Unnamed: 0,productName,percentBugs,dynamic_selection,classifier,size_pool,fscore,auc,pf,gmean,precision,recall,accuracy,tn,fp,fn,tp
0,Apache2.0,50.52,OLA,LogisticRegression,25,0.75,0.75553,0.28125,0.741663,0.735294,0.765306,0.742268,69,27,23,75
1,openintents,39.29,KNORAU,DecisionTreeClassifier,25,0.5,0.83957,0.14706,0.590703,0.642857,0.409091,0.678571,29,5,13,9
2,zxing1.6,29.57,KNORAU,DecisionTreeClassifier,25,0.33493,0.64886,0.19929,0.487339,0.384615,0.29661,0.651629,225,56,83,35


In [6]:
# get MDSCPDP hyperparamters performances by training set. Return a pandas dataframe
obj.vector_models

Unnamed: 0,dynamic_selection,classifier,size_pool,fscore,auc,pf,gmean,precision,recall,accuracy,tn,fp,fn,tp
0,KNORAU,DecisionTreeClassifier,25,0.0,0.421224,0.00317,0.0,0.0,0.0,0.69011,314,1,140,0
1,KNORAU,DecisionTreeClassifier,50,0.0,0.408254,0.00317,0.0,0.0,0.0,0.69011,314,1,140,0
2,KNORAU,LogisticRegression,25,0.013986,0.543401,0.00635,0.084247,0.333333,0.007143,0.69011,313,2,139,1
3,KNORAU,LogisticRegression,50,0.014085,0.517438,0.00317,0.084381,0.5,0.007143,0.692308,314,1,139,1
4,KNORAE,DecisionTreeClassifier,25,0.0,0.409751,0.00952,0.0,0.0,0.0,0.685714,312,3,140,0
5,KNORAE,DecisionTreeClassifier,50,0.0,0.42924,0.00635,0.0,0.0,0.0,0.687912,313,2,140,0
6,KNORAE,LogisticRegression,25,0.027027,0.566757,0.01905,0.118379,0.25,0.014286,0.683516,309,6,138,2
7,KNORAE,LogisticRegression,50,0.014085,0.563265,0.00317,0.084381,0.5,0.007143,0.692308,314,1,139,1
8,OLA,DecisionTreeClassifier,25,0.0,0.511327,0.00317,0.0,0.0,0.0,0.69011,314,1,140,0
9,OLA,DecisionTreeClassifier,50,0.0,0.499161,0.00952,0.0,0.0,0.0,0.685714,312,3,140,0
