# SETUP

In [1]:
from requests import get
import requests
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from IPython.display import HTML
import pyodbc
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from pandas.io.json import json_normalize
import time
from sklearn.model_selection import GridSearchCV

In [3]:

casebase=list()
#Function used retrieve relavant predictions for a given evaluation case. We call the prediction scores "similarity" for reusability reasons. 
#Prediction instances are filtered to ensure that they are relevant to the given case.
def retrieve_c_att22(case):
    global casebase
    cases=casebase
    cases=cases[cases["IndustrySubgroupCode"].astype(float)==float(case["IndustrySubgroupCode"])]
    cases=cases[cases["MunicipalityNumber"].astype(int)==int(case["MunicipalityNumber"])]
    results = cases.apply(pd.to_numeric, errors='coerce').fillna(cases).sort_values(by='similarity', ascending=False)
    results['similarity'] = results['similarity'].astype(float)
    results['NonCompliance'] = results['NonCompliance'].astype(int)
    return results


In [4]:

casebase=list()
initCBCopy=casebase
currentCBCopy=casebase

#Fill casebase in case its empty
dataZero=()
def readData():
    global dataZero
    cnxn = pyodbc.connect('DRIVER={SQL Server Native Client 11.0};SERVER=localhost;DATABASE=IJCAI2022;Trusted_Connection=yes;')
    cursor = cnxn.cursor()
    dataZero=pd.read_sql("SELECT  * "+"FROM [IJCAI2022].[dbo].[BayesianDynamicChecklistLocalDb]"+
                      "left join (select NewID() as new, InspectionId as inspID from [IJCAI2022].[dbo].[BayesianDynamicChecklistLocalDb] group by InspectionId) as a on a.inspId=InspectionId"+
                      " where InspectionDateId<20190601 ORDER BY new",cnxn)
    #Query selects items in random order (via the newID method), grouped by the inspectionIDs. 
    #We are doing crossvalidation with random split based on the indices of the randomly retrieved inspection ids. 
    #Thus, we are evaluating outcomes of inspections. This method also prevents data bleed/leakage.

def FindBestParameters():
    global casebase
    global dataZero
    # Specifying the ODBC driver, server name, database, etc. directly

    cnxn = pyodbc.connect('DRIVER={SQL Server Native Client 11.0};SERVER=localhost;DATABASE=IJCAI2022;Trusted_Connection=yes;')
    cursor = cnxn.cursor()
    data0=pd.read_sql("SELECT  * "+"FROM [IJCAI2022].[dbo].[BayesianDynamicChecklistLocalDb]",cnxn)
    data=pd.DataFrame(data={'NonCompliance':data0['NonCompliance'],'IndustrySubgroupCode':data0['IndustrySubgroupCode'],'ControlPointText': data0["ControlPointText"],"InspectionDateId":data0["InspectionDateId"],"Municipality":data0["Municipality"]})
    cat_vars=['ControlPointText','IndustrySubgroupCode',"Municipality"]
    for var in cat_vars:
        cat_list='var'+'_'+var
        cat_list = pd.get_dummies(data[var], prefix=var)
        #print(cat_list)
        data1=data.join(cat_list)
        data=data1
         #checklist.loc[checklist["AntallBrudd"]==-1,"AntallBrudd"]=0
    cat_vars=['ControlPointText','IndustrySubgroupCode',"Municipality"]
    data_vars=data.columns.values.tolist()
    to_keep=[i for i in data_vars if i not in cat_vars]
    #print(to_keep)
    data=data[to_keep]
    cat_vars=['NonCompliance']
    data_vars=data.columns.values.tolist()
    to_keep=[i for i in data_vars if i not in cat_vars]
    data2=data[to_keep]


    data.loc[np.isnan(data['NonCompliance']),"NonCompliance"]=0
    data.loc[data['NonCompliance']<0,"NonCompliance"]=0
    data.loc[data['NonCompliance']>1,"NonCompliance"]=1

    y=data["NonCompliance"]
    data=data.drop(columns=["NonCompliance"])

    data=data.drop(columns=["InspectionDateId"])
    mlp_gs = MLPClassifier(max_iter=50)
    parameter_space = {
        'hidden_layer_sizes': [(10,),(20,),(50,),(100,)],
        'activation': ['relu','logistic'],
        'alpha': [0.0001, 0.05]
    }
    
    clf = GridSearchCV(mlp_gs, parameter_space, n_jobs=2, cv=4)
    clf.fit(data,y)

    return clf
 
    

def fillCaseBase(index):
    global casebase
    global dataZero
    # Specifying the ODBC driver, server name, database, etc. directly
    data0=dataZero
    data=pd.DataFrame(data={'NonCompliance':data0['NonCompliance'],'IndustrySubgroupCode':data0['IndustrySubgroupCode'],'ControlPointText': data0["ControlPointText"],"InspectionDateId":data0["InspectionDateId"],"Municipality":data0["Municipality"]})
    datatestout=data0.loc[round((len(data0)/8)*index):round((len(data0)/8)*(index+1))]
    #Select independent variable and convert to one-hot vectors
    cat_vars=['ControlPointText','IndustrySubgroupCode',"Municipality"]
    for var in cat_vars:
        cat_list='var'+'_'+var
        cat_list = pd.get_dummies(data[var], prefix=var)
        data1=data.join(cat_list)
        data=data1
    cat_vars=['ControlPointText','IndustrySubgroupCode',"Municipality"]
    data_vars=data.columns.values.tolist()
    to_keep=[i for i in data_vars if i not in cat_vars]
    data=data[to_keep]


    #Making sure that the target variable is binary
    data.loc[np.isnan(data['NonCompliance']),"NonCompliance"]=0
    data.loc[data['NonCompliance']<0,"NonCompliance"]=0
    data.loc[data['NonCompliance']>1,"NonCompliance"]=1

    #retrieving the test-fold out of the 8 crossvalidation folds (rest are training folds.)
    datatest=data.loc[round((len(data)/8)*index):round((len(data)/8)*(index+1))]
    datatesty=datatest["NonCompliance"]
    datatest=datatest.drop(columns=["NonCompliance"])
    #Retrieving the 7 training folds, leaving the test-fold out.
    datacp=data.copy()
    if index>0 and index <7:                   
        datacp=datacp.loc[0:round((len(datacp)/8)*index)-1]
        datacp=datacp.append(data.loc[round((len(data)/8)*(index+1)):len(data)])
    elif index<1:
        datacp=datacp.loc[round((len(datacp)/8))*(index+1):len(datacp)]
    else:
        datacp=datacp.loc[0:round((len(data)/8)*index)]
    
    data=datacp
    #Creating the target variable, dropping unwanted columns from the training set and training the model.    
    y=data["NonCompliance"]
    data=data.drop(columns=["NonCompliance"])
    data=data.drop(columns=["InspectionDateId"])
    datatest=datatest.drop(columns=["InspectionDateId"])
    clf = MLPClassifier(activation='logistic', hidden_layer_sizes=(20,),alpha=0.0001,max_iter=100)
    t=clf.fit(data,y)
    #generate prediction for the test data set. The test data set is used to create and evaluate checklists below.
    tes=clf.predict_proba(datatest)
    tes=(datatestout).assign(similarity=tes[:,1]) #We named the prediction score "similarity" for easy reuse in other scripts.
    tes=tes.assign(NonCompliance=datatesty)
    casebase=tes #Test data set is assigned to "casebase"


# Find the best parameters

In [16]:
start_time = time.time()
clf=FindBestParameters()
traintime=traintime+(time.time()-start_time)
print("Training time: "+ str(traintime))

print(clf.best_params_)
print(clf.best_estimator_)
fh2=open("Log_" + "NNChecklistsBestParameters"+ '.txt', 'w+')
fh2.write("Training time: "+ str(traintime))
fh2.write("Best parameters:"+ str(clf.best_params_))
fh2.write("Best estimator:"+ str(clf.best_estimator_))
fh2.close()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Training time: 214469.08248615265
{'activation': 'logistic', 'alpha': 0.0001, 'hidden_layer_sizes': (20,)}
MLPClassifier(activation='logistic', hidden_layer_sizes=(20,), max_iter=50)


# EXPERIMENT, STATISTICS FOR GENERATED CHECKLISTS

In [5]:

currentCBCopy=initCBCopy
readData()
timetotal=0
traintime=0
traintimetot=0
precctot=0
accctot=0
preccgttot=0
recctot=0
fh2=open("Log_" + "NNChecklists" + '.txt', 'w+')
#for-loop to perform 8-fold crossvalidation. ik is used as an index for the validation folds.
for ik in range(0,8):
    
    start_time = time.time()
    fillCaseBase(ik)
    initCBCopy=casebase
    currentCBCopy=initCBCopy

    traintime=(time.time()-start_time)
    traintimetot=traintimetot+(time.time()-start_time)
    print("Train time: "+str(traintime))
    noncompliance=0 #true positive(for ground truth labels only)
    controlpointsgtcount=0 
    noncompliancengt=0 #true positive (for statistical estimates, see main paper)
    controlpointscount=0 
    truepositiveval=0
    truepositive=0
    falsepositivengt=0
    recallval=0
    accuracyval=0
    precision=0
    precisiongt=0
    lengthprecgt=0
    recall=0
    accuracy=0
    lengthprec=0.00001
    lengthvalprec=0
    lengthrec=0.00001
    lengthvalrec=0
    lengthacc=0.00001
    lengthvalacc=0
    truenegative=0
    falsenegative=0
    similarity=0
    counter=0
    Kcp=15
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', -1)
    noncompliantInspection=0
    inspections=casebase.drop_duplicates(subset = ["InspectionId"])
    cases=casebase
    cases["NonCompliance"]=cases["NonCompliance"].astype(int)
    casestest=len(cases.drop_duplicates(subset = ["ControlPointText"]))
    instanceMatches=list()
    negatives=list()
    inspectionstot=0
    precchecklists=0
    recchecklists=0
    accchecklists=0
    precchecklistsval=0
    recchecklistsval=0
    accchecklistsval=0
    print(casestest)
    #Iterate through all the past inspections in the data set. 
    #"case" contains a single inspection (which in practice consists of a set of instances in the data set).
    for ind, case in inspections.iterrows():
   
        inspectionstot+=1
        uniquechecklistlengthval=0
        instanceMatches=list()
        counter+=1
        if counter>500:
            counter=0
            print(ind)
            


        #Retrieve the existing checklist. ExistingChecklist contains the checklist used in the current inspection.
        existingChecklist=cases[cases['InspectionId']==case['InspectionId']]
        #filter validation instances (cases) so that they match the industry code and municipality of the current inspection.
        filteredCases=cases[cases["IndustrySubgroupCode"].astype(float)==float(case["IndustrySubgroupCode"])]
        filteredCases=filteredCases[filteredCases["MunicipalityNumber"].astype(int)==int(case["MunicipalityNumber"])]
        filteredCases["NonCompliance"]=filteredCases["NonCompliance"].astype(float)
        #Temporary assigning the filtered cases. Another filter is applied further down to filter out positive cases.
        negatives=filteredCases
        if len(filteredCases)<=0:#Fail-safe mechanism. filteredCases should in practice never be 0.
            continue
        
        
        #Retrieve a constructed checklist
        checklist=retrieve_c_att22(case)#Retrieve control points that matches each inspection (input case). checklist=CL from paper

        #Drop any retrieved duplicates and remove select the top (Kcp) items with highest prediction score
        uniqueChecklist=checklist.drop_duplicates(subset = ["ControlPointText"])#Find all unique control points by removing duplicates
        uniqueChecklist=uniqueChecklist.head(Kcp)
        if len(uniqueChecklist)>(0):#Safe check to make sure that somethingis on the checklist.
            truepositive=0
            truepositiveval=0
            similarity+=uniqueChecklist["similarity"].sum() #Sums prediction score (prediction score=similarity)
            #Record statistics. sums up the ratio/fraction of non-compliance for each of the (unique) item in the checklist.
            precpercl=0
            lengthorgcl=0
            #Iterate through all the items on a constructed checklist
            for ind2, generatedChecklistControlpoint in uniqueChecklist.iterrows():#Find overlap between the existing and new generated checklist
                #find the predicted negative records for the constructed checklist. uniqueChecklist contains all the predicted positives.
                negatives=negatives[negatives["ControlPointText"]!=generatedChecklistControlpoint["ControlPointText"]]
                #excp contains the items that can be found on both the existing checklist and the generated checklist
                excp=existingChecklist[existingChecklist["ControlPointText"]==generatedChecklistControlpoint["ControlPointText"]]
                #calculate ground truth precision for if excp contains an checklist item.
                if len(excp)>0:
                    precpercl+=((excp["NonCompliance"].sum())/len(excp["NonCompliance"])) #used to calculate average ground truth precision
                    lengthorgcl+=1 #used to calculate average ground truth precision 
                    controlpointsgtcount+=1 #Used to find average number of items per checklist
                controlpointscount+=1
                #groups instances from the vaildation set by items and selects instances that matches the current item (generatedChecklistControlPoint)
                instancesMatchingCurrentGenClItem=filteredCases.groupby(["ControlPointText"],as_index=False).mean()
                instancesMatchingCurrentGenClItem=instancesMatchingCurrentGenClItem[instancesMatchingCurrentGenClItem["ControlPointText"]==generatedChecklistControlpoint["ControlPointText"]]
                #Finds the total number of instances in the validation set with positive labels.
                summ2=instancesMatchingCurrentGenClItem["NonCompliance"].sum() 
                #Calculate the number of positive instances in the validation set (that matches the item), divided by the total number of instances.
                #the expression summ2/len(instancesMatchingCurrentGenClItem["NonCompliance"]) is a float-number between [0,1]
                if len(instancesMatchingCurrentGenClItem["NonCompliance"])>0:
                    noncompliance+=(summ2/len(instancesMatchingCurrentGenClItem["NonCompliance"]))
                    truepositiveval+=(summ2/len(instancesMatchingCurrentGenClItem["NonCompliance"]))
                #test if there are at least 1 instance in the validation set that matches the current inspection and checklist item.
                matchlen=len(instancesMatchingCurrentGenClItem["NonCompliance"])
                if matchlen>0:
                    lengthvalprec+=1
                    uniquechecklistlengthval+=1
                
                if matchlen==0:#expression is true if validation set record of checklist item does not exist. This expression never comes true for any method other than BCBR/CBCBR.
                    instanceMatches=cases[cases["IndustrySubgroupCode"].astype(float)==float(generatedChecklistControlpoint["IndustrySubgroupCode"])]
                    instanceMatches=instanceMatches[instanceMatches["MunicipalityNumber"].astype(int)==int(generatedChecklistControlpoint["MunicipalityNumber"])]
                    instanceMatches=instanceMatches[instanceMatches["ControlPointText"]==generatedChecklistControlpoint["ControlPointText"]]
                    instanceMatches["NonCompliance"]=instanceMatches["NonCompliance"].astype(float)

                    matchesSum=instanceMatches["NonCompliance"].sum()


                    falsepositivengt+=(len(instanceMatches["NonCompliance"])-matchesSum)
                    if len(instanceMatches["NonCompliance"])>0:
                        noncompliancengt+=(matchesSum/len(instanceMatches["NonCompliance"]))
                        truepositive+=(matchesSum/len(instanceMatches["NonCompliance"]))
                        precision+=(matchesSum/len(instanceMatches["NonCompliance"]))
                        lengthprec+=1
                #End of for-loop
            if lengthorgcl>0:
                precisiongt+=(precpercl/lengthorgcl)
                lengthprecgt+=1      

            #Calculate statistics on checklists level
            uniquenegatives=len(negatives.drop_duplicates(subset = ["ControlPointText"]))
            negativecpy=negatives.copy()
            negativecpy["NonCompliance"]=negativecpy["NonCompliance"].astype(float)
            groupedby=negativecpy.groupby(["ControlPointText"],as_index=False).count()
            groupedbys=negativecpy.groupby(["ControlPointText"],as_index=False).sum()
            groupedby["NonCompliance"]=groupedbys["NonCompliance"]/groupedby["NonCompliance"]
            uniquecalcnegatives=groupedby.drop_duplicates(subset = ["ControlPointText"])
            noncompliancenegatives=uniquecalcnegatives["NonCompliance"].sum()

            falsenegativeelement=0
            if uniquenegatives>0:
                falsenegativeelement=noncompliancenegatives/uniquenegatives #To avoid selection bias effects
            truenegativeelement=1-falsenegativeelement

            truepositivesprchecklistval=(truepositiveval)
            falsepositivesprchecklistval=(uniquechecklistlengthval-truepositivesprchecklistval)

            truepositivesprchecklist=(truepositive+truepositiveval)
            falsepositivesprchecklist=(len(uniqueChecklist)-(truepositive+truepositiveval))
            truenegativesprchecklist=truenegativeelement*uniquenegatives
            falsenegativesprchecklist=falsenegativeelement*uniquenegatives


            precprchecklist=truepositivesprchecklist/len(uniqueChecklist)
            precchecklists+=precprchecklist
            precprchecklistval=0
            if uniquechecklistlengthval>0:
                precprchecklistval=truepositivesprchecklistval/uniquechecklistlengthval
            precchecklistsval+=precprchecklistval

            recprchecklist=0
            if (truepositivesprchecklist+falsenegativesprchecklist)>0:
                recprchecklist=truepositivesprchecklist/(truepositivesprchecklist+falsenegativesprchecklist)
            recchecklists+=recprchecklist

            recprchecklistval=0
            if (truepositivesprchecklistval+falsenegativesprchecklist)>0:
                recprchecklistval=truepositivesprchecklistval/(truepositivesprchecklistval+falsenegativesprchecklist)
            recchecklistsval+=recprchecklistval

            accprchecklist=0
            if (len(uniqueChecklist)+uniquenegatives)>0:
                accprchecklist=(truepositivesprchecklist+truenegativesprchecklist)/(len(uniqueChecklist)+uniquenegatives)
            accchecklists+=accprchecklist

            accprchecklistval=0
            if (uniquechecklistlengthval+uniquenegatives)>0:
                accprchecklistval=(truepositivesprchecklistval+truenegativesprchecklist)/(uniquechecklistlengthval+uniquenegatives)
            accchecklistsval+=accprchecklistval

            #|true positives for each checklist|=sum(true positive elements in checklist)
            #|false positives for each checklist|=sum(false positive elements in checklist)
            #|true negatives for each checklist|=|true unique negative elements not in checklist|
            #|false negatives for each checklist|=|false unique negative elements not in checklist|
        #End of inner for loop
    #Recording statistics for the current cross-validation fold.
    if inspectionstot>0:
        precctot+=precchecklists/inspectionstot
        accctot+=accchecklists/inspectionstot
        preccgttot+=precisiongt/lengthprecgt
        recctot+=recchecklists/inspectionstot
    timetotal=timetotal+(time.time()-start_time)
    print("Current avg time:"+ str(timetotal/(ik+1)))
    
    print("Precision (gt): "+str(precisiongt/lengthprecgt))
    print("Precision: "+str((precchecklists)/(inspectionstot)))
    print("Recall: "+str((recchecklists)/(inspectionstot)))
    print("Accuracy: "+str((accchecklists)/(inspectionstot)))
    
    #Additional information
    print("Average number of items per checklist(gt): "+str(controlpointsgtcount/inspectionstot))
    print("Average number of items per checklist: "+str((controlpointscount)/inspectionstot))
    print("Average number of non-compliant items per checklist(gt): "+str(precisiongt/inspectionstot))
    print("Average number of non-compliant items per checklist: "+str((noncompliance+noncompliancengt)/inspectionstot))

    #number of common control points between CBR generated and original lists divided by the number of control points in the CBR generated list

    print("Average similarity: "+str(similarity/(lengthprecgt+lengthvalprec)))
    print("\nTrain time: "+str(traintime))



print("\nAverage Traintime: "+str(traintimetot/8))                                
print("\nAverage Accuracy: "+str(accctot/8))
print("\nAverage Prec: "+str(precctot/8))
print("\nAverage Rec: "+str(recctot/8))
print("\nAverage Precgt: "+str(preccgttot/8))
fh2.write("\nAverage Traintime: "+str(traintimetot/8))                                
fh2.write("\nAverage Accuracy: "+str(accctot/8))
fh2.write("\nAverage Prec: "+str(precctot/8))
fh2.write("\nAverage Rec: "+str(recctot/8))
fh2.write("\nAverage Precgt: "+str(preccgttot/8))
fh2.close()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


0
138939




Current avg. training time:12262.864439725876
944


  pd.set_option('display.max_colwidth', -1)


9401
19030
28295
37591
46953
56085
65396
74848
84197
93589
102919
112240
121465
130658
Current avg time:12830.108131885529
Precision (gt): 0.2572581136860293
Precision(val): 0.2905995349504363
Precision: 0.2905995349504363
Recall(val): 0.6819022720078182
Recall: 0.6819022720078182
Accuracy(val): 0.5243886898950308
Accuracy: 0.5243886898950308
Average number of control points per checklist(gt): 9.737238044062332
Average number of control points per checklist: 14.250268672756583
Average number of non-compliant control points per checklist(gt): 0.24402196947031876
Average number of non-compliant control points per checklist(val): 4.202341105514333
Average number of non-compliant control points per checklist: 4.202341105514333
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6833020673271807
Average similarity: 0.2838827172708729


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


138939
277878




Current avg. training time:12530.290630459785
971


  pd.set_option('display.max_colwidth', -1)


148302
157969
166907
175988
185323
194698
204186
213882
223042
232448
241653
250747
259899
269053
Current avg time:13096.317440271378
Precision (gt): 0.25711664815148677
Precision(val): 0.29051473010818973
Precision: 0.29051473010818973
Recall(val): 0.6858006358325665
Recall: 0.6858006358325665
Accuracy(val): 0.5206322662646964
Accuracy: 0.5206322662646964
Average number of control points per checklist(gt): 9.648811114079615
Average number of control points per checklist: 14.172855997862676
Average number of non-compliant control points per checklist(gt): 0.24303465165908633
Average number of non-compliant control points per checklist(val): 4.173181461813473
Average number of non-compliant control points per checklist: 4.173181461813473
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6807951139512526
Average similarity: 0.2859854134628234


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


277878
416816




Current avg. training time:12612.616060495377
964


  pd.set_option('display.max_colwidth', -1)


286694
296138
305186
314684
324140
333702
343133
352589
362086
371035
380392
389757
399299
408667
Current avg time:13133.242645661036
Precision (gt): 0.2535781647016315
Precision(val): 0.2836596695282049
Precision: 0.2836596695282049
Recall(val): 0.6884491655876447
Recall: 0.6884491655876447
Accuracy(val): 0.5225163521333075
Accuracy: 0.5225163521333075
Average number of control points per checklist(gt): 9.782118405155055
Average number of control points per checklist: 14.21103503826017
Average number of non-compliant control points per checklist(gt): 0.2411188267662312
Average number of non-compliant control points per checklist(val): 4.081797237475254
Average number of non-compliant control points per checklist: 4.081797237475254
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6883466530635379
Average similarity: 0.2853856237109038


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


416816
555755




Current avg. training time:12592.174218177795
966


  pd.set_option('display.max_colwidth', -1)


425949
435293
444632
453914
463145
472535
481779
490910
500135
509500
518525
528088
537510
546681
Current avg time:13107.056175291538
Precision (gt): 0.2519243342858724
Precision(val): 0.28377082461827524
Precision: 0.28377082461827524
Recall(val): 0.6912171172229014
Recall: 0.6912171172229014
Accuracy(val): 0.5162802674018667
Accuracy: 0.5162802674018667
Average number of control points per checklist(gt): 9.791527907286532
Average number of control points per checklist: 14.207406420674037
Average number of non-compliant control points per checklist(gt): 0.24115202693196738
Average number of non-compliant control points per checklist(val): 4.087503197017904
Average number of non-compliant control points per checklist: 4.087503197017904
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6891847545825325
Average similarity: 0.2806198158596847


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


555755
694694




Current avg. training time:12624.896219444276
977


  pd.set_option('display.max_colwidth', -1)


564941
574079
583594
592591
602070
611362
620548
629859
639234
648639
657848
666961
676410
685834
Current avg time:13123.53253312111
Precision (gt): 0.2566672675842544
Precision(val): 0.29637158910129396
Precision: 0.29637158910129396
Recall(val): 0.6822679705447489
Recall: 0.6822679705447489
Accuracy(val): 0.5255838566539912
Accuracy: 0.5255838566539912
Average number of control points per checklist(gt): 9.579509071504802
Average number of control points per checklist: 14.154082177161152
Average number of non-compliant control points per checklist(gt): 0.24252591466105572
Average number of non-compliant control points per checklist(val): 4.26670069858479
Average number of non-compliant control points per checklist: 4.26670069858479
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6768018548713937
Average similarity: 0.28800364237722675


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


694694
833632




Current avg. training time:12653.887159744898
983


  pd.set_option('display.max_colwidth', -1)


703729
713143
722308
731640
740712
749479
758415
767757
776813
786167
795637
805098
814243
823535
832505
Current avg time:13142.628243605295
Precision (gt): 0.2561926546253888
Precision(val): 0.2922806497264695
Precision: 0.2922806497264695
Recall(val): 0.6899872654209647
Recall: 0.6899872654209647
Accuracy(val): 0.5179638523895288
Accuracy: 0.5179638523895288
Average number of control points per checklist(gt): 9.718094232545862
Average number of control points per checklist: 14.150191368615546
Average number of non-compliant control points per checklist(gt): 0.24378369273446657
Average number of non-compliant control points per checklist(val): 4.204289450627621
Average number of non-compliant control points per checklist: 4.204289450627621
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6867818236084167
Average similarity: 0.288262150311963


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


833632
972571




Current avg. training time:12666.040735040393
978


  pd.set_option('display.max_colwidth', -1)


843054
852324
861680
871066
879924
889241
898758
908130
917542
927136
936363
945678
955216
964778
Current avg time:13164.5820750509
Precision (gt): 0.2543678931434775
Precision(val): 0.28695824831217326
Precision: 0.28695824831217326
Recall(val): 0.6906393512317762
Recall: 0.6906393512317762
Accuracy(val): 0.5239936758833563
Accuracy: 0.5239936758833563
Average number of control points per checklist(gt): 9.710137133638074
Average number of control points per checklist: 14.203011562247916
Average number of non-compliant control points per checklist(gt): 0.24229584873911514
Average number of non-compliant control points per checklist(val): 4.139444257134749
Average number of non-compliant control points per checklist: 4.139444257134749
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6836674807368281
Average similarity: 0.28587921242718267


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


972571
1111510




Current avg. training time:12671.418245702982
969


  pd.set_option('display.max_colwidth', -1)


981668
990870
999821
1008750
1017812
1026757
1035910
1045148
1054309
1063594
1072893
1081903
1091378
1100560
1110012
Current avg time:13181.992590785027
Precision (gt): 0.25894127787994564
Precision(val): 0.2864673392479684
Precision: 0.2864673392479684
Recall(val): 0.6867073193898997
Recall: 0.6867073193898997
Accuracy(val): 0.5195933865553424
Accuracy: 0.5195933865553424
Average number of control points per checklist(gt): 9.683774180813264
Average number of control points per checklist: 14.132780629030135
Average number of non-compliant control points per checklist(gt): 0.24834373380563807
Average number of non-compliant control points per checklist(val): 4.110320161109689
Average number of non-compliant control points per checklist: 4.110320161109689
Percentage of control points in the intersection between the CBR generated and the original checklists: 0.6851994971832953
Average similarity: 0.2828139821132096
