In [1]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [56]:
def evaluate_rule_conditions(row, condition_part):

    # Checks if any of the conditions in the rule are satisfied
    if all(check_condition(row, part) for part in condition_part.split(" and ")):
        return True  # Return True if all conditions in the rule are satisfied
    
    return False  # Return False if any of the conditions in the rule is not satisfied


def check_condition(row, condition_part):
    # Check if a single condition part is satisfied
    
    #parts = [part.strip('()') for part in condition_part.split()]
    #parts = [part for part in condition_part.split()]
    # Use regular expressions to properly parse the condition
    parts = re.split(r'\s*(==|<=|>=|<|>|!=)\s*', condition_part)
    
    #print("parts: ", parts)
    if len(parts) == 3:
        column, op, value = parts
        return eval(f"{row[column]} {op} {value}")
    # handle the case of a 2-thresholds conditions of the kind: a < Column <= b
    elif len(parts) == 5:
        val1,op1,column,op2,val2 = parts
        # Use the original condition from the rule
        return eval(f"{val1} {op1} {row[column]} {op2} {val2}")        
    else:
        RaiseValueError("Bad condition formatting!")


def evaluate_rules(data, N1, tuned_rules, output, dataset_name = None):

    idx_rules = 0
    satisfiedMat = np.zeros((len(data),len(tuned_rules)))
    coveringlist = []
    errorlist = []
    precisionlist = []
    for i, rule in tuned_rules.iterrows():
        pred_rule = []
        tptot = 0
        tntot = 0
        fntot = 0
        fptot = 0
        
        idx_data = 0
        tuned_antecedent = rule['Rule'].strip()
        print(f"rule: {tuned_antecedent}")
        for j, row in data.iterrows():
            print(row)
            y_row = row[output]
            tp = 0
            tn = 0
            fn = 0
            fp = 0
            # check if the point row satifies rule 
            if evaluate_rule_conditions(row, tuned_antecedent):
                # rule is satisfied
                satisfied = True              
                satisfiedMat[idx_data,idx_rules] = 1
                print("satisfied")
            else:
                satisfied = False
                print("not satisfied")
    

            if satisfied and y_row == 1:
                tp=1
            elif satisfied and y_row == 0:
                fp=1
            elif not satisfied and y_row == 0:
                tn=1
            
            elif not satisfied and y_row == 1:
                fn=1

            tptot+=tp
            tntot+=tn
            fptot+=fp
            fntot+=fn
            idx_data+=1
        if (tptot+fptot) == 0 and (fptot+tntot)==0: 
            coveringlist.append(tptot/(tptot+fntot))
            errorlist.append(0)
            precisionlist.append(0)
            print(f"tp = {tptot}, fp = {fptot}, tn = {tntot}, fn = {fntot}")
            print(f"covering = {cov}, error = 0, precision = 0")
            #continue
        elif (tptot+fptot) == 0:
            err = fptot/(tntot+fptot)
            coveringlist.append(tptot/(tptot+fntot))
            print(f"tp = {tptot}, fp = {fptot}, tn = {tntot}, fn = {fntot}")
            print(f"covering = {cov}, error = {err}, precision = 0")
            errorlist.append(err)
            precisionlist.append(0)
            #continue
        elif (tntot+fptot)==0:
            errorlist.append(0)
            cov = tptot/(tptot+fntot)
            prec = tptot/(tptot+fptot)
            print(f"tp = {tptot}, fp = {fptot}, tn = {tntot}, fn = {fntot}")
            print(f"covering = {cov}, error = 0, precision = {prec}")
            coveringlist.append(cov)
            precisionlist.append(prec)
            #continue
        elif (tptot+fntot)==0:
            coveringlist.append(0)
            err = fptot/(tntot+fptot)
            prec = tptot/(tptot+fptot)
            print(f"tp = {tptot}, fp = {fptot}, tn = {tntot}, fn = {fntot}")
            print(f"covering = 0, error = {err}, precision = {prec}")
            errorlist.append(err)
            precisionlist.append(prec)
            #continue
        else:
            
            #cov = tptot/N1
            cov = tptot/(tptot+fntot)
            err = fptot/(tntot+fptot)
            prec = tptot/(tptot+fptot)
            print(f"tp = {tptot}, fp = {fptot}, tn = {tntot}, fn = {fntot}")
            print(f"covering = {cov}, error = {err}, precision = {prec}")
            coveringlist.append(cov)
            errorlist.append(err)
            precisionlist.append(prec)
            #continue
        idx_rules+=1
        print("\n")
    
    return satisfiedMat, coveringlist, precisionlist, errorlist




In [23]:
def ReformattingRuleset(rules, output_label):
    #print(output_label)
    # adjust columns values
    for i in range(len(rules)+1):   
        rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("RULE {}: ".format(i),""))
    rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("AND","and"))
    rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("{",""))
    rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("}",""))
    rules["Rule"] = rules["Rule"].apply(lambda x: x.replace(f"{output_label} in ",f"{output_label} = "))
    rules["Covering"] = rules["Covering"].apply(lambda x: x.replace("COVERING: ",""))
    rules["Error"] = rules["Error"].apply(lambda x: x.replace("ERROR: ",""))
    rules['Output'] = rules['Rule'].str.extract(rf'{output_label} = (\d+)', expand=False).astype(int)
    rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("IF ",""))
    rules["Rule"] = rules["Rule"].apply(lambda x: x.replace(x[x.find("THEN"):],""))
    
    return rules

In [4]:
def calculate_metrics(data, output_label, reapply_on_gt = False, gt_label = None):
    #print(data[output_label])
    #print(data["pred("+ output_label+ ")"])
    if reapply_on_gt:
        cm = confusion_matrix(data[gt_label], data["pred("+ output_label+ ")"], labels=[0, 1])
    else:
        cm = confusion_matrix(data[output_label], data["pred("+ output_label+ ")"], labels=[0, 1])
    #print(cm)
    TN, FP, FN, TP = cm.ravel()
    FNR = FN / (FN + TP) if TP + FN != 0 else 0
    TNR = TN / (TN + FP) if TN + FP != 0 else 0
    FPR = FP / (FP + TN) if TN + FP != 0 else 0
    TPR = TP / (TP + FN) if TP + FN != 0 else 0
    Acc = (TP + TN) / (TP + TN + FP + FN)
    f1score = (2 * TP) / (2 * TP + FP + FN)
    PPV = TP / (TP + FP) if TP + FP != 0 else 0
    NPV = TN / (TN + FN) if TN + FN != 0 else 0
    return FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV

In [6]:
datasetlist = ['ssh', 'p2p', 'smoking', 'cardio', 'platooning', 'rul', 'eeg', 'mqttset', 'telescope', 'fire']
outputlabels = ['g', 'g', 'smoking', 'cardio', 'collision', 'RUL_binary', 'eyeDetection', 'output', 'class', 'Fire Alarm']

# Performance of the LLM 

**1 ) Original test data;**

**2) New rules trained on CCS points, with original output labels**

**3) New rules trained with respect to belonging or not to CCS**


In [None]:

data = []

for dataset_name, output_label in zip(datasetlist, outputlabels):
    dataORIG = pd.read_excel(f"results_rev_paper_20240325/results_eps005_{dataset_name}.xlsx")
    FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV = calculate_metrics(dataORIG, output_label)
    data.append([dataset_name, 'Original', FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV])

    dataCCS = pd.read_excel(f"results_rev_paper_20240325/prove_ccs/{dataset_name}_ccs_testset.xlsx")
    FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV = calculate_metrics(dataCCS, output_label)
    data.append([dataset_name, 'New rules on ccs for original output', FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV])

    dataCCS2 = pd.read_excel(f"results_rev_paper_20240325/prove_ccs/{dataset_name}_testset.xlsx")
    FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV = calculate_metrics(dataCCS2, "ConformalPoint")
    data.append([dataset_name, 'New rules for ccs output', FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV])

df = pd.DataFrame(data, columns=['Dataset', 'Rules', 'FNR', 'TNR', 'TPR', 'FPR', 'Accuracy', 'F1 Score', 'Precision', 'NPV'])

# Save DataFrame to an Excel file
#df.to_excel("metrics_table.xlsx", index=False)

In [15]:
df.to_excel("results_rev_paper_20240325/prove_ccs/metrics_LLM_CCS.xlsx", index=False)

**Original rules vs rules predicting ccs/non ccs evaluated on ccs/non ccs labels or ground truth** 

In [17]:
data = []

for dataset_name, output_label in zip(datasetlist, outputlabels):
    dataORIG = pd.read_excel(f"results_rev_paper_20240325/results_eps005_{dataset_name}.xlsx")
    FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV = calculate_metrics(dataORIG, output_label)
    data.append([dataset_name, 'Original', FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV])

    dataCCS2 = pd.read_excel(f"results_rev_paper_20240325/prove_ccs/{dataset_name}_testset.xlsx")
    FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV = calculate_metrics(dataCCS2, "ConformalPoint")
    data.append([dataset_name, 'New rules for ccs output', FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV])

    dataCCS3 = pd.read_excel(f"results_rev_paper_20240325/prove_ccs/{dataset_name}_testset.xlsx")
    FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV = calculate_metrics(dataCCS3, "ConformalPoint", reapply_on_gt = True, gt_label = output_label)
    data.append([dataset_name, 'New rules for ccs output - test on original output', FNR, TNR, TPR, FPR, Acc, f1score, PPV, NPV])

df2 = pd.DataFrame(data, columns=['Dataset', 'Rules', 'FNR', 'TNR', 'TPR', 'FPR', 'Accuracy', 'F1 Score', 'Precision', 'NPV'])
df2

Unnamed: 0,Dataset,Rules,FNR,TNR,TPR,FPR,Accuracy,F1 Score,Precision,NPV
0,ssh,Original,0.180249,0.941411,0.819751,0.058589,0.8808,0.87265,0.932846,0.84027
1,ssh,New rules for ccs output,0.151596,0.953526,0.848404,0.046474,0.914,0.881215,0.916667,0.912577
2,ssh,New rules for ccs output - test on original ou...,0.358268,0.955285,0.641732,0.044715,0.796,0.761682,0.936782,0.720859
3,p2p,Original,0.0,0.972898,1.0,0.027102,0.9864,0.986535,0.973427,1.0
4,p2p,New rules for ccs output,0.0,0.953878,1.0,0.046122,0.978,0.979401,0.959633,1.0
5,p2p,New rules for ccs output - test on original ou...,0.0,0.924797,1.0,0.075203,0.963,0.964862,0.93211,1.0
6,smoking,Original,0.187746,0.650419,0.812254,0.349581,0.708,0.664368,0.562038,0.862495
7,smoking,New rules for ccs output,0.152,0.766857,0.848,0.233143,0.777,0.487356,0.341935,0.972464
8,smoking,New rules for ccs output - test on original ou...,0.445087,0.819572,0.554913,0.180428,0.728,0.585366,0.619355,0.776812
9,cardio,Original,0.307516,0.754519,0.692484,0.245481,0.722,0.723108,0.756565,0.690119


In [19]:
df2.to_excel("results_rev_paper_20240325/prove_ccs/metrics_LLM_CCS_v2.xlsx", index=False)

## Rule-by-rule performance for rules predicting the $\in \mathcal{S}_\varepsilon$ class

**1) Conformal Output**

In [None]:
rulevalid = []
dataset_name = "ssh"
output_label = "g"
#for dataset_name, output_label in zip(datasetlist, outputlabels):
print(f"DATASET: {dataset_name.upper()}")
data = pd.read_excel(f"results_rev_paper_20240325/prove_ccs/{dataset_name}_testset.xlsx")

rules = pd.read_csv(f"results_rev_paper_20240325/prove_ccs/ruleset_ccs/{dataset_name}_ccs_approx_rules.csv", header=None, names=["Rule", "Covering", "Error"])
rules = ReformattingRuleset(rules, "ConformalPoint")

N1 = len(data[data[output_label] == 1])

rules_targetcls = rules[rules["Output"]==1]

_, coveringvaluesCCS,precisionCCS,errorCCS = evaluate_rules(data, N1, rules_targetcls, "ConformalPoint",dataset_name = dataset_name)
_, coveringvaluesCCS2,precisionCCS2,errorCCS2 = evaluate_rules(data, N1, rules_targetcls, output_label,dataset_name = dataset_name)
    
df = pd.DataFrame(data = zip([dataset_name]*len(rules_targetcls),list(rules_targetcls["Rule"]),coveringvaluesCCS,coveringvaluesCCS2,precisionCCS,precisionCCS2,errorCCS,errorCCS2), columns = ["Dataset","Rule","CoveringCCS","CoveringGT","PrecisionCCS","PrecisionGT","ErrorCCS","ErrorGT"])
rulevalid.append(df)

mA                      -1.128277
mQ                       2.654966
mDt                      0.196057
vA                      -1.023756
vQ                       3.374217
vDt                     -0.115699
sA                       1.241052
sQ                       1.102082
sDt                     -0.785768
kA                       1.525656
kQ                      -0.083620
kDt                     -0.488013
g                        0.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.072313
rule(ConformalPoint)     5.000000
nrule(ConformalPoint)    1.000000
err(ConformalPoint)      0.000000
rule-1                   5.000000
out-1                    0.000000
conf-1                   0.072313
rule-2                        NaN
out-2                         NaN
conf-2                        NaN
rule-3                        NaN
out-3                         NaN
conf-3                        NaN
rule-4                        NaN
out-4         

mA                       0.993238
mQ                      -0.061212
mDt                      0.072245
vA                       1.991709
vQ                      -0.422519
vDt                     -0.115820
sA                      -0.513426
sQ                      -0.566490
sDt                     -0.649468
kA                      -0.653021
kQ                      -0.083620
kDt                     -0.442791
g                        0.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.292197
rule(ConformalPoint)     1.000000
nrule(ConformalPoint)    2.000000
err(ConformalPoint)      0.000000
rule-1                   1.000000
out-1                    0.000000
conf-1                   0.232592
rule-2                   6.000000
out-2                    0.000000
conf-2                   0.059605
rule-3                        NaN
out-3                         NaN
conf-3                        NaN
rule-4                        NaN
out-4         

mA                        0.931176
mQ                        2.387281
mDt                       0.265229
vA                        1.108549
vQ                       -0.366184
vDt                      -0.115434
sA                       -1.000860
sQ                       -0.323633
sDt                      -0.825461
kA                       -1.074983
kQ                       -0.083620
kDt                      -0.501638
g                         1.000000
ConformalPoint            0.000000
pred(ConformalPoint)      1.000000
conf(ConformalPoint)      0.747275
rule(ConformalPoint)      9.000000
nrule(ConformalPoint)     4.000000
err(ConformalPoint)       1.000000
rule-1                    9.000000
out-1                     1.000000
conf-1                    0.397849
rule-2                   10.000000
out-2                     1.000000
conf-2                    0.339758
rule-3                    6.000000
out-3                     0.000000
conf-3                    0.059605
rule-4              

mA                       -0.986416
mQ                        0.110056
mDt                       0.365674
vA                       -0.864481
vQ                       -0.408873
vDt                      -0.115191
sA                       -1.150356
sQ                       -0.526184
sDt                      -0.852809
kA                       -0.995119
kQ                       -0.083620
kDt                      -0.511678
g                         1.000000
ConformalPoint            1.000000
pred(ConformalPoint)      1.000000
conf(ConformalPoint)      0.737606
rule(ConformalPoint)      9.000000
nrule(ConformalPoint)     2.000000
err(ConformalPoint)       0.000000
rule-1                    9.000000
out-1                     1.000000
conf-1                    0.397849
rule-2                   10.000000
out-2                     1.000000
conf-2                    0.339758
rule-3                         NaN
out-3                          NaN
conf-3                         NaN
rule-4              

mA                       0.714833
mQ                       1.648293
mDt                     -0.301489
vA                       2.012587
vQ                       3.380818
vDt                     -0.116715
sA                       0.961332
sQ                       1.103342
sDt                      0.048909
kA                       0.999824
kQ                      -0.083620
kDt                     -0.136812
g                        0.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.362162
rule(ConformalPoint)     2.000000
nrule(ConformalPoint)    3.000000
err(ConformalPoint)      0.000000
rule-1                   2.000000
out-1                    0.000000
conf-1                   0.230244
rule-2                   5.000000
out-2                    0.000000
conf-2                   0.072313
rule-3                   6.000000
out-3                    0.000000
conf-3                   0.059605
rule-4                        NaN
out-4         

mA                      -0.411962
mQ                      -0.110055
mDt                     -0.042283
vA                      -0.433546
vQ                      -0.421511
vDt                     -0.116201
sA                       0.283712
sQ                      -0.557290
sDt                     -0.620103
kA                       0.484408
kQ                      -0.083620
kDt                     -0.437440
g                        0.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.652239
rule(ConformalPoint)     1.000000
nrule(ConformalPoint)    3.000000
err(ConformalPoint)      0.000000
rule-1                   1.000000
out-1                    0.000000
conf-1                   0.232592
rule-2                   2.000000
out-2                    0.000000
conf-2                   0.230244
rule-3                   3.000000
out-3                    0.000000
conf-3                   0.189402
rule-4                        NaN
out-4         

mA                        2.380510
mQ                       -0.038376
mDt                      -0.034356
vA                        2.388833
vQ                       -0.415407
vDt                      -0.116216
sA                       -0.330831
sQ                       -0.473698
sDt                      -0.589940
kA                        0.084905
kQ                       -0.083620
kDt                      -0.425631
g                         1.000000
ConformalPoint            1.000000
pred(ConformalPoint)      1.000000
conf(ConformalPoint)      0.030194
rule(ConformalPoint)     13.000000
nrule(ConformalPoint)     1.000000
err(ConformalPoint)       0.000000
rule-1                   13.000000
out-1                     1.000000
conf-1                    0.030194
rule-2                         NaN
out-2                          NaN
conf-2                         NaN
rule-3                         NaN
out-3                          NaN
conf-3                         NaN
rule-4              

mA                      -0.967078
mQ                       0.620687
mDt                      0.010592
vA                      -1.301651
vQ                       0.853710
vDt                     -0.115996
sA                       1.080384
sQ                       2.248317
sDt                     -0.580587
kA                       1.596252
kQ                      -0.083620
kDt                     -0.416140
g                        0.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.072313
rule(ConformalPoint)     5.000000
nrule(ConformalPoint)    1.000000
err(ConformalPoint)      0.000000
rule-1                   5.000000
out-1                    0.000000
conf-1                   0.072313
rule-2                        NaN
out-2                         NaN
conf-2                        NaN
rule-3                        NaN
out-3                         NaN
conf-3                        NaN
rule-4                        NaN
out-4         

mA                       0.480656
mQ                       0.150018
mDt                      0.309637
vA                       2.244052
vQ                      -0.407381
vDt                     -0.115247
sA                       2.480527
sQ                      -0.549854
sDt                     -0.832831
kA                       2.841986
kQ                      -0.083620
kDt                     -0.505139
g                        0.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.672719
rule(ConformalPoint)     1.000000
nrule(ConformalPoint)    4.000000
err(ConformalPoint)      0.000000
rule-1                   1.000000
out-1                    0.000000
conf-1                   0.232592
rule-2                   2.000000
out-2                    0.000000
conf-2                   0.230244
rule-3                   4.000000
out-3                    0.000000
conf-3                   0.150278
rule-4                   6.000000
out-4         

mA                        2.319625
mQ                       -0.680947
mDt                      -0.341779
vA                        1.465585
vQ                       -0.413782
vDt                      -0.116739
sA                       -1.051376
sQ                       -0.493910
sDt                       0.408403
kA                       -0.859556
kQ                       -0.083620
kDt                       0.092653
g                         1.000000
ConformalPoint            1.000000
pred(ConformalPoint)      1.000000
conf(ConformalPoint)      0.737606
rule(ConformalPoint)      9.000000
nrule(ConformalPoint)     2.000000
err(ConformalPoint)       0.000000
rule-1                    9.000000
out-1                     1.000000
conf-1                    0.397849
rule-2                   10.000000
out-2                     1.000000
conf-2                    0.339758
rule-3                         NaN
out-3                          NaN
conf-3                         NaN
rule-4              

mA                        0.724008
mQ                        1.097699
mDt                      -0.128239
vA                        0.506083
vQ                       -0.407098
vDt                      -0.116360
sA                       -1.138434
sQ                       -0.523973
sDt                      -0.402243
kA                       -1.114241
kQ                       -0.083620
kDt                      -0.348949
g                         1.000000
ConformalPoint            1.000000
pred(ConformalPoint)      1.000000
conf(ConformalPoint)      0.737606
rule(ConformalPoint)      9.000000
nrule(ConformalPoint)     2.000000
err(ConformalPoint)       0.000000
rule-1                    9.000000
out-1                     1.000000
conf-1                    0.397849
rule-2                   10.000000
out-2                     1.000000
conf-2                    0.339758
rule-3                         NaN
out-3                          NaN
conf-3                         NaN
rule-4              

mA                        2.280525
mQ                        0.028228
mDt                      -0.052396
vA                        2.382781
vQ                       -0.415323
vDt                      -0.116249
sA                        0.062036
sQ                       -0.478211
sDt                      -0.559398
kA                        0.555420
kQ                       -0.083620
kDt                      -0.413887
g                         1.000000
ConformalPoint            1.000000
pred(ConformalPoint)      0.000000
conf(ConformalPoint)      0.159207
rule(ConformalPoint)      3.000000
nrule(ConformalPoint)     2.000000
err(ConformalPoint)       1.000000
rule-1                    3.000000
out-1                     0.000000
conf-1                    0.189402
rule-2                   13.000000
out-2                     1.000000
conf-2                    0.030194
rule-3                         NaN
out-3                          NaN
conf-3                         NaN
rule-4              

mA                      -0.729467
mQ                      -0.451956
mDt                     -0.215428
vA                      -0.571912
vQ                      -0.400328
vDt                     -0.116442
sA                       0.148604
sQ                      -0.531189
sDt                     -0.130848
kA                      -0.129601
kQ                      -0.083620
kDt                     -0.211578
g                        0.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.652239
rule(ConformalPoint)     1.000000
nrule(ConformalPoint)    3.000000
err(ConformalPoint)      0.000000
rule-1                   1.000000
out-1                    0.000000
conf-1                   0.232592
rule-2                   2.000000
out-2                    0.000000
conf-2                   0.230244
rule-3                   3.000000
out-3                    0.000000
conf-3                   0.189402
rule-4                        NaN
out-4         

mA                       0.189406
mQ                      -1.229003
mDt                     -0.320336
vA                      -0.330705
vQ                      -0.412105
vDt                     -0.116700
sA                      -0.342787
sQ                      -0.556335
sDt                      0.288882
kA                      -0.249622
kQ                      -0.083620
kDt                      0.041010
g                        1.000000
ConformalPoint           0.000000
pred(ConformalPoint)     0.000000
conf(ConformalPoint)     0.462837
rule(ConformalPoint)     1.000000
nrule(ConformalPoint)    2.000000
err(ConformalPoint)      0.000000
rule-1                   1.000000
out-1                    0.000000
conf-1                   0.232592
rule-2                   2.000000
out-2                    0.000000
conf-2                   0.230244
rule-3                        NaN
out-3                         NaN
conf-3                        NaN
rule-4                        NaN
out-4         

KeyboardInterrupt: 

In [27]:
ValDf = pd.concat(rulevalid, ignore_index = True)
ValDf

Unnamed: 0,Dataset,Rule,CoveringCCS,CoveringGT,PrecisionCCS,PrecisionGT,ErrorCCS,ErrorGT
0,ssh,sA <= -0.770336,0.702128,0.525591,0.939502,0.950178,0.027244,0.028455
1,ssh,sA <= -0.467081 and sQ > -0.528809,0.587766,0.448819,0.917012,0.946058,0.032051,0.026423
2,ssh,-1.852861 < mQ <= 0.169999 and vA <= 0.536997 ...,0.297872,0.246063,0.751678,0.838926,0.059295,0.04878
3,ssh,-0.433821 < sQ <= 0.880432,0.114362,0.15748,0.52439,0.97561,0.0625,0.004065
4,ssh,mA > 0.732053 and -0.086947 < kA <= 1.001915,0.071809,0.064961,0.613636,0.75,0.027244,0.022358
5,p2p,-0.451570 < sQ <= 1.888989,0.996176,0.996063,0.983019,0.954717,0.018868,0.04878
6,p2p,mQ > -0.385606,0.952199,0.950787,0.972656,0.943359,0.02935,0.058943
7,smoking,age <= 1.103201 and height(cm) > -0.233761 and...,0.264,0.153179,0.423077,0.679487,0.051429,0.038226
8,smoking,-1.796235 < age <= 1.103201 and height(cm) > 0...,0.208,0.104046,0.4,0.553846,0.044571,0.044343
9,smoking,-0.967826 < age <= 1.103201 and height(cm) > -...,0.176,0.124277,0.366667,0.716667,0.043429,0.025994


In [30]:
ValDf.to_excel("results_rev_paper_20240325/prove_ccs/ruleset_ccs/RuleByRuleValidCCS.xlsx", index = False)

## Evaluate the original rules on the (new) test sets

In [None]:
rulevalid = []
#dataset_name = "ssh"
#output_label = "g"
for dataset_name, output_label in zip(datasetlist, outputlabels):
    print(f"DATASET: {dataset_name.upper()}")
    data = pd.read_excel(f"results_rev_paper_20240325/prove_ccs/{dataset_name}_testset.xlsx")
    
    rules = pd.read_csv(f"results_rev_paper_20240325/prove_ccs/ruleset_ccs/{dataset_name}_ccs_approx_rules.csv", header=None, names=["Rule", "Covering", "Error"])
    rules = ReformattingRuleset(rules, "ConformalPoint")
    
    N1 = len(data[data[output_label] == 1])
    
    rules_targetcls = rules[rules["Output"]==1]
    
    _, coveringvaluesCCS,precisionCCS,errorCCS = evaluate_rules(data, N1, rules_targetcls, "ConformalPoint",dataset_name = dataset_name)
    _, coveringvaluesCCS2,precisionCCS2,errorCCS2 = evaluate_rules(data, N1, rules_targetcls, output_label,dataset_name = dataset_name)
        
    df = pd.DataFrame(data = zip([dataset_name]*len(rules_targetcls),list(rules_targetcls["Rule"]),coveringvaluesCCS,coveringvaluesCCS2,precisionCCS,precisionCCS2,errorCCS,errorCCS2), columns = ["Dataset","Rule","CoveringCCS","CoveringGT","PrecisionCCS","PrecisionGT","ErrorCCS","ErrorGT"])
    rulevalid.append(df)