# Parse experiment results
This notebook extracts the results from each of the experiment notebooks and summarizes them in a table. The table is saved to `result_comparison.csv` together with the results reported by the authors of the paper.

In [2]:
import os 
all_experiment_notebook_paths = [f for f in os.listdir(os.getcwd()) if ("exp" in f and f[5] in str(list(range(2, 7))))]
all_experiment_notebook_paths

['exp002_D22-Wine-Quality-White-3-9-vs-5_Reproduction.ipynb',
 'exp003_D9-Abalone-9-vs-16_Reproduction.ipynb',
 'exp004_D14-Abalone-13-vs-R_Reproduction.ipynb',
 'exp005_D12-Yeast-1-vs-7_Reproduction.ipynb',
 'exp006_D8-Yeast-0-3-5-9-vs-7-8_Reproduction.ipynb']

In [28]:
import pandas as pd
import codecs
import json

current_classifier = None
current_augmentation_method = None
results_list = []

for path in all_experiment_notebook_paths:
    # Read experiment notebook 
    f = codecs.open(path, 'r')
    source = f.read()

    y = json.loads(source)
    f.close()

    
    for cell in y["cells"]:
        if cell['cell_type'] == 'markdown':
            markdown_header = cell['source'][0]
            # Extract classifier name
            if markdown_header[:3] == "## ":
                current_classifier = markdown_header[3:].strip()

            # Extract augmentation method name
            if markdown_header[:4] == "### ":
                if "ADASYN" in markdown_header:
                    current_augmentation_method = "ADASYN"
                if "Random" in markdown_header:
                    current_augmentation_method = "Random Oversampling"
                if "Baseline" in markdown_header:
                    current_augmentation_method = "Baseline"
            
            # Distinguish CFA type
            if markdown_header[:5] == "#### ":
                if "Approach 1" in markdown_header:
                    current_augmentation_method = "CFA_noval"
                elif "Approach 2" in markdown_header:
                    current_augmentation_method = "CFA_val"
                

        # Extract AUC score 
        if cell['cell_type'] == 'code':
            if len(cell['outputs']):
                text_outputs = cell['outputs'][0].get("text", None)
                if text_outputs:
                    for output in text_outputs:
                        if "AUC" in output:
                            results_list.append({
                                "Dataset": path.split("_")[1],
                                "Classifier": current_classifier,
                                "Augmentation Method": current_augmentation_method,
                                "AUC (Ours)": float(output.split()[-1]),
                            })

results_df = pd.DataFrame(results_list)
results_df

Unnamed: 0,Dataset,Classifier,Augmentation Method,AUC (Ours)
0,D22-Wine-Quality-White-3-9-vs-5,Random Forest,Baseline,0.832356
1,D22-Wine-Quality-White-3-9-vs-5,Random Forest,CFA_noval,0.717917
2,D22-Wine-Quality-White-3-9-vs-5,Random Forest,CFA_val,0.832356
3,D22-Wine-Quality-White-3-9-vs-5,Random Forest,ADASYN,0.778991
4,D22-Wine-Quality-White-3-9-vs-5,Random Forest,Random Oversampling,0.799898
5,D22-Wine-Quality-White-3-9-vs-5,Logistic Regression,Baseline,0.766821
6,D22-Wine-Quality-White-3-9-vs-5,Logistic Regression,CFA_noval,0.723013
7,D22-Wine-Quality-White-3-9-vs-5,Logistic Regression,CFA_val,0.763078
8,D22-Wine-Quality-White-3-9-vs-5,Logistic Regression,ADASYN,0.747171
9,D22-Wine-Quality-White-3-9-vs-5,Logistic Regression,Random Oversampling,0.76336


In [29]:
results_df.pivot(index=["Classifier", "Dataset"], columns=["Augmentation Method"], values="AUC (Ours)")[['Baseline', 'ADASYN', 'CFA_val']]

Unnamed: 0_level_0,Augmentation Method,Baseline,ADASYN,CFA_val
Classifier,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Logistic Regression,D12-Yeast-1-vs-7,0.873755,0.867525,0.867344
Logistic Regression,D14-Abalone-13-vs-R,0.734243,0.737987,0.734243
Logistic Regression,D22-Wine-Quality-White-3-9-vs-5,0.766821,0.747171,0.763078
Logistic Regression,D8-Yeast-0-3-5-9-vs-7-8,0.811189,0.806119,0.812104
Logistic Regression,D9-Abalone-9-vs-16,0.937908,0.938532,0.938998
Random Forest,D12-Yeast-1-vs-7,0.852983,0.791815,0.844198
Random Forest,D14-Abalone-13-vs-R,0.739565,0.717229,0.739565
Random Forest,D22-Wine-Quality-White-3-9-vs-5,0.832356,0.778991,0.832356
Random Forest,D8-Yeast-0-3-5-9-vs-7-8,0.823343,0.778921,0.820092
Random Forest,D9-Abalone-9-vs-16,0.895435,0.889868,0.889524


In [19]:
paper_results_list = [
    {
        "Classifier": "Random Forest",
        "Dataset": "D8-Yeast-0-3-5-9-vs-7-8",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.7869,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D8-Yeast-0-3-5-9-vs-7-8",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.8002,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D8-Yeast-0-3-5-9-vs-7-8",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.9373,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D9-Abalone-9-vs-16",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.8897,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D9-Abalone-9-vs-16",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.9169,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D9-Abalone-9-vs-16",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.9939,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D12-Yeast-1-vs-7",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.8489,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D12-Yeast-1-vs-7",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.8529,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D12-Yeast-1-vs-7",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.9827,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D14-Abalone-13-vs-R",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.7497,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D14-Abalone-13-vs-R",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.7627,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D14-Abalone-13-vs-R",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.9801,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D22-Wine-Quality-White-3-9-vs-5",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.8058,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D22-Wine-Quality-White-3-9-vs-5",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.7988,
    }, {
        "Classifier": "Random Forest",
        "Dataset": "D22-Wine-Quality-White-3-9-vs-5",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.9993,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D8-Yeast-0-3-5-9-vs-7-8",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.7919,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D8-Yeast-0-3-5-9-vs-7-8",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.7962,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D8-Yeast-0-3-5-9-vs-7-8",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.7880,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D9-Abalone-9-vs-16",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.9453,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D9-Abalone-9-vs-16",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.9511,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D9-Abalone-9-vs-16",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.9897,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D12-Yeast-1-vs-7",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.8619,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D12-Yeast-1-vs-7",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.8634,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D12-Yeast-1-vs-7",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.9215,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D14-Abalone-13-vs-R",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.7374,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D14-Abalone-13-vs-R",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.7474,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D14-Abalone-13-vs-R",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.8384,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D22-Wine-Quality-White-3-9-vs-5",
        "Augmentation Method": "Baseline",
        "AUC (Paper)": 0.7425,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D22-Wine-Quality-White-3-9-vs-5",
        "Augmentation Method": "ADASYN",
        "AUC (Paper)": 0.6771,
    }, {
        "Classifier": "Logistic Regression",
        "Dataset": "D22-Wine-Quality-White-3-9-vs-5",
        "Augmentation Method": "CFA",
        "AUC (Paper)": 0.6923,
    }
]
paper_results_df = pd.DataFrame(paper_results_list).replace(
    {
        "Baseline": "Baseline (Paper)",
        "ADASYN": "ADASYN (Paper)",
        "CFA": "CFA (Paper)"
    }
)
paper_results_df.pivot(index=["Classifier", "Dataset"], columns=["Augmentation Method"], values="AUC (Paper)")

Unnamed: 0_level_0,Augmentation Method,ADASYN (Paper),Baseline (Paper),CFA (Paper)
Classifier,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Logistic Regression,D12-Yeast-1-vs-7,0.8634,0.8619,0.9215
Logistic Regression,D14-Abalone-13-vs-R,0.7474,0.7374,0.8384
Logistic Regression,D22-Wine-Quality-White-3-9-vs-5,0.6771,0.7425,0.6923
Logistic Regression,D8-Yeast-0-3-5-9-vs-7-8,0.7962,0.7919,0.788
Logistic Regression,D9-Abalone-9-vs-16,0.9511,0.9453,0.9897
Random Forest,D12-Yeast-1-vs-7,0.8529,0.8489,0.9827
Random Forest,D14-Abalone-13-vs-R,0.7627,0.7497,0.9801
Random Forest,D22-Wine-Quality-White-3-9-vs-5,0.7988,0.8058,0.9993
Random Forest,D8-Yeast-0-3-5-9-vs-7-8,0.8002,0.7869,0.9373
Random Forest,D9-Abalone-9-vs-16,0.9169,0.8897,0.9939


In [48]:
all_results_df = pd.concat([results_df.rename({"AUC (Ours)": "AUC"}, axis=1), 
                            paper_results_df.rename({"AUC (Paper)": "AUC"}, axis=1)])
all_results_df_pivot = all_results_df.pivot(
    index=["Classifier", "Dataset"], 
    columns=["Augmentation Method"], 
    values="AUC"
)[["Baseline (Paper)", "ADASYN (Paper)", "CFA (Paper)", "Baseline", "ADASYN", "CFA_val"]].copy()
all_results_df_pivot

Unnamed: 0_level_0,Augmentation Method,Baseline (Paper),ADASYN (Paper),CFA (Paper),Baseline,ADASYN,CFA_val
Classifier,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Logistic Regression,D12-Yeast-1-vs-7,0.8619,0.8634,0.9215,0.873755,0.867525,0.867344
Logistic Regression,D14-Abalone-13-vs-R,0.7374,0.7474,0.8384,0.734243,0.737987,0.734243
Logistic Regression,D22-Wine-Quality-White-3-9-vs-5,0.7425,0.6771,0.6923,0.766821,0.747171,0.763078
Logistic Regression,D8-Yeast-0-3-5-9-vs-7-8,0.7919,0.7962,0.788,0.811189,0.806119,0.812104
Logistic Regression,D9-Abalone-9-vs-16,0.9453,0.9511,0.9897,0.937908,0.938532,0.938998
Random Forest,D12-Yeast-1-vs-7,0.8489,0.8529,0.9827,0.852983,0.791815,0.844198
Random Forest,D14-Abalone-13-vs-R,0.7497,0.7627,0.9801,0.739565,0.717229,0.739565
Random Forest,D22-Wine-Quality-White-3-9-vs-5,0.8058,0.7988,0.9993,0.832356,0.778991,0.832356
Random Forest,D8-Yeast-0-3-5-9-vs-7-8,0.7869,0.8002,0.9373,0.823343,0.778921,0.820092
Random Forest,D9-Abalone-9-vs-16,0.8897,0.9169,0.9939,0.895435,0.889868,0.889524


In [49]:
all_results_df_pivot["Reported Improvement"] =  all_results_df_pivot["CFA (Paper)"] - all_results_df_pivot["Baseline (Paper)"]
all_results_df_pivot["Actual Improvement"] =  all_results_df_pivot["CFA_val"] - all_results_df_pivot["Baseline"]
all_results_df_pivot.round(3)

Unnamed: 0_level_0,Augmentation Method,Baseline (Paper),ADASYN (Paper),CFA (Paper),Baseline,ADASYN,CFA_val,Reported Improvement,Actual Improvement
Classifier,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Logistic Regression,D12-Yeast-1-vs-7,0.862,0.863,0.922,0.874,0.868,0.867,0.06,-0.006
Logistic Regression,D14-Abalone-13-vs-R,0.737,0.747,0.838,0.734,0.738,0.734,0.101,0.0
Logistic Regression,D22-Wine-Quality-White-3-9-vs-5,0.742,0.677,0.692,0.767,0.747,0.763,-0.05,-0.004
Logistic Regression,D8-Yeast-0-3-5-9-vs-7-8,0.792,0.796,0.788,0.811,0.806,0.812,-0.004,0.001
Logistic Regression,D9-Abalone-9-vs-16,0.945,0.951,0.99,0.938,0.939,0.939,0.044,0.001
Random Forest,D12-Yeast-1-vs-7,0.849,0.853,0.983,0.853,0.792,0.844,0.134,-0.009
Random Forest,D14-Abalone-13-vs-R,0.75,0.763,0.98,0.74,0.717,0.74,0.23,0.0
Random Forest,D22-Wine-Quality-White-3-9-vs-5,0.806,0.799,0.999,0.832,0.779,0.832,0.194,0.0
Random Forest,D8-Yeast-0-3-5-9-vs-7-8,0.787,0.8,0.937,0.823,0.779,0.82,0.15,-0.003
Random Forest,D9-Abalone-9-vs-16,0.89,0.917,0.994,0.895,0.89,0.89,0.104,-0.006


In [50]:
all_results_df_pivot.round(3).to_csv("result_comparison.csv")