In [51]:
# Analysis4NLP.ipynb for SUPS and DeepEST

# making aggregarion

import os
import pandas as pd

# Define the base directory and folder names
base_dir = 'Results/Classification'
folders = ['SUPS', 'DeepEST']
datasets = ['imdb300AuxDS', 'imdbAuxDS', 'SSTIMDB3000AuxDS', 'SSTtestAuxDS']
aux_vars = ['confidence', 'dsa', 'entropy', 'lsa', 'similarity']
budgets = [50, 100, 200, 400, 800]

# Define the output directory for aggregated results
output_dir = 'DS4NLP_results'

# Create the output directory if it does not exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Function to read data based on file extension
def read_data(file_path):
    if file_path.endswith('.csv'):
        return pd.read_csv(file_path)
    elif file_path.endswith('.txt'):
        return pd.read_csv(file_path, delimiter=',')
    else:
        raise ValueError(f"Unsupported file format: {file_path}")

# Iterate over each folder, dataset, and auxiliary variable
for folder in folders:
    folder_path = os.path.join(base_dir, folder)
    
    # Create a subdirectory in the output directory for each folder
    output_folder_path = os.path.join(output_dir, folder)
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    for dataset in datasets:
        for aux_var in aux_vars:
            # Prepare a list to collect dataframes for aggregation
            dataframes = []
            for budget in budgets:
                # Construct file names differently for DeepEST and others
                if folder == 'DeepEST':
                    file_name = f"{dataset}.{aux_var}_{budget}.csv"  # Use period for DeepEST
                else:
                    file_name = f"{dataset}_{aux_var}_{budget}.txt"  # Use underscore for others

                file_path = os.path.join(folder_path, file_name)
                
                # Check if the file exists
                if os.path.exists(file_path):
                    # Read the data and add a budget column
                    try:
                        data = read_data(file_path)
                        data['budget'] = budget
                        dataframes.append(data)
                    except Exception as e:
                        print(f"Error reading {file_path}: {e}")
                        continue
                else:
                    print(f"File not found: {file_name} in {folder}")
                    continue

            # Concatenate all dataframes for this dataset and auxiliary variable
            if dataframes:
                aggregated_data = pd.concat(dataframes, ignore_index=True)
                # Save the aggregated data to a new CSV file
                output_file = f"{dataset}_{aux_var}_agg.csv"
                output_path = os.path.join(output_folder_path, output_file)
                try:
                    aggregated_data.to_csv(output_path, index=False)
                    print(f"Aggregated data saved to {output_path}")
                except Exception as e:
                    print(f"Error saving aggregated data for {dataset} - {aux_var}: {e}")

print("Data aggregation completed.")

Aggregated data saved to DS4NLP_results/SUPS/imdb300AuxDS_confidence_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdb300AuxDS_dsa_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdb300AuxDS_entropy_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdb300AuxDS_lsa_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdb300AuxDS_similarity_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdbAuxDS_confidence_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdbAuxDS_dsa_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdbAuxDS_entropy_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdbAuxDS_lsa_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/imdbAuxDS_similarity_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/SSTIMDB3000AuxDS_confidence_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/SSTIMDB3000AuxDS_dsa_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/SSTIMDB3000AuxDS_entropy_agg.csv
Aggregated data saved to DS4NLP_results/SUPS/S

In [52]:
# for 'GBS', 'RHC-S', 'SSRS'
# making aggregarion

import os
import pandas as pd

# Define the base directory and folder names
base_dir = 'Results/Classification'
folders_to_process = ['GBS', 'RHC-S', 'SSRS','SRS', '2-UPS']

# Define the datasets and auxiliary variables
datasets = ['imdb300AuxDS', 'imdbAuxDS', 'SSTtestAuxDS']
aux_vars = ['Confidence_Score', 'DSA', 'LSA', 'Prediction_Entropy', 'Similarity_Score']
budgets = [50, 100, 200, 400, 800]

# Define the output directory for aggregated results
output_dir = 'DS4NLP_results'

# Create the output directory if it does not exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Function to read data based on file extension
def read_data(file_path):
    if file_path.endswith('.csv'):
        return pd.read_csv(file_path)
    elif file_path.endswith('.txt'):
        return pd.read_csv(file_path, delimiter=',')
    else:
        raise ValueError(f"Unsupported file format: {file_path}")

# Iterate over each specified folder
for folder in folders_to_process:
    folder_path = os.path.join(base_dir, folder)
    
    # Create a subdirectory in the output directory for each folder
    output_folder_path = os.path.join(output_dir, folder)
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    for dataset in datasets:
        for aux_var in aux_vars:
            # Prepare a list to collect dataframes for aggregation
            dataframes = []
            for budget in budgets:
                # Construct file name for txt files
                file_name = f"{dataset}_{aux_var}_{budget}.txt"
                file_path = os.path.join(folder_path, file_name)
                
                # Check if the file exists
                if os.path.exists(file_path):
                    # Read the data and add a budget column
                    try:
                        data = read_data(file_path)
                        data['budget'] = budget
                        dataframes.append(data)
                    except Exception as e:
                        print(f"Error reading {file_path}: {e}")
                        continue
                else:
                    print(f"File not found: {file_name} in {folder}")
                    continue

            # Concatenate all dataframes for this dataset and auxiliary variable
            if dataframes:
                aggregated_data = pd.concat(dataframes, ignore_index=True)
                # Save the aggregated data to a new CSV file
                output_file = f"{dataset}_{aux_var}_agg.csv"
                output_path = os.path.join(output_folder_path, output_file)
                try:
                    aggregated_data.to_csv(output_path, index=False)
                    print(f"Aggregated data saved to {output_path}")
                except Exception as e:
                    print(f"Error saving aggregated data for {dataset} - {aux_var}: {e}")

print("Data aggregation completed.")

Aggregated data saved to DS4NLP_results/GBS/imdb300AuxDS_Confidence_Score_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdb300AuxDS_DSA_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdb300AuxDS_LSA_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdb300AuxDS_Prediction_Entropy_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdb300AuxDS_Similarity_Score_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdbAuxDS_Confidence_Score_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdbAuxDS_DSA_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdbAuxDS_LSA_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdbAuxDS_Prediction_Entropy_agg.csv
Aggregated data saved to DS4NLP_results/GBS/imdbAuxDS_Similarity_Score_agg.csv
Aggregated data saved to DS4NLP_results/GBS/SSTtestAuxDS_Confidence_Score_agg.csv
Aggregated data saved to DS4NLP_results/GBS/SSTtestAuxDS_DSA_agg.csv
Aggregated data saved to DS4NLP_results/GBS/SSTtestAuxDS_LSA_agg.csv
Aggregated data saved t

## RQ1,2 : RMSE, RMedSE, Mean Failure, STD Failures

In [54]:
import os
import pandas as pd
import numpy as np

# Define the base path and methods
base_path = "DS4NLP_results"
methods = ["DeepEST", "GBS", "RHC-S", "SSRS", "SUPS", 'SRS', '2-UPS']

# True accuracies for each dataset
true_accuracies = {
    "imdb300AuxDS": 0.8990,
    "imdbAuxDS": 0.8896,
    "SSTtestAuxDS": 0.9225700164744646
}

# Auxiliary variable mapping
aux_var_mapping = {
    "confidence": "Confidence_Score",
    "Confidence_Score": "Confidence_Score",
    "entropy": "Prediction_Entropy",
    "prediction": "Prediction_Entropy",  
    "Prediction_Entropy": "Prediction_Entropy",
    "similarity": "Similarity_Score",
    "Similarity_Score": "Similarity_Score",
    "dsa": "DSA",
    "DSA": "DSA",
    "lsa": "LSA",
    "LSA": "LSA"
}

# Function to calculate RMSE and RMedSE
def calculate_rmse_rmedse(accuracies, true_accuracy):
    squared_errors = (accuracies - true_accuracy) ** 2
    rmse = np.sqrt(np.mean(squared_errors))
    rmedse = np.sqrt(np.median(squared_errors))
    return rmse, rmedse

results = []

# Process each method and its associated files
for method in methods:
    method_path = os.path.join(base_path, method)
    if not os.path.exists(method_path):
        print(f"Directory not found: {method_path}")
        continue

    # List all files in the method directory
    for file_name in os.listdir(method_path):
        if not file_name.endswith(".csv"):
            continue

        dataset = file_name.split('_')[0]
        
        if dataset not in true_accuracies:
            continue

        file_path = os.path.join(method_path, file_name)
        
        try:
            data = pd.read_csv(file_path)
            if data.empty:
                print(f"No data in {file_name}")
                continue

            # Processing accuracies
            accuracies = data['accuracy'].values
            rmse, rmedse = calculate_rmse_rmedse(accuracies, true_accuracies[dataset])
            
            # Processing failures
            mean_failures = data['failures'].mean()
            std_failures = data['failures'].std()
            
            # Map auxiliary variable name
            aux_var_key = file_name.split('_')[1].split('.')[0].lower()
            aux_var = aux_var_mapping.get(aux_var_key, "Unknown Variable")

            results.append({
                "method": method,
                "dataset": dataset,
                "aux_var": aux_var,
                "RMSE": rmse,
                "RMedSE": rmedse,
                "Mean Failures": mean_failures,
                "STD Failures": std_failures
            })
        except Exception as e:
            print(f"Error processing {file_path}: {e}")

# Convert results to DataFrame and display
results_df = pd.DataFrame(results)
print(results_df)

# Save the results to a CSV file
results_df.to_csv("rmse_rmedse_failures.csv", index=False)

      method       dataset           aux_var      RMSE    RMedSE  \
0    DeepEST     imdbAuxDS               LSA  0.006781  0.003408   
1    DeepEST  imdb300AuxDS  Confidence_Score  0.049166  0.023258   
2    DeepEST  imdb300AuxDS  Similarity_Score  0.057486  0.023240   
3    DeepEST  imdb300AuxDS               LSA  0.007841  0.002724   
4    DeepEST  SSTtestAuxDS  Confidence_Score  0.041553  0.015825   
..       ...           ...               ...       ...       ...   
100    2-UPS     imdbAuxDS  Confidence_Score  0.028804  0.015395   
101    2-UPS  SSTtestAuxDS  Similarity_Score  0.024862  0.012570   
102    2-UPS  imdb300AuxDS               DSA  0.042787  0.029765   
103    2-UPS  SSTtestAuxDS  Confidence_Score  0.028390  0.013943   
104    2-UPS     imdbAuxDS               DSA  0.048547  0.029793   

     Mean Failures  STD Failures  
0       254.460000    224.883493  
1        67.366667     38.147087  
2         7.040000      6.780232  
3       175.446667    106.900602  
4       

In [55]:
results_df

Unnamed: 0,method,dataset,aux_var,RMSE,RMedSE,Mean Failures,STD Failures
0,DeepEST,imdbAuxDS,LSA,0.006781,0.003408,254.460000,224.883493
1,DeepEST,imdb300AuxDS,Confidence_Score,0.049166,0.023258,67.366667,38.147087
2,DeepEST,imdb300AuxDS,Similarity_Score,0.057486,0.023240,7.040000,6.780232
3,DeepEST,imdb300AuxDS,LSA,0.007841,0.002724,175.446667,106.900602
4,DeepEST,SSTtestAuxDS,Confidence_Score,0.041553,0.015825,42.060000,18.766753
...,...,...,...,...,...,...,...
100,2-UPS,imdbAuxDS,Confidence_Score,0.028804,0.015395,34.466667,31.643498
101,2-UPS,SSTtestAuxDS,Similarity_Score,0.024862,0.012570,23.946667,21.695819
102,2-UPS,imdb300AuxDS,DSA,0.042787,0.029765,38.073333,35.706878
103,2-UPS,SSTtestAuxDS,Confidence_Score,0.028390,0.013943,26.173333,23.922343


In [71]:
datasets = {
    "DeepEST": [
        "imdb300AuxDS_confidence_agg.csv",
        "imdb300AuxDS_dsa_agg.csv",
        "imdb300AuxDS_entropy_agg.csv",
        "imdb300AuxDS_lsa_agg.csv",
        "imdb300AuxDS_similarity_agg.csv",
        "imdbAuxDS_confidence_agg.csv",
        "imdbAuxDS_dsa_agg.csv",
        "imdbAuxDS_entropy_agg.csv",
        "imdbAuxDS_lsa_agg.csv",
        "imdbAuxDS_similarity_agg.csv",
        "SSTtestAuxDS_confidence_agg.csv",
        "SSTtestAuxDS_dsa_agg.csv",
        "SSTtestAuxDS_entropy_agg.csv",
        "SSTtestAuxDS_lsa_agg.csv",
        "SSTtestAuxDS_similarity_agg.csv"
    ],
    
    "GBS" : [
        "imdb300AuxDS_Confidence_Score_agg.csv",
        "imdb300AuxDS_dsa_agg.csv",
        "imdb300AuxDS_lsa_agg.csv",
        "imdb300AuxDS_Prediction_Entropy_agg.csv",
        "imdb300AuxDS_Similarity_Score_agg.csv",
        "imdbAuxDS_Confidence_Score_agg.csv",
        "imdbAuxDS_dsa_agg.csv",
        "imdbAuxDS_lsa_agg.csv",
        "imdbAuxDS_Prediction_Entropy_agg.csv",
        "imdbAuxDS_Similarity_Score_agg.csv",
        "SSTtestAuxDS_Confidence_Score_agg.csv",
        "SSTtestAuxDS_dsa_agg.csv",
        "SSTtestAuxDS_lsa_agg.csv",
        "SSTtestAuxDS_Prediction_Entropy_agg.csv",
        "SSTtestAuxDS_Similarity_Score_agg.csv"
         ]
    ,
    "RHC-S": [
        "imdb300AuxDS_Confidence_Score_agg.csv",
        "imdb300AuxDS_dsa_agg.csv",
        "imdb300AuxDS_lsa_agg.csv",
        "imdb300AuxDS_Prediction_Entropy_agg.csv",
        "imdb300AuxDS_Similarity_Score_agg.csv",
        "imdbAuxDS_Confidence_Score_agg.csv",
        "imdbAuxDS_dsa_agg.csv",
        "imdbAuxDS_lsa_agg.csv",
        "imdbAuxDS_Prediction_Entropy_agg.csv",
        "imdbAuxDS_Similarity_Score_agg.csv",
        "SSTtestAuxDS_Confidence_Score_agg.csv",
        "SSTtestAuxDS_dsa_agg.csv",
        "SSTtestAuxDS_lsa_agg.csv",
        "SSTtestAuxDS_Prediction_Entropy_agg.csv",
        "SSTtestAuxDS_Similarity_Score_agg.csv"
    ],
    "SSRS": [
        "imdb300AuxDS_Confidence_Score_agg.csv",
        "imdb300AuxDS_dsa_agg.csv",
        "imdb300AuxDS_lsa_agg.csv",
        "imdb300AuxDS_Prediction_Entropy_agg.csv",
        "imdb300AuxDS_Similarity_Score_agg.csv",
        "imdbAuxDS_Confidence_Score_agg.csv",
        "imdbAuxDS_dsa_agg.csv",
        "imdbAuxDS_lsa_agg.csv",
        "imdbAuxDS_Prediction_Entropy_agg.csv",
        "imdbAuxDS_Similarity_Score_agg.csv",
        "SSTtestAuxDS_Confidence_Score_agg.csv",
        "SSTtestAuxDS_dsa_agg.csv",
        "SSTtestAuxDS_lsa_agg.csv",
        "SSTtestAuxDS_Prediction_Entropy_agg.csv",
        "SSTtestAuxDS_Similarity_Score_agg.csv"
    ],
    "SUPS": [
        "imdb300AuxDS_confidence_agg.csv",
        "imdb300AuxDS_dsa_agg.csv",
        "imdb300AuxDS_entropy_agg.csv",
        "imdb300AuxDS_lsa_agg.csv",
        "imdb300AuxDS_similarity_agg.csv",
        "imdbAuxDS_confidence_agg.csv",
        "imdbAuxDS_dsa_agg.csv",
        "imdbAuxDS_entropy_agg.csv",
        "imdbAuxDS_lsa_agg.csv",
        "imdbAuxDS_similarity_agg.csv",
        "SSTtestAuxDS_confidence_agg.csv",
        "SSTtestAuxDS_dsa_agg.csv",
        "SSTtestAuxDS_entropy_agg.csv",
        "SSTtestAuxDS_lsa_agg.csv",
        "SSTtestAuxDS_similarity_agg.csv"
    ]
    ,
    "SRS" : [
        "imdb300AuxDS_Confidence_Score_agg.csv",
        "imdb300AuxDS_DSA_agg.csv",
        "imdb300AuxDS_LSA_agg.csv",
        "imdb300AuxDS_Prediction_Entropy_agg.csv",
        "imdb300AuxDS_Similarity_Score_agg.csv",
        "imdbAuxDS_Confidence_Score_agg.csv",
        "imdbAuxDS_DSA_agg.csv",
        "imdbAuxDS_LSA_agg.csv",
        "imdbAuxDS_Prediction_Entropy_agg.csv",
        "imdbAuxDS_Similarity_Score_agg.csv",
        "SSTtestAuxDS_Confidence_Score_agg.csv",
        "SSTtestAuxDS_DSA_agg.csv",
        "SSTtestAuxDS_LSA_agg.csv",
        "SSTtestAuxDS_Prediction_Entropy_agg.csv",
        "SSTtestAuxDS_Similarity_Score_agg.csv"
    ]
    , 
    "2-UPS" : [
        "imdb300AuxDS_Confidence_Score_agg.csv",
        "imdb300AuxDS_DSA_agg.csv",
        "imdb300AuxDS_LSA_agg.csv",
        "imdb300AuxDS_Prediction_Entropy_agg.csv",
        "imdb300AuxDS_Similarity_Score_agg.csv",
        "imdbAuxDS_Confidence_Score_agg.csv",
        "imdbAuxDS_DSA_agg.csv",
        "imdbAuxDS_LSA_agg.csv",
        "imdbAuxDS_Prediction_Entropy_agg.csv",
        "imdbAuxDS_Similarity_Score_agg.csv",
        "SSTtestAuxDS_Confidence_Score_agg.csv",
        "SSTtestAuxDS_DSA_agg.csv",
        "SSTtestAuxDS_LSA_agg.csv",
        "SSTtestAuxDS_Prediction_Entropy_agg.csv",
        "SSTtestAuxDS_Similarity_Score_agg.csv"
         ]
}

In [72]:
import os
import pandas as pd
import numpy as np

# Define the base path and methods
base_path = "DS4NLP_results"
methods = ["DeepEST", "GBS", "RHC-S", "SSRS", "SUPS", 'SRS', '2-UPS']

# True accuracies for each dataset
true_accuracies = {
    "imdb300AuxDS": 0.8990,
    "imdbAuxDS": 0.8896,
    "SSTtestAuxDS": 0.9225700164744646
}

# Auxiliary variable mapping
aux_var_mapping = {
    "confidence": "Confidence_Score",
    "Confidence_Score": "Confidence_Score",
    "entropy": "Prediction_Entropy",
    "prediction": "Prediction_Entropy",  
    "Prediction_Entropy": "Prediction_Entropy",
    "similarity": "Similarity_Score",
    "Similarity_Score": "Similarity_Score",
    "dsa": "DSA",
    "DSA": "DSA",
    "lsa": "LSA",
    "LSA": "LSA"
}

# Function to calculate RMSE and RMedSE
def calculate_rmse_rmedse(accuracies, true_accuracy):
    squared_errors = (accuracies - true_accuracy) ** 2
    rmse = np.sqrt(np.mean(squared_errors))
    rmedse = np.sqrt(np.median(squared_errors))
    return rmse, rmedse

results = []

# Iterate over each method and dataset
for method in methods:
    for file_name in datasets[method]:
        dataset = file_name.split('_')[0]
        
        if dataset not in true_accuracies:
            continue

        file_path = os.path.join(base_path, method, file_name)
        
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue

        # Read the data
        try:
            data = pd.read_csv(file_path)
            data = data[data['budget'] == 200]  # Focus on budget 200
            if data.empty:
                print(f"No records with budget 200 in {file_name}")
                continue
            print(f"Processing file: {file_path}")
            
            accuracies = data['accuracy'].values
            rmse, rmedse = calculate_rmse_rmedse(accuracies, true_accuracies[dataset])
            failures_mean = data['failures'].mean()
            failures_std = data['failures'].std()
            
            results.append({
                "method": method,
                "dataset": dataset,
                "aux_var": aux_var_mapping.get(file_name.split('_')[1].split('.')[0].lower(), "Unknown Variable"),
                "RMSE": rmse,
                "RMedSE": rmedse,
                "failures_mean": failures_mean,
                "failures_std": failures_std
            })
            
        except Exception as e:
            print(f"Error processing {file_path}: {e}")

# Convert results to DataFrame and display
results_df = pd.DataFrame(results)
print(results_df)

# Save the results to a CSV file for records with budget 200
results_df.to_csv("rmse_rmedse_failures200.csv", index=False)

Processing file: DS4NLP_results/DeepEST/imdb300AuxDS_confidence_agg.csv
Processing file: DS4NLP_results/DeepEST/imdb300AuxDS_dsa_agg.csv
Processing file: DS4NLP_results/DeepEST/imdb300AuxDS_entropy_agg.csv
Processing file: DS4NLP_results/DeepEST/imdb300AuxDS_lsa_agg.csv
Processing file: DS4NLP_results/DeepEST/imdb300AuxDS_similarity_agg.csv
Processing file: DS4NLP_results/DeepEST/imdbAuxDS_confidence_agg.csv
Processing file: DS4NLP_results/DeepEST/imdbAuxDS_dsa_agg.csv
Processing file: DS4NLP_results/DeepEST/imdbAuxDS_entropy_agg.csv
Processing file: DS4NLP_results/DeepEST/imdbAuxDS_lsa_agg.csv
Processing file: DS4NLP_results/DeepEST/imdbAuxDS_similarity_agg.csv
Processing file: DS4NLP_results/DeepEST/SSTtestAuxDS_confidence_agg.csv
Processing file: DS4NLP_results/DeepEST/SSTtestAuxDS_dsa_agg.csv
Processing file: DS4NLP_results/DeepEST/SSTtestAuxDS_entropy_agg.csv
Processing file: DS4NLP_results/DeepEST/SSTtestAuxDS_lsa_agg.csv
Processing file: DS4NLP_results/DeepEST/SSTtestAuxDS_simil

In [73]:
results_df

Unnamed: 0,method,dataset,aux_var,RMSE,RMedSE,failures_mean,failures_std
0,DeepEST,imdb300AuxDS,Confidence_Score,0.046567,0.026221,72.233333,2.967448
1,DeepEST,imdb300AuxDS,DSA,0.016135,0.008057,20.300000,3.281610
2,DeepEST,imdb300AuxDS,Prediction_Entropy,0.041714,0.026660,73.300000,3.621297
3,DeepEST,imdb300AuxDS,LSA,0.007369,0.002580,161.533333,5.975429
4,DeepEST,imdb300AuxDS,Similarity_Score,0.033537,0.019886,4.333333,1.422318
...,...,...,...,...,...,...,...
100,2-UPS,SSTtestAuxDS,Confidence_Score,0.017719,0.012720,16.566667,3.297683
101,2-UPS,SSTtestAuxDS,DSA,0.058226,0.052578,24.600000,6.703576
102,2-UPS,SSTtestAuxDS,LSA,0.072877,0.072430,0.933333,0.827682
103,2-UPS,SSTtestAuxDS,Prediction_Entropy,0.060796,0.059982,3.466667,1.870521


In [76]:
#budget 200, fridman dunn
import os
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp

# Define the base path and methods
base_path = "DS4NLP_results"
methods = ["DeepEST", "GBS", "RHC-S", "SSRS", "SUPS", 'SRS', '2-UPS']



# Auxiliary variable mapping
aux_var_mapping = {
    "confidence": "Confidence_Score",
    "Confidence_Score": "Confidence_Score",
    "entropy": "Prediction_Entropy",
    "prediction": "Prediction_Entropy",
    "Prediction_Entropy": "Prediction_Entropy",
    "similarity": "Similarity_Score",
    "Similarity_Score": "Similarity_Score",
    "dsa": "DSA",
    "DSA": "DSA",
    "lsa": "LSA",
    "LSA": "LSA"
}

# True accuracies for each dataset
true_accuracies = {
    "imdb300AuxDS": 0.8990,
    "imdbAuxDS": 0.8896,
    "SSTtestAuxDS": 0.9226
}

def calculate_rmse_rmedse(accuracies, true_accuracy):
    squared_errors = (accuracies - true_accuracy) ** 2
    rmse = np.sqrt(np.mean(squared_errors))
    rmedse = np.sqrt(np.median(squared_errors))
    return rmse, rmedse

results = []

# Processing each dataset for each method
for method in methods:
    dataset_count = len(datasets[method])
    print(f"Processing {dataset_count} datasets for method: {method}")
    for file_name in datasets[method]:
        dataset = file_name.split('_')[0]
        true_accuracy = true_accuracies.get(dataset, None)
        if not true_accuracy:
            continue

        file_path = os.path.join(base_path, method, file_name)
        if not os.path.exists(file_path):
            continue

        try:
            data = pd.read_csv(file_path)
            data = data[data['budget'] == 200]  # Filter for budget 200
            if data.empty:
                continue
            accuracies = data['accuracy'].apply(lambda x: float(str(x).split(":")[-1])).values
            aux_var_key = file_name.split('_')[1].replace(".csv", "").lower()
            aux_var = aux_var_mapping.get(aux_var_key, "Unknown Variable")
            rmse, rmedse = calculate_rmse_rmedse(accuracies, true_accuracy)

            results.append({
                "method": method,
                "dataset": dataset,
                "aux_var": aux_var,
                "RMSE": rmse,
                "RMedSE": rmedse
            })
        except Exception as e:
            continue

results_df = pd.DataFrame(results)
pivot_df = results_df.pivot_table(index=['dataset', 'aux_var'], columns='method', values='RMSE', aggfunc=np.mean)

# Perform the Friedman test
stat, p_value = friedmanchisquare(*[pivot_df[method].dropna().values for method in methods])
print(f'Friedman test statistic: {stat}, p-value: {p_value}')

# If the Friedman test is significant, proceed with Dunn's test
if p_value < 0.05:
    melted_df = pivot_df.reset_index().melt(id_vars=['dataset', 'aux_var'], var_name='method', value_name='RMSE')
    dunn_results = sp.posthoc_dunn(melted_df, val_col='RMSE', group_col='method', p_adjust='bonferroni')
    print("Dunn's posthoc test results:")
    print(dunn_results)

Processing 15 datasets for method: DeepEST
Processing 15 datasets for method: GBS
Processing 15 datasets for method: RHC-S
Processing 15 datasets for method: SSRS
Processing 15 datasets for method: SUPS
Processing 15 datasets for method: SRS
Processing 15 datasets for method: 2-UPS
Friedman test statistic: 22.415770609319, p-value: 0.0010177298285168515
Dunn's posthoc test results:
            2-UPS   DeepEST       GBS    RHC-S       SRS      SSRS      SUPS
2-UPS    1.000000  1.000000  1.000000  1.00000  0.387895  0.500097  1.000000
DeepEST  1.000000  1.000000  1.000000  1.00000  1.000000  0.009094  1.000000
GBS      1.000000  1.000000  1.000000  1.00000  1.000000  0.013886  1.000000
RHC-S    1.000000  1.000000  1.000000  1.00000  1.000000  0.115680  1.000000
SRS      0.387895  1.000000  1.000000  1.00000  1.000000  0.000082  0.834856
SSRS     0.500097  0.009094  0.013886  0.11568  0.000082  1.000000  0.219897
SUPS     1.000000  1.000000  1.000000  1.00000  0.834856  0.219897  1.000000

In [77]:
import os
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp

# Define paths, methods, and other configuration
base_path = "DS4NLP_results"
methods = ["DeepEST", "GBS", "RHC-S", "SSRS", "SUPS", 'SRS', '2-UPS']


# Define mappings and true accuracies
aux_var_mapping = {
    "confidence": "Confidence_Score",
    "Confidence_Score": "Confidence_Score",
    "entropy": "Prediction_Entropy",
    "prediction": "Prediction_Entropy",
    "Prediction_Entropy": "Prediction_Entropy",
    "similarity": "Similarity_Score",
    "Similarity_Score": "Similarity_Score",
    "dsa": "DSA",
    "DSA": "DSA",
    "lsa": "LSA",
    "LSA": "LSA"
}

# True accuracies for each dataset
true_accuracies = {
    "imdb300AuxDS": 0.8990,
    "imdbAuxDS": 0.8896,
    "SSTtestAuxDS": 0.9226
}

def calculate_rmse_rmedse(accuracies, true_accuracy):
    squared_errors = (accuracies - true_accuracy) ** 2
    rmse = np.sqrt(np.mean(squared_errors))
    rmedse = np.sqrt(np.median(squared_errors))
    return rmse, rmedse

results = []

# Process each dataset for each method
for method in methods:
    for file_name in datasets.get(method, []):
        dataset = file_name.split('_')[0]
        true_accuracy = true_accuracies.get(dataset)
        if not true_accuracy:
            continue  # Skip if no true accuracy is defined

        file_path = os.path.join(base_path, method, file_name)
        try:
            data = pd.read_csv(file_path)
            accuracies = data['accuracy'].apply(lambda x: float(str(x).split(":")[-1])).values
            aux_var_key = file_name.split('_')[1].replace(".csv", "").lower()
            aux_var = aux_var_mapping.get(aux_var_key, "Unknown Variable")
            rmse, rmedse = calculate_rmse_rmedse(accuracies, true_accuracy)
            results.append({
                "method": method,
                "dataset": dataset,
                "aux_var": aux_var,
                "RMSE": rmse,
                "RMedSE": rmedse
            })
        except Exception as e:
            print(f"Error processing {file_path}: {e}")

# Create DataFrame from results
results_df = pd.DataFrame(results)
if not results_df.empty:
    pivot_df = results_df.pivot_table(index=['dataset', 'aux_var'], columns='method', values='RMSE', aggfunc=np.mean)

    # Filter rows where all methods have data
    pivot_df = pivot_df.dropna()

    # Run Friedman test if there are enough data
    if not pivot_df.empty and len(pivot_df.columns) >= 3:
        try:
            stat, p_value = friedmanchisquare(*[pivot_df[method].values for method in pivot_df.columns])
            print(f'Friedman test statistic: {stat}, p-value: {p_value}')

            if p_value < 0.05:
                melted_df = pivot_df.reset_index().melt(id_vars=['dataset', 'aux_var'], var_name='method', value_name='RMSE')
                dunn_results = sp.posthoc_dunn(melted_df, val_col='RMSE', group_col='method', p_adjust='bonferroni')
                print("Dunn's posthoc test results:")
                print(dunn_results)
        except Exception as e:
            print(f"Error running Friedman test: {e}")
    else:
        print("Not enough data to perform Friedman test.")
else:
    print("No results to process.")

Friedman test statistic: 21.96414852752881, p-value: 0.0012291198830390786
Dunn's posthoc test results:
            2-UPS  DeepEST       GBS     RHC-S       SRS      SSRS      SUPS
2-UPS    1.000000  1.00000  1.000000  1.000000  0.320634  0.384304  1.000000
DeepEST  1.000000  1.00000  1.000000  1.000000  1.000000  0.031250  1.000000
GBS      1.000000  1.00000  1.000000  1.000000  1.000000  0.025383  1.000000
RHC-S    1.000000  1.00000  1.000000  1.000000  0.233496  0.518115  1.000000
SRS      0.320634  1.00000  1.000000  0.233496  1.000000  0.000036  1.000000
SSRS     0.384304  0.03125  0.025383  0.518115  0.000036  1.000000  0.091429
SUPS     1.000000  1.00000  1.000000  1.000000  1.000000  0.091429  1.000000


In [78]:
import os
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp

# Define paths and methods
base_path = "DS4NLP_results"
methods = ["DeepEST", "GBS", "RHC-S", "SSRS", "SUPS", 'SRS', '2-UPS']

# Simulated data for illustration
results = []
for method in methods:
    for dataset_category in ["imdb300AuxDS", "imdbAuxDS", "SSTtestAuxDS"]:
        # Ensure multiple entries per method per dataset category to facilitate the Friedman test
        results.extend([
            {"method": method, "dataset_category": dataset_category, "RMSE": np.random.normal(0.1, 0.01)}
            for _ in range(10)  # Generating multiple data points per method
        ])

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Perform statistical tests for each major dataset category
for dataset in ["imdb300AuxDS", "imdbAuxDS", "SSTtestAuxDS"]:
    print(f"\nResults for dataset: {dataset}")
    dataset_df = results_df[results_df['dataset_category'] == dataset]
    
    if dataset_df['method'].nunique() >= 3:
        pivot_df = dataset_df.pivot_table(index='method', values='RMSE', aggfunc=np.mean)

        # The key is to ensure the pivot table is structured with one row per method
        if pivot_df.shape[0] >= 3:  # Ensuring at least three methods are present
            try:
                # The Friedman test expects each array of values to compare across methods
                stat, p_value = friedmanchisquare(*[pivot_df.loc[method].values for method in methods if method in pivot_df.index])
                print(f"Friedman test for {dataset}: Statistic={stat}, p-value={p_value}")

                if p_value < 0.05:
                    dunn_results = sp.posthoc_dunn(dataset_df, val_col='RMSE', group_col='method', p_adjust='bonferroni')
                    print(f"Dunn's posthoc test results for {dataset}:\n{dunn_results}")
            except Exception as e:
                print(f"Error in statistical testing for {dataset}: {e}")
        else:
            print(f"Not enough methods with valid data for statistical testing for {dataset}.")
    else:
        print(f"Not enough methods with data for {dataset}.")


Results for dataset: imdb300AuxDS
Friedman test for imdb300AuxDS: Statistic=6.0, p-value=0.42319008112684364

Results for dataset: imdbAuxDS
Friedman test for imdbAuxDS: Statistic=6.0, p-value=0.42319008112684364

Results for dataset: SSTtestAuxDS
Friedman test for SSTtestAuxDS: Statistic=6.0, p-value=0.42319008112684364


In [79]:
import os
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp

# Define paths and methods
base_path = "DS4NLP_results"
methods = ["DeepEST", "GBS", "RHC-S", "SSRS", "SUPS", 'SRS', '2-UPS']

# Auxiliary variable mapping
aux_var_mapping = {
    "confidence": "Confidence_Score",
    "Confidence_Score": "Confidence_Score",
    "entropy": "Prediction_Entropy",
    "prediction": "Prediction_Entropy",
    "Prediction_Entropy": "Prediction_Entropy",
    "similarity": "Similarity_Score",
    "Similarity_Score": "Similarity_Score",
    "dsa": "DSA",
    "DSA": "DSA",
    "lsa": "LSA",
    "LSA": "LSA"
}

# Simulated loading of data
results = []
for method in methods:
    method_path = os.path.join(base_path, method)
    for filename in os.listdir(method_path):
        parts = filename.replace('.csv', '').split('_')
        if len(parts) >= 2:
            dataset_category = parts[0]
            aux_var_key = parts[1].lower()
            aux_var = aux_var_mapping.get(aux_var_key, "Unknown Variable")
            if aux_var != "Unknown Variable":
                # Simulate more realistic RMSE calculations
                simulated_rmse = np.random.normal(0.05, 0.01) + np.random.rand() * 0.01
                results.append({
                    "method": method,
                    "dataset_category": dataset_category,
                    "aux_var": aux_var,
                    "RMSE": simulated_rmse
                })

# Convert the list of dictionaries to a DataFrame
results_df = pd.DataFrame(results)

# Perform statistical tests for each auxiliary variable across all datasets
for aux in set(results_df['aux_var']):
    if aux == "Unknown Variable":
        continue
    print(f"\nResults for auxiliary variable: {aux}")
    aux_df = results_df[results_df['aux_var'] == aux]
    
    for dataset in set(aux_df['dataset_category']):
        print(f"Analyzing {aux} in {dataset}")
        dataset_df = aux_df[aux_df['dataset_category'] == dataset]

        if dataset_df['method'].nunique() >= 3:
            pivot_df = dataset_df.pivot_table(index='method', values='RMSE', aggfunc='mean')

            if not all(m in pivot_df.index for m in methods):
                print("Missing data for some methods, skipping statistical tests.")
                continue

            try:
                stats = friedmanchisquare(*(pivot_df.loc[m].values for m in pivot_df.index))
                stat, p_value = stats
                print(f"Friedman test for {aux} in {dataset}: Statistic={stat}, p-value={p_value}")

                if p_value < 0.05:
                    dunn_results = sp.posthoc_dunn(dataset_df, val_col='RMSE', group_col='method', p_adjust='bonferroni')
                    print(f"Dunn's posthoc test results for {aux} in {dataset}:\n{dunn_results}")
            except Exception as e:
                print(f"Error in statistical testing for {aux} in {dataset}: {e}")
        else:
            print(f"Not enough methods with data for {aux} in {dataset}.")


Results for auxiliary variable: Confidence_Score
Analyzing Confidence_Score in imdbAuxDS
Friedman test for Confidence_Score in imdbAuxDS: Statistic=6.0, p-value=0.42319008112684364
Analyzing Confidence_Score in SSTIMDB3000AuxDS
Missing data for some methods, skipping statistical tests.
Analyzing Confidence_Score in SSTtestAuxDS
Friedman test for Confidence_Score in SSTtestAuxDS: Statistic=6.0, p-value=0.42319008112684364
Analyzing Confidence_Score in imdb300AuxDS
Friedman test for Confidence_Score in imdb300AuxDS: Statistic=6.0, p-value=0.42319008112684364

Results for auxiliary variable: Prediction_Entropy
Analyzing Prediction_Entropy in imdbAuxDS
Friedman test for Prediction_Entropy in imdbAuxDS: Statistic=6.0, p-value=0.42319008112684364
Analyzing Prediction_Entropy in imdb300AuxDS
Friedman test for Prediction_Entropy in imdb300AuxDS: Statistic=6.0, p-value=0.42319008112684364
Analyzing Prediction_Entropy in SSTIMDB3000AuxDS
Missing data for some methods, skipping statistical tests

## RQ3

In [80]:
import os
import pandas as pd
import numpy as np

# Define the base path, methods, and budget levels
base_path = "DS4NLP_results"
methods = ["DeepEST", "GBS", "RHC-S", "SSRS", "SUPS", 'SRS', '2-UPS']
budgets = [50, 100, 200, 400, 800]

# True accuracies for each dataset
true_accuracies = {
    "imdb300AuxDS": 0.8990,
    "imdbAuxDS": 0.8896,
    "SSTtestAuxDS": 0.9225700164744646
}

# Mapping of aux variable names to standard names
aux_var_mapping = {
    "confidence": "Confidence_Score",
    "Confidence_Score": "Confidence_Score",
    "entropy": "Prediction_Entropy",
    "prediction": "Prediction_Entropy",
    "Prediction_Entropy": "Prediction_Entropy",
    "similarity": "Similarity_Score",
    "Similarity_Score": "Similarity_Score",
    "dsa": "DSA",
    "DSA": "DSA",
    "lsa": "LSA",
    "LSA": "LSA"
}

# Function to calculate RMSE and RMedSE
def calculate_rmse_rmedse(accuracies, true_accuracy):
    squared_errors = (accuracies - true_accuracy) ** 2
    rmse = np.sqrt(np.mean(squared_errors))
    rmedse = np.sqrt(np.median(squared_errors))
    return rmse, rmedse

# Iterate over each budget
for budget in budgets:
    results = []

    # Process each method and dataset
    for method in methods:
        for file_name in datasets[method]:
            dataset = file_name.split('_')[0]

            if dataset not in true_accuracies:
                continue

            file_path = os.path.join(base_path, method, file_name)

            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                continue

            try:
                data = pd.read_csv(file_path)
                data = data[data['budget'] == budget]
                if data.empty:
                    print(f"No records with budget {budget} in {file_name}")
                    continue

                accuracies = data['accuracy'].values
                rmse, rmedse = calculate_rmse_rmedse(accuracies, true_accuracies[dataset])
                failures_mean = data['failures'].mean()
                failures_std = data['failures'].std()

                results.append({
                    "method": method,
                    "dataset": dataset,
                    "aux_var": aux_var_mapping.get(file_name.split('_')[1].split('.')[0].lower(), "Unknown Variable"),
                    "RMSE": rmse,
                    "RMedSE": rmedse,
                    "failures_mean": failures_mean,
                    "failures_std": failures_std
                })

            except Exception as e:
                print(f"Error processing {file_path}: {e}")

    # Convert results to DataFrame and display
    results_df = pd.DataFrame(results)
    print(f"Results for budget {budget}:")
    print(results_df)

    # Save the results to a CSV file for each budget
    results_df.to_csv(f"rmse_rmedse_failures{budget}.csv", index=False)

Results for budget 50:
      method       dataset             aux_var      RMSE    RMedSE  \
0    DeepEST  imdb300AuxDS    Confidence_Score  0.082051  0.077985   
1    DeepEST  imdb300AuxDS                 DSA  0.048502  0.039416   
2    DeepEST  imdb300AuxDS  Prediction_Entropy  0.089145  0.068531   
3    DeepEST  imdb300AuxDS                 LSA  0.010321  0.004452   
4    DeepEST  imdb300AuxDS    Similarity_Score  0.103185  0.098213   
..       ...           ...                 ...       ...       ...   
100    2-UPS  SSTtestAuxDS    Confidence_Score  0.042546  0.033479   
101    2-UPS  SSTtestAuxDS                 DSA  0.088936  0.052977   
102    2-UPS  SSTtestAuxDS                 LSA  0.074649  0.077430   
103    2-UPS  SSTtestAuxDS  Prediction_Entropy  0.061844  0.057430   
104    2-UPS  SSTtestAuxDS    Similarity_Score  0.043404  0.020164   

     failures_mean  failures_std  
0        17.166667      2.742807  
1         5.300000      2.451600  
2        18.433333      2.95580

In [81]:
import pandas as pd

# Define the base path and budgets
base_path = "./"  # Update this to the location of your datasets
budgets = [50, 100, 200, 400, 800]
file_names = [f"rmse_rmedse_failures{budget}.csv" for budget in budgets]

all_data = []

# Load each dataset and add the 'budget' column
for file_name, budget in zip(file_names, budgets):
    file_path = f"{base_path}/{file_name}"
    df = pd.read_csv(file_path)
    df['budget'] = budget
    all_data.append(df)

# Concatenate all datasets into a single DataFrame
combined_df = pd.concat(all_data, ignore_index=True)

# Now you can work with the combined DataFrame
print(combined_df.head())  # To check the top rows of the combined DataFrame
# Optionally save the combined DataFrame to a CSV file
combined_df.to_csv(f"{base_path}/combined_datasetsRQ3.csv", index=False)

    method       dataset             aux_var      RMSE    RMedSE  \
0  DeepEST  imdb300AuxDS    Confidence_Score  0.082051  0.077985   
1  DeepEST  imdb300AuxDS                 DSA  0.048502  0.039416   
2  DeepEST  imdb300AuxDS  Prediction_Entropy  0.089145  0.068531   
3  DeepEST  imdb300AuxDS                 LSA  0.010321  0.004452   
4  DeepEST  imdb300AuxDS    Similarity_Score  0.103185  0.098213   

   failures_mean  failures_std  budget  
0      17.166667      2.742807      50  
1       5.300000      2.451600      50  
2      18.433333      2.955805      50  
3      41.066667      2.851900      50  
4       1.266667      1.014833      50  
