In [15]:
import pandas as pd
import ast

# Location of the BSO metrics export.
# NOTE(review): the file name is spelled "metics" here while the ACO cell reads
# a "..._metrics.csv" file — confirm the spelling matches the file on disk.
file_path = './outputs/results/metrics/bso_text_data_metics.csv'

# Load the raw table, then build the working frame with every stringified
# 'fitness_curve' entry turned back into a Python literal.
raw_metrics = pd.read_csv(file_path)
df = raw_metrics.assign(
    fitness_curve=raw_metrics['fitness_curve'].map(ast.literal_eval)
)

# Fix and parse the malformed 'selectedFeatures' column
def fix_array_string(s):
    """Parse a stringified flat array into a Python list.

    Brackets and newlines are discarded and the remaining tokens may be
    separated by whitespace, commas, or both, so ``"[1 0 1]"``,
    ``"[1, 0, 1]"`` and ``"[0. 1.\\n 1.]"`` are all accepted.

    Parameters
    ----------
    s : str
        Text form of the array. Any non-string value (for example a missing
        cell read as NaN) is treated as unparseable.

    Returns
    -------
    list
        The parsed values, or an empty list when ``s`` is not a string or
        its tokens are not valid Python literals.
    """
    if not isinstance(s, str):
        return []

    # Treat brackets and commas as plain separators so that both
    # whitespace-separated and comma-separated text tokenise the same way.
    for separator in '[],':
        s = s.replace(separator, ' ')
    tokens = s.split()

    try:
        return ast.literal_eval(f"[{','.join(tokens)}]")
    except (ValueError, TypeError, SyntaxError):
        # Only parse failures fall back to []; anything else propagates.
        return []

# Replace every raw 'selectedFeatures' string with its parsed list form
df['selectedFeatures'] = df['selectedFeatures'].map(fix_array_string)

# Each entry holds the 50-sample and the 100-sample variant of one dataset
paired_datasets = [
    ['BreastEW_50_samples', 'BreastEW_100_samples'],
    ['CongressEW_50_samples', 'CongressEW_100_samples'],
    ['Sonar_50_samples', 'Sonar_100_samples'],
]

# Summary rows are collected here, one dict per (dataset, variant)
results = list()

# Extract fitness and feature count metrics
def extract_metrics(record):
    """Summarise one run as (final fitness, accuracy, selected-feature count).

    Parameters
    ----------
    record : mapping or pandas.Series
        Must provide 'fitness_curve' (a sequence of numbers whose last entry
        is used) and 'selectedFeatures' (an iterable of numbers).

    Returns
    -------
    tuple
        ``(fit, acc, fc_count)`` where ``fit`` is the last fitness value
        rounded to 3 decimals, ``acc`` is ``1 - last fitness`` rounded to
        2 decimals, and ``fc_count`` is the number of 'selectedFeatures'
        entries greater than 0.5.
    """
    final_fitness = record['fitness_curve'][-1]
    fit = round(final_fitness, 3)
    # Round the accuracy from the unrounded fitness: rounding to 3 decimals
    # first and then to 2 can move the reported accuracy by 0.01.
    acc = round(1 - final_fitness, 2)
    fc_count = sum(1 for v in record['selectedFeatures'] if v > 0.5)
    return fit, acc, fc_count

# Process each dataset pair
def _select_run(dataset, threshold_only, method='BSO'):
    """Return the first row of ``df`` matching one dataset and threshold flag.

    Rows are restricted to the given ``method``, the 'KNN' classifier and the
    'gbest' solution.

    Raises
    ------
    IndexError
        If no row matches. The exception type is the same one the bare
        ``.iloc[0]`` lookup raised; the message now names the missing
        combination.
    """
    matches = df.query(
        "Dataset == @dataset and method == @method and classifier == 'KNN' "
        "and threshold_only == @threshold_only and using == 'gbest'"
    )
    if matches.empty:
        raise IndexError(
            f"no row for Dataset={dataset!r}, method={method!r}, "
            f"classifier='KNN', threshold_only={threshold_only!r}, using='gbest'"
        )
    # When several rows match, the first one is used.
    return matches.iloc[0]


# (threshold_only flag, label written to the 'method' column), in output order.
# NOTE(review): threshold_only == False is labelled 'HBEOSA-BSO' and
# threshold_only == True gets the '-NT' suffix; if "NT" is meant to read
# "no threshold" the labels may be swapped — confirm before publishing.
run_variants = (
    (False, 'HBEOSA-BSO'),
    (True, 'HBEOSA-BSO-NT'),
)

for pair in paired_datasets:
    base_name = pair[0].replace('_50_samples', '').replace('_100_samples', '')

    for threshold_only, label in run_variants:
        fit_50, acc_50, fc_50 = extract_metrics(_select_run(pair[0], threshold_only))
        fit_100, acc_100, fc_100 = extract_metrics(_select_run(pair[1], threshold_only))
        # FC is the mean selected-feature count over the two sample sizes
        avg_fc = round((fc_50 + fc_100) / 2, 2)

        results.append({
            'Dataset': base_name,
            'method': label,
            'acc-50': acc_50,
            'acc-100': acc_100,
            'fit-50': fit_50,
            'fit-100': fit_100,
            'FC': avg_fc
        })

# Assemble the summary table from the collected rows
final_df = pd.DataFrame(results)

# Persist the table as CSV and as Excel, both without the index column.
# NOTE(review): the ACO cell later in this notebook writes to these same two
# paths, so running both cells leaves only the ACO summary on disk — confirm
# that this is intended.
for write_table, target in (
    (final_df.to_csv, './outputs/results/summary_table.csv'),
    (final_df.to_excel, './outputs/results/summary_table.xlsx'),
):
    write_table(target, index=False)

# Show the table as plain text, index hidden
print(final_df.to_string(index=False))


   Dataset        method  acc-50  acc-100  fit-50  fit-100   FC
  BreastEW    HBEOSA-BSO    1.00     0.96   0.003    0.041  6.5
  BreastEW HBEOSA-BSO-NT    1.00     0.96   0.001    0.040  2.0
CongressEW    HBEOSA-BSO    1.00     0.96   0.001    0.040  1.5
CongressEW HBEOSA-BSO-NT    1.00     0.96   0.001    0.040  1.0
     Sonar    HBEOSA-BSO    0.92     0.92   0.077    0.081  8.5
     Sonar HBEOSA-BSO-NT    0.92     0.96   0.078    0.042 11.5


In [17]:
# Summary table for the ACO runs
import pandas as pd
import ast

# Location of the ACO metrics export
file_path = './outputs/results/metrics/aco_text_data_metrics.csv'

# Load the raw table, then build the working frame with every stringified
# 'fitness_curve' entry turned back into a Python literal.
raw_metrics = pd.read_csv(file_path)
df = raw_metrics.assign(
    fitness_curve=raw_metrics['fitness_curve'].map(ast.literal_eval)
)

# Fix and parse the malformed 'selectedFeatures' column
def fix_array_string(s):
    """Parse a stringified flat array into a Python list.

    Brackets and newlines are discarded and the remaining tokens may be
    separated by whitespace, commas, or both, so ``"[1 0 1]"``,
    ``"[1, 0, 1]"`` and ``"[0. 1.\\n 1.]"`` are all accepted.

    Parameters
    ----------
    s : str
        Text form of the array. Any non-string value (for example a missing
        cell read as NaN) is treated as unparseable.

    Returns
    -------
    list
        The parsed values, or an empty list when ``s`` is not a string or
        its tokens are not valid Python literals.
    """
    if not isinstance(s, str):
        return []

    # Treat brackets and commas as plain separators so that both
    # whitespace-separated and comma-separated text tokenise the same way.
    for separator in '[],':
        s = s.replace(separator, ' ')
    tokens = s.split()

    try:
        return ast.literal_eval(f"[{','.join(tokens)}]")
    except (ValueError, TypeError, SyntaxError):
        # Only parse failures fall back to []; anything else propagates.
        return []

# Replace every raw 'selectedFeatures' string with its parsed list form
df['selectedFeatures'] = df['selectedFeatures'].map(fix_array_string)

# Each entry holds the 50-sample and the 100-sample variant of one dataset
paired_datasets = [
    ['BreastEW_50_samples', 'BreastEW_100_samples'],
    ['CongressEW_50_samples', 'CongressEW_100_samples'],
    ['Sonar_50_samples', 'Sonar_100_samples'],
]

# Summary rows are collected here, one dict per (dataset, variant)
results = list()

# Extract fitness and feature count metrics
def extract_metrics(record):
    """Summarise one run as (final fitness, accuracy, selected-feature count).

    Parameters
    ----------
    record : mapping or pandas.Series
        Must provide 'fitness_curve' (a sequence of numbers whose last entry
        is used) and 'selectedFeatures' (an iterable of numbers).

    Returns
    -------
    tuple
        ``(fit, acc, fc_count)`` where ``fit`` is the last fitness value
        rounded to 3 decimals, ``acc`` is ``1 - last fitness`` rounded to
        2 decimals, and ``fc_count`` is the number of 'selectedFeatures'
        entries greater than 0.5.
    """
    final_fitness = record['fitness_curve'][-1]
    fit = round(final_fitness, 3)
    # Round the accuracy from the unrounded fitness: rounding to 3 decimals
    # first and then to 2 can move the reported accuracy by 0.01.
    acc = round(1 - final_fitness, 2)
    fc_count = sum(1 for v in record['selectedFeatures'] if v > 0.5)
    return fit, acc, fc_count

# Process each dataset pair
def _select_run(dataset, threshold_only, method='ACO'):
    """Return the first row of ``df`` matching one dataset and threshold flag.

    Rows are restricted to the given ``method``, the 'KNN' classifier and the
    'gbest' solution.

    Raises
    ------
    IndexError
        If no row matches. The exception type is the same one the bare
        ``.iloc[0]`` lookup raised; the message now names the missing
        combination.
    """
    matches = df.query(
        "Dataset == @dataset and method == @method and classifier == 'KNN' "
        "and threshold_only == @threshold_only and using == 'gbest'"
    )
    if matches.empty:
        raise IndexError(
            f"no row for Dataset={dataset!r}, method={method!r}, "
            f"classifier='KNN', threshold_only={threshold_only!r}, using='gbest'"
        )
    # When several rows match, the first one is used.
    return matches.iloc[0]


# (threshold_only flag, label written to the 'method' column), in output order.
# NOTE(review): threshold_only == False is labelled 'HBEOSA-ACO' and
# threshold_only == True gets the '-NT' suffix; if "NT" is meant to read
# "no threshold" the labels may be swapped — confirm before publishing.
run_variants = (
    (False, 'HBEOSA-ACO'),
    (True, 'HBEOSA-ACO-NT'),
)

for pair in paired_datasets:
    base_name = pair[0].replace('_50_samples', '').replace('_100_samples', '')

    for threshold_only, label in run_variants:
        fit_50, acc_50, fc_50 = extract_metrics(_select_run(pair[0], threshold_only))
        fit_100, acc_100, fc_100 = extract_metrics(_select_run(pair[1], threshold_only))
        # FC is the mean selected-feature count over the two sample sizes
        avg_fc = round((fc_50 + fc_100) / 2, 2)

        results.append({
            'Dataset': base_name,
            'method': label,
            'acc-50': acc_50,
            'acc-100': acc_100,
            'fit-50': fit_50,
            'fit-100': fit_100,
            'FC': avg_fc
        })

# Assemble the summary table from the collected rows
final_df = pd.DataFrame(results)

# Persist the table as CSV and as Excel, both without the index column.
# NOTE(review): the BSO cell earlier in this notebook writes to these same two
# paths, so this cell overwrites the BSO summary on disk — confirm that this
# is intended.
for write_table, target in (
    (final_df.to_csv, './outputs/results/summary_table.csv'),
    (final_df.to_excel, './outputs/results/summary_table.xlsx'),
):
    write_table(target, index=False)

# Show the table as plain text, index hidden
print(final_df.to_string(index=False))


   Dataset        method  acc-50  acc-100  fit-50  fit-100  FC
  BreastEW    HBEOSA-ACO    0.92     0.96   0.077    0.040 2.0
  BreastEW HBEOSA-ACO-NT    0.92     0.96   0.077    0.040 2.0
CongressEW    HBEOSA-ACO    1.00     0.96   0.001    0.040 1.5
CongressEW HBEOSA-ACO-NT    1.00     0.96   0.001    0.040 1.0
     Sonar    HBEOSA-ACO    0.85     0.92   0.153    0.081 5.0
     Sonar HBEOSA-ACO-NT    0.85     0.96   0.152    0.042 7.5
