In [98]:
import pandas as pd
import glob
import os
import re

# =====================================
# Collect all learning CSVs
# =====================================
learning_files = sorted(glob.glob("*.csv"))

print("Found files:")
for f in learning_files:
    print(" ", f)

dfs = []

# =====================================
# Load each CSV as one column
# =====================================
for f in learning_files:

    name = os.path.splitext(os.path.basename(f))[0]   # column name

    df = pd.read_csv(f)

    # use instance filename as key
    df = df[['file','quality']]

    df = df.rename(columns={'quality': name})

    dfs.append(df.set_index('file'))


# =====================================
# Merge column-wise on file
# =====================================
result = pd.concat(dfs, axis=1)

# =====================================
# Optional: sort rows
# =====================================
result = result.sort_index()

print("\nFinal dataframe:")
print(result.head())


# =====================================
# Save
# =====================================
result.to_csv("summary_learning_objectives_matrix.txt")

#print("\nSaved learning_objectives_matrix.csv")


Found files:
  IMSBS-1000_heuristic-h5_imbs-iters-500_num-roots-10.csv
  IMSBS-2000_heuristic-h5_imbs-iters-500_num-roots-10.csv
  IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10.csv
  IMSBS-500_heuristic-h5_imbs-iters-500_num-roots-10.csv
  LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A1_F3.csv
  LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1.csv
  LIMSBS-2000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1.csv
  LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A2_F1.csv
  learning_U10_10_10_A1_F1.csv
  learning_U10_10_10_A1_F2.csv
  learning_U10_10_10_A1_F3.csv
  learning_U10_10_10_A2_F1.csv
  learning_U10_10_10_A2_F2.csv
  learning_U10_10_10_A2_F3.csv
  learning_U10_10_10_A3_F1.csv
  learning_U10_10_10_A3_F2.csv
  learning_U10_10_10_A3_F3.csv
  learning_U10_10_A1_F1.csv
  learning_U10_10_A1_F2.csv
  learning_U10_10_A1_F3.csv
  learning_U10_10_A2_F1.csv
  learning_U10_10_A2_F2.csv
  learning_U10_10_A2_F3.csv
  learning_U10_10_A3_F1.csv
  learning_U10_10_A3_F2.csv
  learning_U10_10_A

In [99]:
# =====================================
# Load previous result
# =====================================
df = pd.read_csv("summary_learning_objectives_matrix.txt")

# first column is 'file'
df = df.set_index('file')


# =====================================
# Extract category = first three numbers
# =====================================
def extract_category(name):
    # matches ..._10_10_10.3  or ..._10_10_10_3
    m = re.search(r"(\d+)_(\d+)_(\d+)[\._]", str(name))
    if m:
        return f"{m.group(1)}_{m.group(2)}_{m.group(3)}"
    return None


df['category'] = df.index.map(extract_category)

# =====================================
# Group by category and average instances
# =====================================
grouped = df.groupby('category').mean(numeric_only=True)

#print(grouped.head())


# =====================================
# Save
# =====================================
#grouped.to_csv("summary_learning_objectives_grouped.csv")

#print("\nSaved learning_objectives_grouped.csv")


In [100]:
grouped.mean().head(50)

IMSBS-1000_heuristic-h5_imbs-iters-500_num-roots-10     64.400000
IMSBS-2000_heuristic-h5_imbs-iters-500_num-roots-10     64.684375
IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10      60.431250
IMSBS-500_heuristic-h5_imbs-iters-500_num-roots-10      63.668750
LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A1_F3    65.115625
LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1    63.887500
LIMSBS-2000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1    64.571875
LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A2_F1     64.150000
learning_U10_10_10_A1_F1                                57.615625
learning_U10_10_10_A1_F2                                61.928125
learning_U10_10_10_A1_F3                                60.525000
learning_U10_10_10_A2_F1                                61.428125
learning_U10_10_10_A2_F2                                61.903125
learning_U10_10_10_A2_F3                                61.903125
learning_U10_10_10_A3_F1                                61.465625
learning_U

In [101]:
#learning_U5_5_5_A2_F1, 
grouped=grouped.reset_index()
grouped.columns

Index(['category', 'IMSBS-1000_heuristic-h5_imbs-iters-500_num-roots-10',
       'IMSBS-2000_heuristic-h5_imbs-iters-500_num-roots-10',
       'IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10',
       'IMSBS-500_heuristic-h5_imbs-iters-500_num-roots-10',
       'LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A1_F3',
       'LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1',
       'LIMSBS-2000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1',
       'LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A2_F1',
       'learning_U10_10_10_A1_F1', 'learning_U10_10_10_A1_F2',
       'learning_U10_10_10_A1_F3', 'learning_U10_10_10_A2_F1',
       'learning_U10_10_10_A2_F2', 'learning_U10_10_10_A2_F3',
       'learning_U10_10_10_A3_F1', 'learning_U10_10_10_A3_F2',
       'learning_U10_10_10_A3_F3', 'learning_U10_10_A1_F1',
       'learning_U10_10_A1_F2', 'learning_U10_10_A1_F3',
       'learning_U10_10_A2_F1', 'learning_U10_10_A2_F2',
       'learning_U10_10_A2_F3', 'learning_U10_10_A3_F1',
       '

In [102]:
#THE BEST CONFIGS OF FFNN: learning_U5_5_5_A1_F3 (62.45), learning_U5_5_5_A2_F1   62.175000, learning_U5_5_5_A3_F1 62.18
grouped_compare = grouped[[ "category", "learning_U5_5_5_A3_F1", "learning_U5_5_5_A3_F1", "learning_U5_5_5_A1_F3", "IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10" ]] #"learning_basic_imsbs"]]

In [103]:
grouped_compare

Unnamed: 0,category,learning_U5_5_5_A3_F1,learning_U5_5_5_A3_F1.1,learning_U5_5_5_A1_F3,IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10
0,10_100_2,9.5,9.5,9.1,8.6
1,10_100_4,7.0,7.0,7.2,6.2
2,10_200_2,10.8,10.8,10.5,10.3
3,10_200_4,6.3,6.3,6.3,6.2
4,10_500_2,10.2,10.2,10.2,9.5
5,10_500_4,6.8,6.8,6.6,6.1
6,10_50_2,9.5,9.5,9.5,9.1
7,10_50_4,5.9,5.9,5.9,6.0
8,2_100_2,72.6,72.6,72.6,72.8
9,2_100_4,61.7,61.7,61.7,61.7


In [104]:
grouped_compare.mean()

category                                              3.156316e+157
learning_U5_5_5_A3_F1                                  6.201875e+01
learning_U5_5_5_A3_F1                                  6.201875e+01
learning_U5_5_5_A1_F3                                  6.245625e+01
IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10     6.043125e+01
dtype: float64