In [249]:
import pandas as pd
import glob
import os
import re

# =====================================
# Collect all learning CSVs
# =====================================
learning_files = sorted(glob.glob("*.csv"))

print("Found files:")
for f in learning_files:
    print(" ", f)

dfs = []

# =====================================
# Load each CSV as one column
# =====================================
for f in learning_files:

    name = os.path.splitext(os.path.basename(f))[0]   # column name

    df = pd.read_csv(f)

    # use instance filename as key
    df = df[['file','quality']]

    df = df.rename(columns={'quality': name})

    dfs.append(df.set_index('file'))


# =====================================
# Merge column-wise on file
# =====================================
result = pd.concat(dfs, axis=1)

# =====================================
# Optional: sort rows
# =====================================
result = result.sort_index()

print("\nFinal dataframe:")
print(result.head())


# =====================================
# Save
# =====================================
result.to_csv("summary_learning_objectives_matrix.txt")

#print("\nSaved learning_objectives_matrix.csv")


Found files:
  IMSBS-1000_heuristic-h5_imbs-iters-500_num-roots-10.csv
  IMSBS-2000_heuristic-h5_imbs-iters-500_num-roots-10.csv
  IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10.csv
  IMSBS-500_heuristic-h5_imbs-iters-500_num-roots-10.csv
  LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A1_F3.csv
  LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1.csv
  LIMSBS-2000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1.csv
  LIMSBS-500_imbs-iters-500_num-roots-10_U10_5_5_A2_F3.csv
  LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A1_F3.csv
  LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A2_F1.csv
  learning_U10_10_10_A1_F1.csv
  learning_U10_10_10_A1_F2.csv
  learning_U10_10_10_A1_F3.csv
  learning_U10_10_10_A2_F1.csv
  learning_U10_10_10_A2_F2.csv
  learning_U10_10_10_A2_F3.csv
  learning_U10_10_10_A3_F1.csv
  learning_U10_10_10_A3_F2.csv
  learning_U10_10_10_A3_F3.csv
  learning_U10_10_5_A1_F1.csv
  learning_U10_10_5_A1_F2.csv
  learning_U10_10_5_A1_F3.csv
  learning_U10_10_5_A2_F1.csv
  lear

In [250]:
# =====================================
# Load previous result
# =====================================
df = pd.read_csv("summary_learning_objectives_matrix.txt")

# first column is 'file'
df = df.set_index('file')


# =====================================
# Extract category = first three numbers
# =====================================
def extract_category(name):
    # matches ..._10_10_10.3  or ..._10_10_10_3
    m = re.search(r"(\d+)_(\d+)_(\d+)[\._]", str(name))
    if m:
        return f"{m.group(1)}_{m.group(2)}_{m.group(3)}"
    return None


df['category'] = df.index.map(extract_category)

# =====================================
# Group by category and average instances
# =====================================
grouped = df.groupby('category').mean(numeric_only=True)

#print(grouped.head())


# =====================================
# Save
# =====================================
#grouped.to_csv("summary_learning_objectives_grouped.csv")

#print("\nSaved learning_objectives_grouped.csv")


In [251]:
grouped.mean().head(60)

IMSBS-1000_heuristic-h5_imbs-iters-500_num-roots-10     64.400000
IMSBS-2000_heuristic-h5_imbs-iters-500_num-roots-10     64.684375
IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10      60.431250
IMSBS-500_heuristic-h5_imbs-iters-500_num-roots-10      63.668750
LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A1_F3    65.115625
LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1    63.887500
LIMSBS-2000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1    64.571875
LIMSBS-500_imbs-iters-500_num-roots-10_U10_5_5_A2_F3    64.943750
LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A1_F3     57.750000
LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A2_F1     64.150000
learning_U10_10_10_A1_F1                                61.015625
learning_U10_10_10_A1_F2                                58.971875
learning_U10_10_10_A1_F3                                58.168750
learning_U10_10_10_A2_F1                                59.615625
learning_U10_10_10_A2_F2                                62.012500
learning_U

In [252]:
#learning_U5_5_5_A2_F1, 
grouped=grouped.reset_index()
grouped.columns

Index(['category', 'IMSBS-1000_heuristic-h5_imbs-iters-500_num-roots-10',
       'IMSBS-2000_heuristic-h5_imbs-iters-500_num-roots-10',
       'IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10',
       'IMSBS-500_heuristic-h5_imbs-iters-500_num-roots-10',
       'LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A1_F3',
       'LIMSBS-1000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1',
       'LIMSBS-2000_imbs-iters-500_num-roots-10_U5_5_5_A2_F1',
       'LIMSBS-500_imbs-iters-500_num-roots-10_U10_5_5_A2_F3',
       'LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A1_F3',
       'LIMSBS-500_imbs-iters-500_num-roots-10_U5_5_5_A2_F1',
       'learning_U10_10_10_A1_F1', 'learning_U10_10_10_A1_F2',
       'learning_U10_10_10_A1_F3', 'learning_U10_10_10_A2_F1',
       'learning_U10_10_10_A2_F2', 'learning_U10_10_10_A2_F3',
       'learning_U10_10_10_A3_F1', 'learning_U10_10_10_A3_F2',
       'learning_U10_10_10_A3_F3', 'learning_U10_10_5_A1_F1',
       'learning_U10_10_5_A1_F2', 'learning_U10_10_5

In [253]:
#THE BEST CONFIGS OF FFNN: learning_U5_5_5_A1_F3 (62.45), learning_U5_5_5_A2_F1   62.175000, learning_U5_5_5_A3_F1 62.18
grouped_compare = grouped[[ "category", "learning_U5_5_5_A3_F1", "learning_U5_5_5_A3_F1", "learning_U5_5_5_A1_F3", "IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10" ]] #"learning_basic_imsbs"]]

In [254]:
grouped_compare

Unnamed: 0,category,learning_U5_5_5_A3_F1,learning_U5_5_5_A3_F1.1,learning_U5_5_5_A1_F3,IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10
0,10_100_2,9.2,9.2,9.2,8.6
1,10_100_4,6.8,6.8,6.9,6.2
2,10_200_2,11.3,11.3,10.8,10.3
3,10_200_4,6.6,6.6,6.4,6.2
4,10_500_2,10.0,10.0,10.0,9.5
5,10_500_4,7.1,7.1,6.7,6.1
6,10_50_2,10.0,10.0,9.9,9.1
7,10_50_4,6.3,6.3,6.1,6.0
8,2_100_2,72.4,72.4,71.8,72.8
9,2_100_4,57.2,57.2,59.2,61.7


In [255]:
grouped_compare.mean()

category                                              3.156316e+157
learning_U5_5_5_A3_F1                                  5.988438e+01
learning_U5_5_5_A3_F1                                  5.988438e+01
learning_U5_5_5_A1_F3                                  5.966562e+01
IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10     6.043125e+01
dtype: float64

In [256]:
extract_results = grouped[[ "category", "learning_U5_5_5_A1_F3", "IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10" ]] #"learning_basic_imsbs"]]

In [257]:
extract_results.to_latex("results_preliminary.tex", index=False)

  extract_results.to_latex("results_preliminary.tex", index=False)


In [258]:
extract_results

Unnamed: 0,category,learning_U5_5_5_A1_F3,IMSBS-500_heuristic-h5_imbs-iters-100_num-roots-10
0,10_100_2,9.2,8.6
1,10_100_4,6.9,6.2
2,10_200_2,10.8,10.3
3,10_200_4,6.4,6.2
4,10_500_2,10.0,9.5
5,10_500_4,6.7,6.1
6,10_50_2,9.9,9.1
7,10_50_4,6.1,6.0
8,2_100_2,71.8,72.8
9,2_100_4,59.2,61.7
