In [5]:
import os
import pandas as pd
import json
import re

In [97]:
# Function to extract information from config.json
def extract_info_from_config(config_path):
    """Read a run's JSON config and return its identifying fields.

    Parameters
    ----------
    config_path : path to a JSON file holding a single object.

    Returns
    -------
    tuple
        ``(dataset, feature, mode, param)``. ``param`` is the value stored
        under ``'d'`` when ``mode == 'mesh'`` and the value stored under
        ``'n_pins'`` for every other mode. Any key that is absent from the
        file yields an empty string.
    """
    with open(config_path, 'r') as handle:
        settings = json.load(handle)

    mode = settings.get('mode', '')
    # Mesh runs are described by 'd'; all other modes by 'n_pins'.
    if mode == 'mesh':
        param_key = 'd'
    else:
        param_key = 'n_pins'

    return (
        settings.get('dataset', ''),
        settings.get('feature', ''),
        mode,
        settings.get(param_key, ''),
    )

# Function to parse the test_PinMNIST files
def parse_test(file_path):
    """Extract three floats from a ``test_PinMNIST*`` result file.

    The stripped file content is split on ``';'``. The second
    whitespace-separated token of the first segment is the MSE. The second
    segment is split on ``','``: the second token of its first piece and the
    first token of its second piece are the remaining two values (the
    notebook's caller unpacks the result as ``mse, npp, gp``).

    Returns
    -------
    tuple of float
        ``(mse, second_value, third_value)`` in the order described above.
    """
    with open(file_path, 'r') as handle:
        text = handle.read().strip()

    segments = text.split(';')
    mse_value = float(segments[0].split()[1])

    tail_pieces = segments[1].split(',')
    npp_value = float(tail_pieces[0].split()[1])
    last_value = float(tail_pieces[1].split()[0])

    return mse_value, npp_value, last_value

def parse_res(file_path):
    """Parse the first summary line found in a results file.

    The file is searched for the first occurrence of text shaped like
    ``MSE: <x> | NPP (sigma <s>): <y> , GP: <z>``.

    Parameters
    ----------
    file_path : path to the text file to read.

    Returns
    -------
    tuple of float
        ``(mse, sigma, npp, gp)`` taken from the first match.

    Raises
    ------
    ValueError
        If the file contains no such summary line; the message names the file.
    """
    # One numeric field: optional sign, decimal digits and an optional exponent,
    # so values written as e.g. 1.5e-05 or -0.25 are accepted (a plain
    # digits-and-dots character class rejects them).
    number = r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
    pattern = (
        r"MSE: " + number
        + r" \| NPP \(sigma " + number
        + r"\): " + number
        + r" , GP: " + number
    )

    with open(file_path, 'r') as file:
        content = file.read()  # Read the entire file content as a single string

    # re.search returns the first match, the same one findall(...)[0] would pick.
    match = re.search(pattern, content)
    if match is None:
        raise ValueError(
            f"no 'MSE: ... | NPP (sigma ...): ... , GP: ...' line found in {file_path!r}"
        )

    mse, sigma, npp, gp = (float(group) for group in match.groups())
    return mse, sigma, npp, gp

def append(df, pp, mse, sigma, npp, gp, dataset, feature, mode, param):
    """Return a new DataFrame made of ``df`` plus one extra result row.

    The row is placed after the existing rows and the index of the returned
    frame is renumbered from 0. ``df`` itself is not modified.

    Column mapping: ``dataset`` -> 'Dataset', ``feature`` -> 'Feature',
    ``mode`` -> 'Mode', ``param`` -> 'D/NPins', ``pp`` -> 'Partial Percent',
    ``mse`` -> 'MSE', ``sigma`` -> 'Sigma', ``npp`` -> 'NPP', ``gp`` -> 'GP'.
    """
    labels = ('Dataset', 'Feature', 'Mode', 'D/NPins', 'Partial Percent',
              'MSE', 'Sigma', 'NPP', 'GP')
    values = (dataset, feature, mode, param, pp, mse, sigma, npp, gp)

    # Single-row frame: every column holds a one-element list.
    new_row = pd.DataFrame({label: [value] for label, value in zip(labels, values)})
    return pd.concat([df, new_row], ignore_index=True)

In [112]:
# Column order of the aggregated results table
RESULT_COLUMNS = ['Dataset', 'Feature', 'Mode', 'D/NPins', 'Partial Percent', 'MSE', 'Sigma', 'NPP', 'GP']

# Directory containing the folders
base_folder = './history'

# Rows are collected as plain dicts and turned into a DataFrame once at the
# end; growing a DataFrame with pd.concat inside the loop copies the whole
# table on every iteration.
records = []

# Loop through each directory in the base folder (sorted, because os.listdir
# returns entries in arbitrary order and that would make tie order vary)
for dir_name in sorted(os.listdir(base_folder)):
    dir_path = os.path.join(base_folder, dir_name)

    # Only sub-directories hold runs
    if not os.path.isdir(dir_path):
        continue

    config_path = os.path.join(dir_path, 'config.json')
    results_path = os.path.join(dir_path, 'results.txt')

    # A folder without both files is not a complete run; report it and move
    # on instead of aborting the whole aggregation.
    if not (os.path.isfile(config_path) and os.path.isfile(results_path)):
        print(f"Skipping {dir_path}: missing config.json or results.txt")
        continue

    # Get config.json info shared by every row of this run
    dataset, feature, mode, param = extract_info_from_config(config_path)
    run_info = {'Dataset': dataset, 'Feature': feature, 'Mode': mode, 'D/NPins': param}

    # NOTE(review): the results.txt row is recorded with Partial Percent 0.0 —
    # confirm this is the intended label for that row.
    mse, sigma, npp, gp = parse_res(results_path)
    records.append({**run_info, 'Partial Percent': 0.0,
                    'MSE': mse, 'Sigma': sigma, 'NPP': npp, 'GP': gp})

    # Loop through files starting with 'test_PinMNIST'
    for file_name in sorted(os.listdir(dir_path)):
        if file_name.startswith('test_PinMNIST'):
            # The text after the last '_' minus its final 4 characters is the
            # partial percent (assumes a 4-character extension such as '.txt')
            pp = float(file_name.split('_')[-1][:-4])
            file_path = os.path.join(dir_path, file_name)
            mse, npp, gp = parse_test(file_path)
            # sigma is the one read from this run's results.txt
            records.append({**run_info, 'Partial Percent': pp,
                            'MSE': mse, 'Sigma': sigma, 'NPP': npp, 'GP': gp})

df = pd.DataFrame(records, columns=RESULT_COLUMNS)

# One multi-key sort: Feature and Mode ascending, then D/NPins and
# Partial Percent descending within each group.
df = df.sort_values(
    by=['Feature', 'Mode', 'D/NPins', 'Partial Percent'],
    ascending=[True, True, False, False],
    ignore_index=True,
)

In [113]:
# Display the aggregated results table built in the previous cell
df

Unnamed: 0,Dataset,Feature,Mode,D/NPins,Partial Percent,MSE,Sigma,NPP,GP
0,PinMNIST,AE,mesh,10,1.0,70.908794,0.5,0.39735,0.411586
1,PinMNIST,AE,mesh,10,0.75,86.392427,0.5,0.461449,0.462395
2,PinMNIST,AE,mesh,10,0.5,129.252859,0.5,0.691621,0.679808
3,PinMNIST,AE,mesh,10,0.25,91.439378,0.5,0.513744,0.502053
4,PinMNIST,AE,mesh,3,1.0,0.730613,2.0,0.750291,0.754146
5,PinMNIST,AE,mesh,3,0.75,0.21471,2.0,0.221937,0.215284
6,PinMNIST,AE,mesh,3,0.5,0.773431,2.0,0.757827,0.751134
7,PinMNIST,AE,mesh,3,0.25,0.939234,2.0,0.962333,0.968475
8,PinMNIST,AE,random,100,1.0,1.035002,0.1,1.081253,1.064827
9,PinMNIST,AE,random,100,0.75,1.073427,0.1,1.124931,0.875204


In [None]:
# Write the aggregated table to output.csv, leaving out the row index
df.to_csv(path_or_buf='output.csv', index=False)