In [23]:
import os
import pandas as pd
import json
import re

In [24]:
# Read the experiment settings stored in a run's config.json
def extract_info_from_config(config_path):
    """Return the settings of one experiment run as a tuple.

    Args:
        config_path: Path to the JSON configuration file of the run.

    Returns:
        ``(dataset, feature, mode, n_pins, deeper)``, each read from the
        identically named key of the JSON object. A key that is absent
        yields the empty string ``''``.
    """
    with open(config_path, 'r') as handle:
        settings = json.load(handle)

    wanted_keys = ('dataset', 'feature', 'mode', 'n_pins', 'deeper')
    return tuple(settings.get(key, '') for key in wanted_keys)

# Function to parse the partial-label test files (test_PinMNIST_* / test_Synthetic_*)
def parse_test(file_path):
    """Read the six metrics stored in a partial-label test file.

    The file content is split on ``|`` into an MSE section and an NPP/GP
    section; the second section is split on ``;`` into its NPP and GP parts.
    Each part holds two comma-separated fields, and the value of a field is
    its second whitespace-separated token (the first token is a label).

    Args:
        file_path: Path of the text file to read.

    Returns:
        ``(mse, r2mse, npp, r2npp, gp, r2gp)`` as floats.
    """
    def value_of(field):
        # A field looks like "<label> <number>"; keep the number.
        return float(field.split()[1])

    with open(file_path, 'r') as handle:
        text = handle.read().strip()

    sections = text.split('|')

    mse_fields = sections[0].split(',')
    mse = value_of(mse_fields[0])
    r2mse = value_of(mse_fields[1])

    npp_and_gp = sections[1].split(';')
    npp_fields = npp_and_gp[0].split(',')
    gp_fields = npp_and_gp[1].split(',')

    npp = value_of(npp_fields[0])
    r2npp = value_of(npp_fields[1])
    gp = value_of(gp_fields[0])
    r2gp = value_of(gp_fields[1])

    return mse, r2mse, npp, r2npp, gp, r2gp

def parse_res(file_path):
    """Extract the summary metrics from a run's results file.

    The first occurrence of a line fragment of the form
    ``MSE: <x>, R2: <x> | NPP (sigma <x>): <x>, R2: <x>; GP: <x>, R2: <x>``
    is used. Every ``<x>`` may be a plain decimal, a signed value, a number
    in scientific notation (e.g. ``1.5e-05``) or ``nan``/``inf``, i.e. the
    textual forms Python produces when formatting a float.

    Args:
        file_path: Path of the results text file.

    Returns:
        ``(mse, r2mse, sigma, npp, r2npp, gp, r2gp)`` as floats.

    Raises:
        ValueError: If the file contains no fragment in the expected format.
    """
    with open(file_path, 'r') as file:
        content = file.read()  # Read the entire file content as a single string

    # One float literal: optional sign, decimal digits with optional exponent,
    # or the special values nan / inf. Only non-capturing groups are used so
    # the seven capturing groups below stay aligned with the seven metrics.
    number = r"[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf)"
    pattern = (
        rf"MSE: ({number}), R2: ({number}) \| "
        rf"NPP \(sigma ({number})\): ({number}), R2: ({number}); "
        rf"GP: ({number}), R2: ({number})"
    )

    match = re.search(pattern, content)
    if match is None:
        raise ValueError(f"No results line in the expected format found in {file_path!r}")

    mse, r2mse, sigma, npp, r2npp, gp, r2gp = (float(token) for token in match.groups())
    return mse, r2mse, sigma, npp, r2npp, gp, r2gp

def append(df, pp, mse, r2mse, sigma, npp, r2npp, gp, r2gp, dataset, feature, mode, param, deeper):
    """Return a new frame made of ``df`` followed by one result row.

    ``df`` itself is left untouched; the returned frame is re-indexed
    from 0 (``ignore_index=True``).

    Args:
        df: Frame holding the rows gathered so far.
        pp: Value stored in the 'Partial Percent' column.
        mse, r2mse: Values for the 'MSE' and 'R2 MSE' columns.
        sigma: Value for the 'Sigma' column.
        npp, r2npp: Values for the 'NPP' and 'R2 NPP' columns.
        gp, r2gp: Values for the 'GP' and 'R2 GP' columns.
        dataset, feature, mode: Values for 'Dataset', 'Feature' and 'Mode'.
        param: Value for the 'NPins' column.
        deeper: Value for the 'Extra layers' column.
    """
    record = {
        'Dataset': dataset,
        'Feature': feature,
        'Extra layers': deeper,
        'Mode': mode,
        'NPins': param,
        'Partial Percent': pp,
        'MSE': mse,
        'R2 MSE': r2mse,
        'Sigma': sigma,
        'NPP': npp,
        'R2 NPP': r2npp,
        'GP': gp,
        'R2 GP': r2gp,
    }
    # Wrap each value in a one-element list so the frame has exactly one row.
    one_row = pd.DataFrame({column: [value] for column, value in record.items()})
    return pd.concat([df, one_row], ignore_index=True)

In [35]:
# Column layout of the summary table
columns = ['Dataset', 'Feature', 'Extra layers', 'Mode', 'NPins', 'Partial Percent', 'MSE', 'R2 MSE', 'Sigma', 'NPP', 'R2 NPP', 'GP', 'R2 GP']

# Directory containing the folders
base_folder = './history/exp'

# Rows are gathered as plain dicts and turned into a DataFrame once at the
# end; concatenating a one-row frame per result copies the whole table on
# every iteration.
records = []

for dir_name in os.listdir(base_folder):
    dir_path = os.path.join(base_folder, dir_name)
    # Only sub-directories whose name starts with '17' are treated as runs.
    if not dir_name.startswith('17') or not os.path.isdir(dir_path):
        continue

    # Settings and full-label results of the run
    dataset, feature, mode, param, deeper = extract_info_from_config(os.path.join(dir_path, 'config.json'))
    mse, r2mse, sigma, npp, r2npp, gp, r2gp = parse_res(os.path.join(dir_path, 'results.txt'))

    # Values shared by every row of this run. The test files carry no sigma,
    # so the one read from results.txt is reused for them.
    run_info = {'Dataset': dataset, 'Feature': feature, 'Extra layers': deeper,
                'Mode': mode, 'NPins': param, 'Sigma': sigma}

    # results.txt is recorded with a partial percent of 0.0
    records.append({**run_info, 'Partial Percent': 0.0,
                    'MSE': mse, 'R2 MSE': r2mse,
                    'NPP': npp, 'R2 NPP': r2npp,
                    'GP': gp, 'R2 GP': r2gp})

    # Partial-label test files of the run
    for file_name in os.listdir(dir_path):
        if not file_name.startswith(('test_PinMNIST', 'test_Synthetic')):
            continue
        # The percent is the last '_'-separated token minus its 4-character
        # extension, e.g. 'test_PinMNIST_0.25.txt' -> 0.25
        pp = float(file_name.split('_')[-1][:-4])
        mse, r2mse, npp, r2npp, gp, r2gp = parse_test(os.path.join(dir_path, file_name))
        records.append({**run_info, 'Partial Percent': pp,
                        'MSE': mse, 'R2 MSE': r2mse,
                        'NPP': npp, 'R2 NPP': r2npp,
                        'GP': gp, 'R2 GP': r2gp})

# One explicit multi-key sort replaces two chained sorts that depended on the
# second one being stable: groups are ordered by Dataset/Feature/Mode
# (ascending), and inside a group by NPins, Partial Percent and NPP
# (descending). keep='last' therefore retains, for each configuration, the
# duplicate with the lowest NPP.
df = (
    pd.DataFrame(records, columns=columns)
    .sort_values(
        by=['Dataset', 'Feature', 'Mode', 'NPins', 'Partial Percent', 'NPP'],
        ascending=[True, True, True, False, False, False],
        ignore_index=True,
    )
    .drop_duplicates(
        subset=['Dataset', 'Feature', 'Extra layers', 'Mode', 'NPins', 'Partial Percent'],
        keep='last',
    )
)
df.head()

Unnamed: 0,Dataset,Feature,Extra layers,Mode,NPins,Partial Percent,MSE,R2 MSE,Sigma,NPP,R2 NPP,GP,R2 GP
1,PinMNIST,AE,False,mesh,100,1.0,0.180671,0.996895,0.2,0.166074,0.997135,0.165522,0.997145
3,PinMNIST,AE,True,mesh,100,1.0,24.873579,0.620728,2.0,0.072513,0.998785,0.071589,0.998801
5,PinMNIST,AE,False,mesh,100,0.75,0.180671,0.996895,0.2,0.166074,0.997135,0.165615,0.997143
7,PinMNIST,AE,True,mesh,100,0.75,24.873579,0.620728,2.0,0.072513,0.998785,0.071807,0.998797
9,PinMNIST,AE,False,mesh,100,0.5,0.180671,0.996895,0.2,0.166074,0.997135,0.165733,0.997141


In [40]:
# PinMNIST rows whose 'Extra layers' flag is False: export to CSV, then display.
# `== False` is an element-wise Series comparison, so it is kept as written.
is_pinmnist_base = (df['Dataset'] == 'PinMNIST') & (df['Extra layers'] == False)
pinmnist_table = df.loc[is_pinmnist_base].drop(columns='Extra layers').reset_index(drop=True)
pinmnist_table.to_csv('pinMNIST.csv', index=False)
pinmnist_table

Unnamed: 0,Dataset,Feature,Mode,NPins,Partial Percent,MSE,R2 MSE,Sigma,NPP,R2 NPP,GP,R2 GP
0,PinMNIST,AE,mesh,100,1.0,0.180671,0.996895,0.2,0.166074,0.997135,0.165522,0.997145
1,PinMNIST,AE,mesh,100,0.75,0.180671,0.996895,0.2,0.166074,0.997135,0.165615,0.997143
2,PinMNIST,AE,mesh,100,0.5,0.180671,0.996895,0.2,0.166074,0.997135,0.165733,0.997141
3,PinMNIST,AE,mesh,100,0.25,0.180671,0.996895,0.2,0.166074,0.997135,0.165913,0.997138
4,PinMNIST,AE,mesh,100,0.0,0.180671,0.996895,0.2,0.166074,0.997135,0.166074,0.997135
5,PinMNIST,AE,mesh,9,1.0,118.606788,-0.641429,0.2,0.06684,0.994037,0.06684,0.971716
6,PinMNIST,AE,mesh,9,0.75,118.606788,-0.641429,0.2,0.06684,0.994037,0.06684,0.971716
7,PinMNIST,AE,mesh,9,0.5,118.606788,-0.641429,0.2,0.06684,0.994037,0.06684,0.97618
8,PinMNIST,AE,mesh,9,0.25,118.606788,-0.641429,0.2,0.06684,0.994037,0.06684,0.97618
9,PinMNIST,AE,mesh,9,0.0,118.606788,-0.641429,0.2,0.06684,0.994037,0.06684,0.994037


In [41]:
# Synthetic rows whose 'Extra layers' flag is False: export to CSV, then display.
# `== False` is an element-wise Series comparison, so it is kept as written.
is_synthetic_base = (df['Dataset'] == 'Synthetic') & (df['Extra layers'] == False)
synthetic_table = df.loc[is_synthetic_base].drop(columns='Extra layers').reset_index(drop=True)
synthetic_table.to_csv('Synthetic.csv', index=False)
synthetic_table

Unnamed: 0,Dataset,Feature,Mode,NPins,Partial Percent,MSE,R2 MSE,Sigma,NPP,R2 NPP,GP,R2 GP
0,Synthetic,AE,mesh,100,1.0,13.901962,0.782908,0.5,14.245438,0.777145,14.081051,0.779755
1,Synthetic,AE,mesh,100,0.75,13.901962,0.782908,0.5,14.245438,0.777145,14.123055,0.779089
2,Synthetic,AE,mesh,100,0.5,13.901962,0.782908,0.5,14.245438,0.777145,14.157693,0.77854
3,Synthetic,AE,mesh,100,0.25,13.901962,0.782908,0.5,14.245438,0.777145,14.206275,0.777771
4,Synthetic,AE,mesh,100,0.0,13.901962,0.782908,0.5,14.245438,0.777145,14.245438,0.777145
5,Synthetic,AE,mesh,9,1.0,12.467432,0.552807,0.1,12.470502,0.552756,12.470502,0.552756
6,Synthetic,AE,mesh,9,0.75,12.467432,0.552807,0.1,12.470502,0.552756,12.470502,0.552756
7,Synthetic,AE,mesh,9,0.5,12.467432,0.552807,0.1,12.470502,0.552756,12.470502,0.552756
8,Synthetic,AE,mesh,9,0.25,12.467432,0.552807,0.1,12.470502,0.552756,12.470502,0.552756
9,Synthetic,AE,mesh,9,0.0,12.467432,0.552807,0.1,12.470502,0.552756,12.470502,0.552756


TESTING

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
import numpy as np
import os
import json
from tools.plot_utils import plot_and_save
from tools.data_utils import *
from tools.losses import NPPLoss
from tools.models import Autoencoder
from tools.optimization import EarlyStoppingCallback, evaluate_model
import matplotlib.pyplot as plt
import argparse
import time
from tools.models import *

# --- Experiment selection -------------------------------------------------
experiment_id = "1709622898" # 1709521764
experiment_folder = "./history/exp_def"
dataset = "Synthetic"
feature = "DDPM"
mode = "random"
feature_extracted = feature == "DDPM"
mesh = mode == "mesh"
d = 3
n_pins = 100
partial_percent = 0.25
r = 3
batch_size = 32

num_kernels_encoder = [32, 16]
num_kernels_decoder = [32]


# Seed PyTorch and NumPy so that random_split below is repeatable
seed = 4  # You can use any integer value as the seed
torch.manual_seed(seed)
np.random.seed(seed)

# Number of image channels fed to the models, per dataset
if dataset == "Synthetic":
    input_channel = 3
elif dataset == "PinMNIST":
    input_channel = 1
elif dataset == "Building":
    input_channel = 4

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data extracted with DDPM features lives in a "<dataset>_ddpm" folder
if feature_extracted:
    folder = f"{dataset}_ddpm"
else:
    folder = f"{dataset}"

if mesh:
    # Pin count implied by the mesh step on a 28-pixel side. This used to be
    # written to config['n_pins'], but this notebook defines no `config`
    # object, so selecting mode = "mesh" raised a NameError.
    n_pins = (28 // d + 1) ** 2

# --- Locate the pin data --------------------------------------------------
if dataset == "PinMNIST":
    if mesh:
        data_folder = f"./data/{folder}/mesh_{d}step_{28}by{28}pixels_{r}radius_{seed}seed"
    else:
        data_folder = f"./data/{folder}/random_fixedTrue_{n_pins}pins_{28}by{28}pixels_{r}radius_{seed}seed"
elif dataset == "Synthetic":
    folder += "/28by28pixels_1000images_123456seed"
    if mesh:
        data_folder = f"./data/{folder}/mesh_{d}step_pins"
    else:
        data_folder = f"./data/{folder}/random_{n_pins}pins"
else: # dataset == "Building"
    raise Exception("Building option is still not implemented.")

# ToTensor, Resize, PinDataset and custom_collate_fn are not defined in this
# notebook; presumably they come from the `tools` star imports -- TODO confirm.
transform = transforms.Compose([
    ToTensor(),
    Resize()
])

transformed_dataset = PinDataset(csv_file=f"{data_folder}/pins.csv",
                                      root_dir=f"./data/{folder}/images/",
                                      transform=transform)

# 70% train, 10% validation, remainder test
dataset_size = len(transformed_dataset)
train_size = int(0.7 * dataset_size)
val_size = int(0.10 * dataset_size)
test_size = dataset_size - train_size - val_size

# Split the dataset into train, validation, and test sets
train_dataset, val_dataset, test_dataset = random_split(
    transformed_dataset, [train_size, val_size, test_size]
)

# Create your DataLoader with the custom_collate_fn
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)

In [None]:
# Testing
# Loads both checkpoints of the selected experiment and reports their NPP loss
# on `dataloader`, which this cell points at the training loader.
dataloader = train_loader

checkpoint_dir = f'{experiment_folder}/{experiment_id}'
if not os.path.exists(checkpoint_dir):
    raise Exception(f"Could not find experiment with id: {experiment_id}")

autoencoder_MSE = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
autoencoder_NPP = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
# Load models
try:
    autoencoder_MSE.load_state_dict(torch.load(f'{checkpoint_dir}/best_model_MSE.pth', map_location=device))
    autoencoder_MSE.eval()
    autoencoder_NPP.load_state_dict(torch.load(f'{checkpoint_dir}/best_model_NPP.pth', map_location=device))
    autoencoder_NPP.eval()
except Exception as load_error:
    # Chain the original error: a missing or unreadable checkpoint ends up
    # here too, and a bare `except:` would also have trapped KeyboardInterrupt.
    raise Exception("The model you provided does not correspond with the selected architecture. Please revise and try again.") from load_error


hidden_samples = 0.5
for model in [autoencoder_MSE, autoencoder_NPP]:
    total_loss = 0.0
    criterion = NPPLoss(identity=True).to(device)

    with torch.no_grad():
        for batch in dataloader:
            # Keep only the first `input_channel` channels of each image
            x_test = batch['image'][:, :input_channel, :, :].to(device)
            p_test = [tensor.to(device) for tensor in batch['pins']]
            y_test = [tensor.to(device) for tensor in batch['outputs']]
            test_outputs = model(x_test.float())

            # Preview for the first sample of the batch only (the only one
            # that is printed): pins from index `num_samples` onward, their
            # ground-truth values and the model output at those pins.
            if len(x_test) > 0:
                num_samples = int(len(p_test[0]) * hidden_samples)
                p_sample = p_test[0][num_samples:]
                y_sample = y_test[0][num_samples:]
                mu_sample = (test_outputs[0].squeeze())[p_sample[:, 0], p_sample[:, 1]]
                print('GT: ', y_sample)
                print('OUTPUT: ', mu_sample)

            # The loss is computed on all pins of the batch, not on the preview slice
            loss = criterion(y_test, test_outputs, p_test)

            total_loss += loss.item()

    # Mean of the per-batch losses
    total_loss /= len(dataloader)
    print('TEST LOSS: ', total_loss, '\n\n\n\n')

In [None]:
# Testing
# Loads both checkpoints of the experiment selected below and reports their
# NPP loss on the test loader.
experiment_id = 1709000615
dataloader = test_loader

checkpoint_dir = f'{experiment_folder}/{experiment_id}'
if not os.path.exists(checkpoint_dir):
    raise Exception(f"Could not find experiment with id: {experiment_id}")

autoencoder_MSE = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
autoencoder_NPP = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
# Load models
try:
    autoencoder_MSE.load_state_dict(torch.load(f'{checkpoint_dir}/best_model_MSE.pth', map_location=device))
    autoencoder_MSE.eval()
    autoencoder_NPP.load_state_dict(torch.load(f'{checkpoint_dir}/best_model_NPP.pth', map_location=device))
    autoencoder_NPP.eval()
except Exception as load_error:
    # Chain the original error: a missing or unreadable checkpoint ends up
    # here too, and a bare `except:` would also have trapped KeyboardInterrupt.
    raise Exception("The model you provided does not correspond with the selected architecture. Please revise and try again.") from load_error


hidden_samples = 0.5
for model in [autoencoder_MSE, autoencoder_NPP]:
    total_loss = 0.0
    criterion = NPPLoss(identity=True).to(device)

    with torch.no_grad():
        for batch in dataloader:
            # Keep only the first `input_channel` channels of each image
            x_test = batch['image'][:, :input_channel, :, :].to(device)
            p_test = [tensor.to(device) for tensor in batch['pins']]
            y_test = [tensor.to(device) for tensor in batch['outputs']]
            test_outputs = model(x_test.float())

            # Preview for the first sample of the batch only (the only one
            # that is printed): pins from index `num_samples` onward, their
            # ground-truth values and the model output at those pins.
            if len(x_test) > 0:
                num_samples = int(len(p_test[0]) * hidden_samples)
                p_sample = p_test[0][num_samples:]
                y_sample = y_test[0][num_samples:]
                mu_sample = (test_outputs[0].squeeze())[p_sample[:, 0], p_sample[:, 1]]
                print('GT: ', y_sample)
                print('OUTPUT: ', mu_sample)

            # The loss is computed on all pins of the batch, not on the preview slice
            loss = criterion(y_test, test_outputs, p_test)

            total_loss += loss.item()

    # Mean of the per-batch losses
    total_loss /= len(dataloader)
    print('TEST LOSS: ', total_loss, '\n\n\n\n')

In [None]:
# Testing
# Evaluates both checkpoints of the selected experiment on the test loader for
# several partial-label percentages and writes one text file per percentage.
checkpoint_dir = f'{experiment_folder}/{experiment_id}'
if not os.path.exists(checkpoint_dir):
    raise Exception(f"Could not find experiment with id: {experiment_id}")

autoencoder_MSE = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
autoencoder_NPP = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
# Load models
try:
    # map_location lets checkpoints saved on another device load on `device`,
    # as the other testing cells of this notebook already do.
    autoencoder_MSE.load_state_dict(torch.load(f'{checkpoint_dir}/best_model_MSE.pth', map_location=device))
    autoencoder_NPP.load_state_dict(torch.load(f'{checkpoint_dir}/best_model_NPP.pth', map_location=device))
except Exception as load_error:
    # Chain the original error: a missing or unreadable checkpoint ends up
    # here too, and a bare `except:` would also have trapped KeyboardInterrupt.
    raise Exception("The model you provided does not correspond with the selected architecture. Please revise and try again.") from load_error

# evaluate_model is not defined in this notebook, so whether it switches the
# models to eval mode cannot be seen here; set it explicitly, matching the
# other testing cells.
autoencoder_MSE.eval()
autoencoder_NPP.eval()

# NPP
for percent in [0.25, 0.50, 0.75, 1.00]:
    print(f'Percent testing {percent}')
    best_MSE_test_loss = evaluate_model(autoencoder_MSE, test_loader, input_channel, device, partial_label_GP=False, partial_percent=percent)
    best_NPP_test_loss = evaluate_model(autoencoder_NPP, test_loader, input_channel, device, partial_label_GP=False, partial_percent=percent)
    try:
        GP_best_NPP_test_loss = evaluate_model(autoencoder_NPP, test_loader, input_channel, device, partial_label_GP=True, partial_percent=percent)
        # Write output into file
        # NOTE(review): parse_test in this notebook splits the file content on
        # '|' and ';' and reads two fields per part; the line written here has
        # no '|' -- confirm which format the aggregation is meant to read.
        filename = f"test_{folder.split('/')[0]}_{percent}.txt"
        with open(f"{checkpoint_dir}/{filename}", "w") as f:
            f.write(f"MSE {best_MSE_test_loss}; NPP {best_NPP_test_loss}, {GP_best_NPP_test_loss} (GP)")
    except Exception as gp_error:
        # Best effort: a failure of the GP evaluation or of the write is
        # reported and the remaining percentages are still processed.
        print(gp_error)