In [1]:
import os
import pandas as pd
import json
import re



In [2]:
# Function to extract information from config.json
def extract_info_from_config(config_path):
    """Read an experiment's JSON config and return the fields used in the summary.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        A 4-tuple ``(dataset, feature, mode, param)`` holding the values stored
        under the keys 'dataset', 'feature', 'mode' and 'n_pins'. Any key that
        is absent from the file is returned as an empty string.
    """
    with open(config_path, 'r') as handle:
        settings = json.load(handle)

    wanted_keys = ('dataset', 'feature', 'mode', 'n_pins')
    return tuple(settings.get(key, '') for key in wanted_keys)

# Function to parse the per-percentage test result files (test_PinMNIST_* / test_Synthetic_*)
def parse_test(file_path):
    """Parse a partial-percent test result file.

    The file content is expected to look like
    ``MSE <mse>; NPP <npp>, <gp> (GP)``: the text is split on ';', then the
    second section is split on ',', and the numeric token of each piece is
    converted to float.

    Args:
        file_path: Path to the text file to parse.

    Returns:
        A tuple of floats ``(mse, npp, gp)``.
    """
    with open(file_path, 'r') as handle:
        text = handle.read().strip()

    sections = text.split(';')
    # First section: "<label> <value>" -> take the value.
    mse_value = float(sections[0].split()[1])

    npp_and_gp = sections[1].split(',')
    # "<label> <value>" before the comma, "<value> <suffix>" after it.
    npp_value = float(npp_and_gp[0].split()[1])
    gp_value = float(npp_and_gp[1].split()[0])
    return mse_value, npp_value, gp_value

def parse_res(file_path):
    """Parse the summary line of an experiment's results file.

    The file must contain text of the form
    ``MSE: <mse> | NPP (sigma <sigma>): <npp> , GP: <gp>``; the first such
    occurrence is used.

    Numbers may be signed, use scientific notation (e.g. ``1e-05``, which is
    how Python prints small floats) or be ``nan``/``inf``. A digits-and-dots
    pattern would silently truncate ``1e-05`` to ``1`` at the end of the line
    and fail to match negative values at all.

    Args:
        file_path: Path to the results text file.

    Returns:
        A tuple of floats ``(mse, sigma, npp, gp)``.

    Raises:
        ValueError: If the expected summary line is not found in the file.
    """
    with open(file_path, 'r') as file:
        content = file.read()  # Read the entire file content as a single string

    # One float as Python's str()/f-string formatting can emit it.
    number = r"([-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf))"
    pattern = rf"MSE: {number} \| NPP \(sigma {number}\): {number} , GP: {number}"

    match = re.search(pattern, content)
    if match is None:
        raise ValueError(
            f"Could not find a 'MSE: ... | NPP (sigma ...): ... , GP: ...' line in {file_path}"
        )

    mse, sigma, npp, gp = (float(value) for value in match.groups())
    return mse, sigma, npp, gp

def append(df, pp, mse, sigma, npp, gp, dataset, feature, mode, param):
    """Return a new DataFrame consisting of ``df`` plus one result row.

    The input frame is not modified; the new row is concatenated at the end
    and the index of the result is renumbered from 0.

    Args:
        df: Existing summary DataFrame.
        pp: Value for the 'Partial Percent' column.
        mse: Value for the 'MSE' column.
        sigma: Value for the 'Sigma' column.
        npp: Value for the 'NPP' column.
        gp: Value for the 'GP' column.
        dataset: Value for the 'Dataset' column.
        feature: Value for the 'Feature' column.
        mode: Value for the 'Mode' column.
        param: Value for the 'NPins' column.

    Returns:
        The concatenated DataFrame.
    """
    row = {
        'Dataset': dataset,
        'Feature': feature,
        'Mode': mode,
        'NPins': param,
        'Partial Percent': pp,
        'MSE': mse,
        'Sigma': sigma,
        'NPP': npp,
        'GP': gp,
    }
    # Wrap every scalar in a one-element list so the frame has exactly one row.
    single_row_frame = pd.DataFrame({column: [value] for column, value in row.items()})
    return pd.concat([df, single_row_frame], ignore_index=True)

In [25]:
# Collect one summary row per experiment (from results.txt, recorded with
# Partial Percent 0.0) plus one row per partial-percent test file, then build
# the DataFrame in a single step.
#
# The column is named 'NPins' so that it matches the key written by `append`
# and the sort key used below; initialising the frame with 'D/NPins' would leave
# an extra all-NaN column next to 'NPins'.
columns = ['Dataset', 'Feature', 'Mode', 'NPins', 'Partial Percent', 'MSE', 'Sigma', 'NPP', 'GP']

# Directory containing one sub-folder per experiment
base_folder = './history/experiment'

# Rows are gathered in a plain list; concatenating DataFrames inside the loop
# re-copies the whole table on every iteration.
records = []

for dir_name in os.listdir(base_folder):
    dir_path = os.path.join(base_folder, dir_name)

    # Only sub-directories are experiments; skip stray files.
    if not os.path.isdir(dir_path):
        continue

    dataset, feature, mode, param = extract_info_from_config(os.path.join(dir_path, 'config.json'))
    mse, sigma, npp, gp = parse_res(os.path.join(dir_path, 'results.txt'))
    records.append((dataset, feature, mode, param, 0.0, mse, sigma, npp, gp))

    # Partial-percent test files: test_PinMNIST*_<percent>.txt / test_Synthetic*_<percent>.txt
    for file_name in os.listdir(dir_path):
        if file_name.startswith(('test_PinMNIST', 'test_Synthetic')):
            # The percentage is the last '_'-separated token of the file stem.
            stem = os.path.splitext(file_name)[0]
            pp = float(stem.split('_')[-1])
            mse, npp, gp = parse_test(os.path.join(dir_path, file_name))
            # Test files do not store sigma; reuse the one from results.txt.
            records.append((dataset, feature, mode, param, pp, mse, sigma, npp, gp))

# One explicit multi-key sort: groups ascending, then NPins and Partial Percent
# descending inside each group. This yields the intended order without relying
# on a second sort preserving the order produced by a first one.
df = pd.DataFrame(records, columns=columns).sort_values(
    by=['Dataset', 'Feature', 'Mode', 'NPins', 'Partial Percent'],
    ascending=[True, True, True, False, False],
    ignore_index=True,
)
df.head()

Unnamed: 0,Dataset,Feature,Mode,D/NPins,Partial Percent,MSE,Sigma,NPP,GP
0,PinMNIST,AE,mesh,10,0.75,86.392427,0.5,0.461449,0.462395
1,PinMNIST,AE,mesh,10,0.5,129.252859,0.5,0.691621,0.679808
2,PinMNIST,AE,mesh,10,0.25,91.439378,0.5,0.513744,0.502053
3,PinMNIST,AE,mesh,10,0.0,70.908794,0.5,0.39735,0.411586
4,PinMNIST,AE,mesh,3,0.75,0.21471,2.0,0.221937,0.215284


In [26]:
# Display only the rows of the summary table whose 'Dataset' is PinMNIST.
df.loc[df['Dataset'] == 'PinMNIST']

Unnamed: 0,Dataset,Feature,Mode,D/NPins,Partial Percent,MSE,Sigma,NPP,GP
0,PinMNIST,AE,mesh,10,0.75,86.392427,0.5,0.461449,0.462395
1,PinMNIST,AE,mesh,10,0.5,129.252859,0.5,0.691621,0.679808
2,PinMNIST,AE,mesh,10,0.25,91.439378,0.5,0.513744,0.502053
3,PinMNIST,AE,mesh,10,0.0,70.908794,0.5,0.39735,0.411586
4,PinMNIST,AE,mesh,3,0.75,0.21471,2.0,0.221937,0.215284
5,PinMNIST,AE,mesh,3,0.5,0.773431,2.0,0.757827,0.751134
6,PinMNIST,AE,mesh,3,0.25,0.939234,2.0,0.962333,0.968475
7,PinMNIST,AE,mesh,3,0.0,0.730613,2.0,0.750291,0.754146
8,PinMNIST,AE,random,100,0.75,1.073427,0.1,1.124931,0.875204
9,PinMNIST,AE,random,100,0.5,1.045323,0.1,1.077134,0.898306


In [24]:
# Display only the PinMNIST rows of the summary table.
# NOTE(review): this cell repeats the previous one and its saved output comes
# from an earlier execution — consider removing it.
df.loc[df['Dataset'] == 'PinMNIST']

Unnamed: 0,Dataset,Feature,Mode,D/NPins,Partial Percent,MSE,Sigma,NPP,GP
0,PinMNIST,AE,mesh,10,1.0,79.386685,0.2,23.559507,23.559507
1,PinMNIST,AE,mesh,10,0.75,79.386685,0.2,23.559507,23.559507
2,PinMNIST,AE,mesh,10,0.5,79.386685,0.2,23.559507,23.559507
3,PinMNIST,AE,mesh,10,0.25,79.386685,0.2,23.559507,23.559507
4,PinMNIST,AE,mesh,10,0.0,79.386685,0.2,23.559507,23.559507
5,PinMNIST,AE,mesh,3,1.0,66.294543,2.0,22.799933,22.143597
6,PinMNIST,AE,mesh,3,0.75,66.294543,2.0,22.799933,22.325781
7,PinMNIST,AE,mesh,3,0.5,66.294543,2.0,22.799933,22.471643
8,PinMNIST,AE,mesh,3,0.25,66.294543,2.0,22.799933,22.651231
9,PinMNIST,AE,mesh,3,0.0,66.294543,2.0,22.799933,22.799933


In [12]:
# Display only the Synthetic rows; reset_index() renumbers them from 0 and keeps
# the previous row labels in an 'index' column.
df.loc[df['Dataset'] == 'Synthetic'].reset_index()

Unnamed: 0,index,Dataset,Feature,Mode,D/NPins,Partial Percent,MSE,Sigma,NPP,GP
0,40,Synthetic,AE,mesh,10,1.0,12.596536,2.0,12.555825,12.555825
1,41,Synthetic,AE,mesh,10,0.75,12.596536,2.0,12.555825,12.555825
2,42,Synthetic,AE,mesh,10,0.5,12.596536,2.0,12.555825,12.555825
3,43,Synthetic,AE,mesh,10,0.25,12.596536,2.0,12.555825,12.555825
4,44,Synthetic,AE,mesh,10,0.0,12.596536,2.0,12.555825,12.555825
5,45,Synthetic,AE,mesh,3,1.0,22.53667,0.5,14.735918,14.578363
6,46,Synthetic,AE,mesh,3,0.75,22.53667,0.5,14.735918,14.617389
7,47,Synthetic,AE,mesh,3,0.5,22.53667,0.5,14.735918,14.655981
8,48,Synthetic,AE,mesh,3,0.25,22.53667,0.5,14.735918,14.701337
9,49,Synthetic,AE,mesh,3,0.0,22.53667,0.5,14.735918,14.735918


In [None]:
# Save the summary DataFrame to 'output.csv' (relative to the working directory),
# omitting the row index so the file contains only the data columns.
df.to_csv('output.csv', index=False)

TESTING

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
import numpy as np
import os
import json
from tools.plot_utils import plot_and_save
from tools.data_utils import *
from tools.losses import NPPLoss
from tools.models import Autoencoder
from tools.optimization import EarlyStoppingCallback, evaluate_model
import matplotlib.pyplot as plt
import argparse
import time
from tools.models import *

# --- Experiment selection ------------------------------------------------------
experiment_id = "1709622898" # 1709521764
experiment_folder = "./history/exp_def"
dataset = "Synthetic"
feature = "DDPM"
mode = "random"
feature_extracted = feature == "DDPM"   # selects the "<dataset>_ddpm" data folder below
mesh = mode == "mesh"                   # mesh-placed pins vs. randomly placed pins
d = 3                 # mesh step (appears in the mesh data-folder name)
n_pins = 100          # number of pins (appears in the random data-folder name)
partial_percent = 0.25
r = 3                 # radius (appears in the PinMNIST data-folder name)
batch_size = 32
image_size = 28       # image side length used in the data-folder names

num_kernels_encoder = [32, 16]
num_kernels_decoder = [32]


# Seed PyTorch and NumPy so the random split below is reproducible
seed = 4  # You can use any integer value as the seed
torch.manual_seed(seed)
np.random.seed(seed)

# Number of image channels fed to the model for each dataset
if dataset == "Synthetic":
    input_channel = 3
elif dataset == "PinMNIST":
    input_channel = 1
elif dataset == "Building":
    input_channel = 4

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if feature_extracted:
    folder = f"{dataset}_ddpm"
else:
    folder = f"{dataset}"

# --- Locate the data folder ----------------------------------------------------
# In mesh mode the pin count follows from the mesh step. This is stored in the
# notebook's own `n_pins` variable: no `config` object is created in the visible
# cells of this notebook, so writing to config['n_pins'] would raise NameError
# as soon as mode == "mesh".
if dataset == "PinMNIST":
    if mesh:
        data_folder = f"./data/{folder}/mesh_{d}step_{image_size}by{image_size}pixels_{r}radius_{seed}seed"
        n_pins = (image_size // d + 1) ** 2
    else:
        data_folder = f"./data/{folder}/random_fixedTrue_{n_pins}pins_{image_size}by{image_size}pixels_{r}radius_{seed}seed"
elif dataset == "Synthetic":
    folder += "/28by28pixels_1000images_123456seed"
    if mesh:
        data_folder = f"./data/{folder}/mesh_{d}step_pins"
        n_pins = (image_size // d + 1) ** 2
    else:
        data_folder = f"./data/{folder}/random_{n_pins}pins"
else: # dataset == "Building"
    raise Exception("Building option is still not implemented.")

# --- Dataset, split and loaders --------------------------------------------------
# ToTensor / Resize come from the project's tools.data_utils star import.
transform = transforms.Compose([
    ToTensor(),  # Convert to tensor
    Resize()     # Resize with the helper's default arguments (target size not visible here — TODO confirm)
])

transformed_dataset = PinDataset(csv_file=f"{data_folder}/pins.csv",
                                      root_dir=f"./data/{folder}/images/",
                                      transform=transform)

# 70% train / 10% validation / remainder test
dataset_size = len(transformed_dataset)
train_size = int(0.7 * dataset_size)
val_size = int(0.10 * dataset_size)
test_size = dataset_size - train_size - val_size

# Split the dataset into train, validation, and test sets
train_dataset, val_dataset, test_dataset = random_split(
    transformed_dataset, [train_size, val_size, test_size]
)

# Create the DataLoaders with the project's custom_collate_fn
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)

In [None]:
# Testing: load the two saved autoencoders of the selected experiment and report
# their average NPP loss over `dataloader`.
# NOTE(review): this cell iterates `train_loader` although the printed label is
# 'TEST LOSS' — confirm that evaluating on the training split is intended.
dataloader = train_loader

experiment_dir = f'{experiment_folder}/{experiment_id}'
if not os.path.exists(experiment_dir):
    raise Exception(f"Could not find experiment with id: {experiment_id}")

autoencoder_MSE = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
autoencoder_NPP = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
# Load models
try:
    autoencoder_MSE.load_state_dict(torch.load(f'{experiment_dir}/best_model_MSE.pth', map_location=device))
    autoencoder_MSE.eval()
    autoencoder_NPP.load_state_dict(torch.load(f'{experiment_dir}/best_model_NPP.pth', map_location=device))
    autoencoder_NPP.eval()
except Exception as load_error:
    # Chain the original error so a missing file or a key/shape mismatch stays
    # visible in the traceback; KeyboardInterrupt is no longer intercepted.
    raise Exception("The model you provided does not correspond with the selected architecture. Please revise and try again.") from load_error


# Fraction of each image's pins that is skipped before printing the comparison
hidden_samples = 0.5
for model in [autoencoder_MSE, autoencoder_NPP]:
    total_loss = 0.0
    criterion = NPPLoss(identity=True).to(device)

    with torch.no_grad():
        for batch in dataloader:
            x_test = batch['image'][:, :input_channel, :, :].to(device)
            p_test = [tensor.to(device) for tensor in batch['pins']]
            y_test = [tensor.to(device) for tensor in batch['outputs']]
            test_outputs = model(x_test.float())

            # Debug print for the first image of the batch only: ground truth vs.
            # model output at the pin locations after the first `hidden_samples`
            # fraction. Other images were never printed, so nothing is computed
            # for them.
            if len(x_test) > 0:
                num_samples = int(len(p_test[0]) * hidden_samples)
                p_sample = p_test[0][num_samples:]
                y_sample = y_test[0][num_samples:]
                mu_sample = (test_outputs[0].squeeze())[p_sample[:, 0], p_sample[:, 1]]
                print('GT: ', y_sample)
                print('OUTPUT: ', mu_sample)

            loss = criterion(y_test, test_outputs, p_test)
            total_loss += loss.item()

    # Mean of the per-batch losses
    total_loss /= len(dataloader)
    print('TEST LOSS: ', total_loss, '\n\n\n\n')

In [None]:
# Testing: load the two saved autoencoders of experiment `experiment_id` and
# report their average NPP loss over the test loader.
experiment_id = 1709000615
dataloader = test_loader

experiment_dir = f'{experiment_folder}/{experiment_id}'
if not os.path.exists(experiment_dir):
    raise Exception(f"Could not find experiment with id: {experiment_id}")

autoencoder_MSE = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
autoencoder_NPP = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
# Load models
try:
    autoencoder_MSE.load_state_dict(torch.load(f'{experiment_dir}/best_model_MSE.pth', map_location=device))
    autoencoder_MSE.eval()
    autoencoder_NPP.load_state_dict(torch.load(f'{experiment_dir}/best_model_NPP.pth', map_location=device))
    autoencoder_NPP.eval()
except Exception as load_error:
    # Chain the original error so a missing file or a key/shape mismatch stays
    # visible in the traceback; KeyboardInterrupt is no longer intercepted.
    raise Exception("The model you provided does not correspond with the selected architecture. Please revise and try again.") from load_error


# Fraction of each image's pins that is skipped before printing the comparison
hidden_samples = 0.5
for model in [autoencoder_MSE, autoencoder_NPP]:
    total_loss = 0.0
    criterion = NPPLoss(identity=True).to(device)

    with torch.no_grad():
        for batch in dataloader:
            x_test = batch['image'][:, :input_channel, :, :].to(device)
            p_test = [tensor.to(device) for tensor in batch['pins']]
            y_test = [tensor.to(device) for tensor in batch['outputs']]
            test_outputs = model(x_test.float())

            # Debug print for the first image of the batch only: ground truth vs.
            # model output at the pin locations after the first `hidden_samples`
            # fraction. Other images were never printed, so nothing is computed
            # for them.
            if len(x_test) > 0:
                num_samples = int(len(p_test[0]) * hidden_samples)
                p_sample = p_test[0][num_samples:]
                y_sample = y_test[0][num_samples:]
                mu_sample = (test_outputs[0].squeeze())[p_sample[:, 0], p_sample[:, 1]]
                print('GT: ', y_sample)
                print('OUTPUT: ', mu_sample)

            loss = criterion(y_test, test_outputs, p_test)
            total_loss += loss.item()

    # Mean of the per-batch losses
    total_loss /= len(dataloader)
    print('TEST LOSS: ', total_loss, '\n\n\n\n')

In [None]:
# Testing: evaluate both saved autoencoders on the test loader for several
# partial-label percentages and write one "test_<dataset>_<percent>.txt" file
# per percentage into the experiment folder.
# NOTE(review): `experiment_id` is whatever an earlier cell last assigned —
# confirm it points at the intended experiment before running.
experiment_dir = f'{experiment_folder}/{experiment_id}'
if not os.path.exists(experiment_dir):
    raise Exception(f"Could not find experiment with id: {experiment_id}")

autoencoder_MSE = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
autoencoder_NPP = Autoencoder(num_kernels_encoder, num_kernels_decoder, input_channel=input_channel).to(device)
# Load models. map_location=device lets checkpoints saved on a GPU load on a
# CPU-only machine, and eval() puts the models in inference mode; both match
# the other testing cells of this notebook.
try:
    autoencoder_MSE.load_state_dict(torch.load(f'{experiment_dir}/best_model_MSE.pth', map_location=device))
    autoencoder_MSE.eval()
    autoencoder_NPP.load_state_dict(torch.load(f'{experiment_dir}/best_model_NPP.pth', map_location=device))
    autoencoder_NPP.eval()
except Exception as load_error:
    # Chain the original error so a missing file or a key/shape mismatch stays
    # visible in the traceback; KeyboardInterrupt is no longer intercepted.
    raise Exception("The model you provided does not correspond with the selected architecture. Please revise and try again.") from load_error

# NPP
for percent in [0.25, 0.50, 0.75, 1.00]:
    print(f'Percent testing {percent}')
    best_MSE_test_loss = evaluate_model(autoencoder_MSE, test_loader, input_channel, device, partial_label_GP=False, partial_percent=percent)
    best_NPP_test_loss = evaluate_model(autoencoder_NPP, test_loader, input_channel, device, partial_label_GP=False, partial_percent=percent)
    try:
        GP_best_NPP_test_loss = evaluate_model(autoencoder_NPP, test_loader, input_channel, device, partial_label_GP=True, partial_percent=percent)
        # Write output into file; the name uses the first path component of `folder`
        filename = f"test_{folder.split('/')[0]}_{percent}.txt"
        with open(f"{experiment_dir}/{filename}", "w") as f:
            f.write(f"MSE {best_MSE_test_loss}; NPP {best_NPP_test_loss}, {GP_best_NPP_test_loss} (GP)")
    except Exception as error:
        # Best effort: a failure in the GP evaluation or the file write is
        # reported and the loop continues with the next percentage.
        print(error)