In [1]:
import sys
sys.path.append("../src/")


import numpy as np
import torch
import time

from probabilistic_flow_boosting.models.nodeflow import NodeFlow, NodeFlowDataModule
from probabilistic_flow_boosting.extras.datasets.uci_dataset import UCIDataSet

In [15]:
uci_datasets = [
    'concrete',
    'energy',
    'kin8nm',
    'naval_propulsion_plant',
    'power_plant',
    'protein_tertiary_structure',
    'wine_quality_red',
    'yacht'
]

def generate_samples_nodeflow(model, datamodule, observation, num_samples: int = 1000):
    samples = model._sample(observation, num_samples)

    samples_size = samples.shape
    samples: np.ndarray = samples.detach().cpu().numpy()
    samples: np.ndarray = samples.reshape((samples_size[0] * samples_size[1], samples_size[2]))
    samples: np.ndarray = datamodule.target_scaler.inverse_transform(samples)
    samples: np.ndarray = samples.reshape((samples_size[0], samples_size[1], samples_size[2]))
    samples: np.ndarray = samples.squeeze()
    return samples

In [16]:
for DATASET in uci_datasets:
    IDX = 0
    
    ## Load data
    x_train = UCIDataSet(
        filepath_data=f"../data/01_raw/UCI/{DATASET}/data.txt",
        filepath_index_columns=f"../data/01_raw/UCI/{DATASET}/index_features.txt",
        filepath_index_rows=f"../data/01_raw/UCI/{DATASET}/index_train_{IDX}.txt"
    ).load()
    y_train = UCIDataSet(
        filepath_data=f"../data/01_raw/UCI/{DATASET}/data.txt",
        filepath_index_columns=f"../data/01_raw/UCI/{DATASET}/index_target.txt",
        filepath_index_rows=f"../data/01_raw/UCI/{DATASET}/index_train_{IDX}.txt"
    ).load()
    x_test = UCIDataSet(
        filepath_data=f"../data/01_raw/UCI/{DATASET}/data.txt",
        filepath_index_columns=f"../data/01_raw/UCI/{DATASET}/index_features.txt",
        filepath_index_rows=f"../data/01_raw/UCI/{DATASET}/index_test_{IDX}.txt"
    ).load()
    y_test = UCIDataSet(
        filepath_data=f"../data/01_raw/UCI/{DATASET}/data.txt",
        filepath_index_columns=f"../data/01_raw/UCI/{DATASET}/index_target.txt",
        filepath_index_rows=f"../data/01_raw/UCI/{DATASET}/index_test_{IDX}.txt"
    ).load()

    x_train_tensor = torch.Tensor(x_train.values)
    x_test_tensor = torch.Tensor(x_test.values)
    y_train_tensor = torch.Tensor(y_train.values)
    y_test_tensor = torch.Tensor(y_test.values)

    datamodule = NodeFlowDataModule(x_train, y_train, x_test, y_test, split_size=0.8, batch_size=2048)
    datamodule.target_scaler.fit(datamodule.y_tr)

    # model_filepath = f"../data/nodeflow/UCI/{DATASET}/model_{IDX}"
    

    ## Do things
    total_params = []
    time_eval = []
    print(DATASET)
    for i in range(20):
        if DATASET == 'protein_tertiary_structure' and i == 5:
            break

        model_filepath = f"../data/06_models/UCI/{DATASET}/model_uci_{i}"
        nodeflow = NodeFlow.load(model_filepath, map_location="cpu")
        nodeflow.eval()
        observation = x_test_tensor[i, :].reshape(1, -1)

        total_params.append(sum(
        	param.numel() for param in nodeflow.parameters()
        ))
        
        time_start = time.time()
        nodeflow_samples = generate_samples_nodeflow(nodeflow, datamodule, observation, num_samples=1000)
        time_eval.append(time.time() - time_start)

        del nodeflow
    print(f"Total params: {np.mean(total_params).astype(int)}+-{np.std(total_params).astype(int)}")
    print(f"Time eval: {np.mean(time_eval).round(3)}+-{np.std(time_eval).round(3)}")

concrete
Total params: 4959763+-10832838
Time eval: 0.601+-0.484
energy
Total params: 923144+-1696191
Time eval: 0.52+-0.207
kin8nm
Total params: 10486159+-7801478
Time eval: 1.17+-0.474
naval_propulsion_plant
Total params: 3167099+-4415772
Time eval: 0.636+-0.264
power_plant
Total params: 894201+-772524
Time eval: 0.645+-0.226
protein_tertiary_structure
Total params: 531659+-623584
Time eval: 0.518+-0.13
wine_quality_red
Total params: 110592+-66932
Time eval: 0.437+-0.127
yacht
Total params: 76663+-56485
Time eval: 0.268+-0.074
