# Generating High Dimensional Data

In [1]:
import sys
import numpy as np
from pathlib import Path
# Add parent directory to sys.path
parent_dir = Path.cwd().parent
sys.path.append(str(parent_dir))
# Import modules
from dataset_gen import average_sphere_volume, generate_and_save_datasets, compute_volume_ratios

# --- Dataset configuration ---------------------------------------------------
dimensions = [2, 3, 4, 5, 8, 10, 12]  # Input dimensions to generate datasets for
output_folder = "datasets"  # Folder handed to generate_and_save_datasets
amplitude = 1.0  # Fixed amplitude for all Gaussians (set to None for random amplitudes)
sigma_range = (0.5, 1.0)  # (lower, upper) sigma bounds used for the average sphere volume
num_sets = 10  # Number of datasets to generate for each dimension
base_seed = 42  # Base seed for reproducibility

# --- Number of Gaussian centers per dimension --------------------------------
# For each dimension, take the ratio of 2**dim to the average sphere volume over
# the sigma range, round it, and double it.
# NOTE(review): 2**dim is presumably the volume of a side-2 hypercube domain --
# confirm against dataset_gen.
# NOTE(review): sigma_range is used here but is not passed to
# generate_and_save_datasets below; verify that function samples sigmas from
# the same range.
sigma_low, sigma_high = sigma_range
num_centers = []
for dim in dimensions:
    volume_ratio = (2 ** dim) / average_sphere_volume(sigma_low, sigma_high, dim)
    num_centers.append(int(np.round(volume_ratio) * 2))

print("Computed num_centers:", num_centers)

# Generate num_sets datasets for every (dimension, num_centers) pair.
generate_and_save_datasets(
    dimensions,
    num_centers,
    output_folder,
    num_sets,
    base_seed,
    amplitude,
)

Computed num_centers: [4, 8, 16, 38, 568, 4420, 39884]
Dataset for dimension 2 with 4 centers saved to 'datasets\dataset_1\dataset_dim_2.txt'
Dataset for dimension 3 with 8 centers saved to 'datasets\dataset_1\dataset_dim_3.txt'
Dataset for dimension 4 with 16 centers saved to 'datasets\dataset_1\dataset_dim_4.txt'
Dataset for dimension 5 with 38 centers saved to 'datasets\dataset_1\dataset_dim_5.txt'
Dataset for dimension 8 with 568 centers saved to 'datasets\dataset_1\dataset_dim_8.txt'
Dataset for dimension 10 with 4420 centers saved to 'datasets\dataset_1\dataset_dim_10.txt'
Dataset for dimension 12 with 39884 centers saved to 'datasets\dataset_1\dataset_dim_12.txt'
Dataset for dimension 2 with 4 centers saved to 'datasets\dataset_2\dataset_dim_2.txt'
Dataset for dimension 3 with 8 centers saved to 'datasets\dataset_2\dataset_dim_3.txt'
Dataset for dimension 4 with 16 centers saved to 'datasets\dataset_2\dataset_dim_4.txt'
Dataset for dimension 5 with 38 centers saved to 'datasets\

Here is how the number of Gaussian centers scales with dimension so that the density of centers in our function stays constant.

# Training and Test Point Generation 

This cell can be modified to turn on non-conservative vector field generation.

In [1]:
import os
import sys
import numpy as np
import torch
from pathlib import Path
# Add parent directory to sys.path
parent_dir = Path.cwd().parent
sys.path.append(str(parent_dir))
# Import modules
from dataset_gen import vector_function, random_input_points, generate_random_transform_matrix

# Folder containing datasets
dataset_folder = "datasets"

num_samples = 10000  # Number of points generated for each of the train and test sets.

# Walk every dataset subfolder and, for each center file in it, write one train
# file and one test file of (input point, function value) rows.
# os.listdir returns entries in arbitrary order, so sort them to get the same
# processing (and log) order on every platform.
for subfolder in sorted(os.listdir(dataset_folder)):
    subfolder_path = os.path.join(dataset_folder, subfolder)

    # Skip stray files sitting next to the dataset subfolders.
    if not os.path.isdir(subfolder_path):
        continue

    print(f"Processing folder: {subfolder}")

    # Create train and test subfolders if they don't exist
    train_folder = os.path.join(subfolder_path, "train")
    test_folder = os.path.join(subfolder_path, "test")
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    for filename in sorted(os.listdir(subfolder_path)):
        if not filename.endswith(".txt"):
            continue

        filepath = os.path.join(subfolder_path, filename)

        # ndmin=2 keeps a single-row file (one Gaussian center) two-dimensional;
        # without it loadtxt returns a 1-D array and shape[1] raises IndexError.
        loaded_data = np.loadtxt(filepath, ndmin=2)

        # Each row is read as [center coordinates..., amplitude, sigma], so a
        # usable file needs at least three columns.
        if loaded_data.shape[1] < 3:
            raise ValueError(
                f"{filepath} has {loaded_data.shape[1]} column(s); expected at least 3 "
                "(center coordinates, amplitude, sigma)"
            )

        dimension = loaded_data.shape[1] - 2
        print(f"Processing {filename} with dimension {dimension}")

        # Separate the data: leading columns are centers, last two are
        # amplitude and sigma.
        centers = torch.tensor(loaded_data[:, :dimension], dtype=torch.float32)
        amplitudes = torch.tensor(loaded_data[:, -2], dtype=torch.float32)
        sigmas = torch.tensor(loaded_data[:, -1], dtype=torch.float32)

        # Generate random transformation matrix using dimension as seed
        # Only necessary for testing a non-conservative vector field.
        transform_matrix = generate_random_transform_matrix(dimension, seed=dimension)
        print("Generated transformation matrix:")
        print(transform_matrix)

        # Generate points and values. Train and test use different seeds
        # (dimension vs. dimension + 30).
        # NOTE(review): the seeds depend only on the dimension, so every
        # dataset_N subfolder presumably gets the same input points for a given
        # dimension -- confirm this is intended.
        train_points = random_input_points(num_samples, dimension, seed=dimension)
        test_points = random_input_points(num_samples, dimension, seed=dimension + 30)
        train_values = vector_function(
            train_points, centers, amplitudes, sigmas, transform_matrix, center_batch_size=100
        )
        test_values = vector_function(
            test_points, centers, amplitudes, sigmas, transform_matrix, center_batch_size=100
        )

        # Save files: each row is the input point followed by its function value(s).
        base_name = os.path.splitext(filename)[0]

        # Save train data
        train_data = np.column_stack((train_points.numpy(), train_values.numpy()))
        train_filename = os.path.join(train_folder, f"{base_name}_train.txt")
        np.savetxt(train_filename, train_data)

        # Save test data
        test_data = np.column_stack((test_points.numpy(), test_values.numpy()))
        test_filename = os.path.join(test_folder, f"{base_name}_test.txt")
        np.savetxt(test_filename, test_data)

        print(f"Saved train/test data for {filename}")

print("Done!")

Processing folder: dataset_1
Processing dataset_dim_10.txt with dimension 10
Generated transformation matrix:
tensor([[ 0.0000,  0.0000,  0.0000, -0.8582,  0.0000,  1.4079, -0.5089, -0.1681,
          0.1718,  1.4548],
        [ 0.0000,  0.0000, -0.0098,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.9200,  0.8081,  0.6608,  0.0000, -0.1034,  0.4599,
          1.1965,  0.0000],
        [ 0.0000, -0.0638,  0.7609,  0.0000,  0.0000,  0.0000, -1.0550, -0.4098,
          0.0050,  0.0000],
        [ 1.3701,  0.0000,  0.0000,  0.4725,  1.3856, -0.6901,  0.0000,  1.4376,
          0.0000,  0.0000],
        [-1.0727,  0.0000,  0.0000, -0.5115,  0.0000,  0.0000,  0.0000,  0.0849,
         -0.5361,  0.0000],
        [-1.4308,  0.0000,  0.0000,  0.7986,  0.0000,  0.0000, -0.5071, -0.7678,
          0.0000,  0.0000],
        [ 1.2776, -0.8909,  0.5847, -0.5001,  0.0000,  0.6196, -0.8997, -0.5835,
          0.0000,  0.0000],
        [ 0.0000, 