# Generating Low Dimensional Data

In [1]:
import sys
import numpy as np
from pathlib import Path
# Add parent directory to sys.path
parent_dir = Path.cwd().parent.parent
sys.path.append(str(parent_dir))
# Import modules
from dataset_gen import average_sphere_volume, generate_and_save_datasets, compute_volume_ratios

# --- Dataset generation settings ---
dimensions = [2, 3, 5, 8, 10, 12, 15, 18, 20]  # Dimensions to generate datasets for
output_folder = 'datasets'  # Root folder handed to generate_and_save_datasets
amplitude = 1.0  # Fixed amplitude for all Gaussians (set to None for random amplitudes)
# NOTE(review): sigma_range is assigned here but is never passed to
# generate_and_save_datasets below -- confirm whether the generator's own
# default is what is intended.
sigma_range = (0.5, 1.0)
num_sets = 10  # Number of datasets to generate for each dimension
base_seed = 42  # Base seed for reproducibility

# Every dimension gets the same number of centers (two), one entry per dimension.
num_centers = [2] * len(dimensions)
print("Computed num_centers:", num_centers)
generate_and_save_datasets(dimensions, num_centers, output_folder, num_sets, base_seed, amplitude)

Computed num_centers: [2, 2, 2, 2, 2, 2, 2, 2, 2]
Dataset for dimension 2 with 2 centers saved to 'datasets\dataset_1\dataset_dim_2.txt'
Dataset for dimension 3 with 2 centers saved to 'datasets\dataset_1\dataset_dim_3.txt'
Dataset for dimension 5 with 2 centers saved to 'datasets\dataset_1\dataset_dim_5.txt'
Dataset for dimension 8 with 2 centers saved to 'datasets\dataset_1\dataset_dim_8.txt'
Dataset for dimension 10 with 2 centers saved to 'datasets\dataset_1\dataset_dim_10.txt'
Dataset for dimension 12 with 2 centers saved to 'datasets\dataset_1\dataset_dim_12.txt'
Dataset for dimension 15 with 2 centers saved to 'datasets\dataset_1\dataset_dim_15.txt'
Dataset for dimension 18 with 2 centers saved to 'datasets\dataset_1\dataset_dim_18.txt'
Dataset for dimension 20 with 2 centers saved to 'datasets\dataset_1\dataset_dim_20.txt'
Dataset for dimension 2 with 2 centers saved to 'datasets\dataset_2\dataset_dim_2.txt'
Dataset for dimension 3 with 2 centers saved to 'datasets\dataset_2\da

# Training and Test Point Generation 

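This cell generates training and test points for every dataset file. To generate a non-conservative vector field instead, uncomment the `transform_matrix` lines in the cell and pass `transform_matrix` to `vector_function`.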

In [2]:
import os
import sys
import numpy as np
import torch
from pathlib import Path
# Add parent directory to sys.path
parent_dir = Path.cwd().parent
sys.path.append(str(parent_dir))
# Import modules
from dataset_gen import vector_function, random_input_points, generate_random_transform_matrix

# Root folder whose subfolders hold the generated dataset files
dataset_folder = "datasets"
# Number of points drawn for the training set and, separately, for the test set
num_samples = 10_000

def print_vector_stats(points, values, label):
    """Print summary statistics for a batch of vectors and return their mean.

    Args:
        points: Input points associated with ``values``. Currently unused; kept
            so existing call sites keep working.
        values: Vectors to summarise, either a ``torch.Tensor`` (any device,
            with or without gradient tracking) or anything ``np.asarray``
            accepts. Statistics are averaged over axis 0.
        label: Text placed in front of the printed header, e.g. ``"Training"``.

    Returns:
        numpy.ndarray: Mean of ``values`` along axis 0.
    """
    # Convert once. detach()/cpu() make the conversion safe for tensors that
    # track gradients or live on an accelerator, where a bare .numpy() raises.
    if isinstance(values, torch.Tensor):
        values_np = values.detach().cpu().numpy()
    else:
        values_np = np.asarray(values)

    avg_vector = np.mean(values_np, axis=0)
    vector_magnitude = np.linalg.norm(avg_vector)
    print(f"\n{label} Vector Statistics:")
    print(f"Average vector: {avg_vector}")
    print(f"Vector magnitude: {vector_magnitude:.4f}")
    print(f"Min value: {np.min(values_np):.4f}")
    print(f"Max value: {np.max(values_np):.4f}")
    print(f"Mean absolute value: {np.mean(np.abs(values_np)):.4f}")
    return avg_vector

# Build train/test point sets for every dataset file found one level below
# `dataset_folder`. Entries are visited in sorted order because os.listdir
# returns them in arbitrary order, which would make the log differ between runs
# and platforms.
for subfolder in sorted(os.listdir(dataset_folder)):
    subfolder_path = os.path.join(dataset_folder, subfolder)
    if not os.path.isdir(subfolder_path):
        continue

    print(f"Processing folder: {subfolder}")

    # Create train and test subfolders if they don't exist
    train_folder = os.path.join(subfolder_path, "train")
    test_folder = os.path.join(subfolder_path, "test")
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    for filename in sorted(os.listdir(subfolder_path)):
        if not filename.endswith(".txt"):
            continue

        filepath = os.path.join(subfolder_path, filename)
        # ndmin=2 keeps a file holding a single center as a (1, dimension + 2)
        # array; without it loadtxt returns a 1-D array and shape[1] fails.
        loaded_data = np.loadtxt(filepath, ndmin=2)

        # Each row is laid out as [center coordinates..., amplitude, sigma],
        # so at least three columns are required.
        if loaded_data.size == 0 or loaded_data.shape[1] < 3:
            print(f"Skipping {filename}: expected at least 3 columns, got shape {loaded_data.shape}")
            continue

        dimension = loaded_data.shape[1] - 2
        print(f"Processing {filename} with dimension {dimension}")

        # Separate the data
        centers = torch.tensor(loaded_data[:, :dimension], dtype=torch.float32)
        amplitudes = torch.tensor(loaded_data[:, -2], dtype=torch.float32)
        sigmas = torch.tensor(loaded_data[:, -1], dtype=torch.float32)

        # Generate random transformation matrix using dimension as seed
        # Only necessary for testing a non-conservative vector field.
        #transform_matrix = generate_random_transform_matrix(dimension, seed=dimension)
        #print("Generated transformation matrix:")
        #print(transform_matrix)

        # Generate points and values.
        # The seeds depend only on the dimension (test = train seed + 30), so
        # every dataset folder presumably receives the same input points for a
        # given dimension -- confirm against random_input_points.
        train_points = random_input_points(num_samples, dimension, seed=dimension)
        test_points = random_input_points(num_samples, dimension, seed=dimension+30)
        # For the non-conservative variant, pass transform_matrix before
        # center_batch_size in both calls below.
        train_values = vector_function(train_points, centers, amplitudes, sigmas, center_batch_size=100)
        test_values = vector_function(test_points, centers, amplitudes, sigmas, center_batch_size=100)

        # Print statistics - Uncomment to see statistics
        #train_avg = print_vector_stats(train_points, train_values, "Training")
        #test_avg = print_vector_stats(test_points, test_values, "Test")

        # Save files: one row per point, input coordinates followed by the
        # corresponding vector_function output.
        base_name = os.path.splitext(filename)[0]

        # Save train data
        train_data = np.column_stack((train_points.numpy(), train_values.numpy()))
        train_filename = os.path.join(train_folder, f"{base_name}_train.txt")
        np.savetxt(train_filename, train_data)

        # Save test data
        test_data = np.column_stack((test_points.numpy(), test_values.numpy()))
        test_filename = os.path.join(test_folder, f"{base_name}_test.txt")
        np.savetxt(test_filename, test_data)

        print(f"Saved train/test data for {filename}")
print("Done!")

Processing folder: dataset_1
Processing dataset_dim_10.txt with dimension 10
Saved train/test data for dataset_dim_10.txt
Processing dataset_dim_12.txt with dimension 12
Saved train/test data for dataset_dim_12.txt
Processing dataset_dim_15.txt with dimension 15
Saved train/test data for dataset_dim_15.txt
Processing dataset_dim_18.txt with dimension 18
Saved train/test data for dataset_dim_18.txt
Processing dataset_dim_2.txt with dimension 2
Saved train/test data for dataset_dim_2.txt
Processing dataset_dim_20.txt with dimension 20
Saved train/test data for dataset_dim_20.txt
Processing dataset_dim_3.txt with dimension 3
Saved train/test data for dataset_dim_3.txt
Processing dataset_dim_5.txt with dimension 5
Saved train/test data for dataset_dim_5.txt
Processing dataset_dim_8.txt with dimension 8
Saved train/test data for dataset_dim_8.txt
Processing folder: dataset_10
Processing dataset_dim_10.txt with dimension 10
Saved train/test data for dataset_dim_10.txt
Processing dataset_dim_