In [1]:
# Copyright 2024 authors of the paper "Generative Topological Networks".
# Licensed under the Apache License, Version 2.0

import os
import math
import numpy as np
import torch

# Compute device: CUDA when PyTorch reports it as available, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# === Helpers (replacing make_dirs and save_dataset_as_torch) ===

def make_dirs(paths):
    """Create every directory in ``paths``, including missing parents.

    Directories that already exist are left untouched (no error is raised).

    Args:
        paths: An iterable of directory paths. A single path given as a
            ``str``, ``bytes`` or ``os.PathLike`` is also accepted and is
            treated as one directory rather than being iterated
            character by character.
    """
    # A bare string is itself iterable; wrap it so it is handled as one path.
    if isinstance(paths, (str, bytes, os.PathLike)):
        paths = [paths]

    for directory in paths:
        os.makedirs(directory, exist_ok=True)

def save_dataset_as_torch(train, val, test, dir_dataset, scale_01=True):
    """Save the three dataset splits as ``train.pt``, ``val.pt`` and ``test.pt``.

    Args:
        train: Training split; must support ``.min()``, ``.max()`` and
            arithmetic when ``scale_01`` is true (e.g. a ``torch.Tensor``).
        val: Validation split, same requirements as ``train``.
        test: Test split, same requirements as ``train``.
        dir_dataset: Directory the three files are written to. It is created
            (with parents) if it does not exist yet.
        scale_01: When true, min-max scale each split to roughly ``[0, 1]``
            before saving. Each split is scaled with its OWN minimum and
            maximum, independently of the other splits.
    """
    def min_max_scale(data):
        lowest = data.min()
        highest = data.max()
        # The small epsilon avoids a division by zero for a constant split.
        return (data - lowest) / (highest - lowest + 1e-8)

    splits = {'train': train, 'val': val, 'test': test}
    if scale_01:
        # Scale everything before writing anything, so a failure while
        # scaling leaves no partially written dataset behind.
        splits = {name: min_max_scale(data) for name, data in splits.items()}

    # torch.save does not create missing directories; make sure it exists.
    os.makedirs(dir_dataset, exist_ok=True)
    for name, data in splits.items():
        torch.save(data, os.path.join(dir_dataset, f'{name}.pt'))
    print(f"Saved datasets to: {dir_dataset}")

# === Main logic ===

def make_swiss_roll_dataset(dir_dataset, n_samples):
    """Sample sorted swiss-roll angles and save them as train/val/test splits.

    Every split is drawn uniformly from ``[1.5 * pi, 4.5 * pi)`` with
    ``np.random.uniform``, sorted in ascending order and shaped as a column
    of size ``(count, 1)``. The training split has ``n_samples`` points; the
    validation and test splits have ``n_samples // 5`` points each. The
    splits are written to ``dir_dataset`` through ``save_dataset_as_torch``
    with min-max scaling enabled.

    All three splits are created on the CPU. Previously only the training
    split was placed on ``DEVICE``, so on a GPU machine ``train.pt`` was
    serialized as a CUDA tensor (unlike ``val.pt``/``test.pt``) and could not
    be loaded on a CPU-only host without ``map_location``. Consumers should
    move the loaded tensors to their compute device themselves.

    Args:
        dir_dataset: Output directory; created if missing.
        n_samples: Number of training points to draw.
    """
    angle_low = 1.5 * math.pi
    angle_high = 4.5 * math.pi

    def sample_sorted_column(count):
        angles = np.random.uniform(angle_low, angle_high, size=count)
        # NOTE(review): np.sort would avoid the Python-level sort, but it may
        # change the dtype torch infers for the tensor - confirm before
        # switching.
        return torch.tensor(sorted(angles)).unsqueeze(1)

    make_dirs([dir_dataset])

    # Draw order (train, val, test) is kept so a seeded run is reproducible.
    a_train = sample_sorted_column(n_samples)
    a_val = sample_sorted_column(n_samples // 5)
    a_test = sample_sorted_column(n_samples // 5)

    save_dataset_as_torch(a_train, a_val, a_test, dir_dataset, scale_01=True)

# === Run in Kaggle Notebook ===

if __name__ == "__main__":
    # Script / notebook entry point: build the swiss-roll dataset in the
    # Kaggle working directory, reporting progress on stdout.
    print("Preparing the swiss-roll data...")
    make_swiss_roll_dataset(
        dir_dataset='/kaggle/working/swiss_roll_data',
        n_samples=100000,
    )
    print("Done.")


Preparing the swiss-roll data...
Saved datasets to: /kaggle/working/swiss_roll_data
Done.
