Generate data

In [11]:
import random

from common import constants
from schematic_generator import generator

# Seed the module-level RNG before any value is drawn.
random.seed(0)


def _random_offset():
    """Return an (x, y, z) tuple, each component drawn from [-100, 100]."""
    return tuple(random.randint(-100, 100) for _ in range(3))


def _random_seed():
    """Return a random integer in [0, 2**32 - 1]."""
    return random.randint(0, 2**32 - 1)


def _random_block_mix():
    """Return three entries sampled without replacement from the simple block types."""
    return random.sample(constants.simple_block_types, 3)


def _random_radius(minimum):
    """Return a radius between ``minimum`` and half the first region dimension minus one."""
    return random.randint(minimum, (constants.region_size[0] // 2) - 1)


def _random_side_length(minimum):
    """Return a side length between ``minimum`` and the first region dimension minus one."""
    return random.randint(minimum, constants.region_size[0] - 1)


def _single_and_mixed_blocks(mixed_repeats):
    """Return one single-block option per simple block type, followed by
    ``mixed_repeats`` references to the three-block sampler."""
    single_block_options = [[block] for block in constants.simple_block_types]
    return single_block_options + [_random_block_mix] * mixed_repeats


def _fill_block_options():
    """Return the fill choices: air, one sampled block type, or three sampled block types."""
    return [
        ["minecraft:air"],
        lambda: random.sample(constants.simple_block_types, 1),
        _random_block_mix,
    ]


_block_type_count = len(constants.simple_block_types)

# Each config maps a parameter name to a list of candidate values. Entries may
# be callables; presumably the generator calls them to draw a value — confirm
# in schematic_generator. ``[f] * n`` repeats the same callable n times.
configs = [
    # Simple shapes
    {
        "generator_type": ["shape"],
        "shape_type": ["sphere"],
        "radius": [lambda: _random_radius(1)] * 5,
        "structure_block_types": _single_and_mixed_blocks(_block_type_count),
        "background_block_types": [["minecraft:air"]],
        "position_offset": [_random_offset],
        "random_seed": [_random_seed],
        "region_size": [constants.region_size],
    },
    {
        "generator_type": ["shape"],
        "shape_type": ["cube"],
        "side_length": [lambda: _random_side_length(1)] * 5,
        "structure_block_types": _single_and_mixed_blocks(_block_type_count),
        "background_block_types": [["minecraft:air"]],
        "position_offset": [_random_offset],
        "random_seed": [_random_seed],
        "region_size": [constants.region_size],
    },
    # Filled
    {
        "generator_type": ["shape"],
        "shape_type": ["sphere"],
        "radius": [lambda: _random_radius(3)] * 3,
        "structure_block_types": _single_and_mixed_blocks(_block_type_count // 3),
        "structure_fill_block_types": _fill_block_options(),
        "thickness": [lambda: random.randint(1, 3)],
        "background_block_types": [["minecraft:air"]],
        "position_offset": [_random_offset],
        "random_seed": [_random_seed],
        "region_size": [constants.region_size],
    },
    {
        "generator_type": ["shape"],
        "shape_type": ["cube"],
        "side_length": [lambda: _random_side_length(7)] * 3,
        "structure_block_types": _single_and_mixed_blocks(_block_type_count // 3),
        "structure_fill_block_types": _fill_block_options(),
        "thickness": [lambda: random.randint(1, 3)],
        "background_block_types": [["minecraft:air"]],
        "position_offset": [_random_offset],
        "random_seed": [_random_seed],
        "region_size": [constants.region_size],
    },
]

num_blocks = 5
# Subset of block types shared by every dataset defined below.
sample_blocks = random.sample(constants.simple_block_types, num_blocks)


def _mixed_block_sets():
    """Return ``len(sample_blocks) // 5`` random subsets of ``sample_blocks``,
    each holding between 2 and 4 block types."""
    return [
        random.sample(sample_blocks, random.randint(2, 4))
        for _ in range(len(sample_blocks) // 5)
    ]


def _random_shape_dataset(shape_type, size_key, sizes, block_types, offset_draws, seed_draws):
    """Build a one-config dataset for a shape with random offsets and seeds.

    ``size_key``/``sizes`` name and enumerate the shape's size parameter,
    ``offset_draws`` and ``seed_draws`` set how many times the offset and seed
    samplers are repeated in their candidate lists.
    """
    return [
        {
            "generator_type": ["shape"],
            "shape_type": [shape_type],
            size_key: sizes,
            "structure_block_types": block_types,
            "background_block_types": [["minecraft:air"]],
            "position_offset": [
                lambda: (
                    random.randint(-100, 100),
                    random.randint(-100, 100),
                    random.randint(-100, 100),
                )
            ] * offset_draws,
            "random_seed": [lambda: random.randint(0, 2**32 - 1)] * seed_draws,
            "region_size": [constants.region_size],
        }
    ]


# Deterministic cubes: fixed seed, one block type per sample.
simple_cubes = [
    {
        "generator_type": ["shape"],
        "shape_type": ["cube"],
        "side_length": range(3, 8),
        "structure_block_types": [[block] for block in sample_blocks],
        "background_block_types": [["minecraft:air"]],
        # NOTE(review): every other config uses the key "position_offset" with
        # an (x, y, z) tuple; this one uses "position_offsets" with a scalar.
        # Left unchanged because the generator's handling is not visible here
        # — confirm whether this key is honoured or silently ignored.
        "position_offsets": [64],
        "random_seed": [0],
        "region_size": [constants.region_size],
    }
]

mixed_cubes = _random_shape_dataset(
    "cube",
    "side_length",
    range(3, min(constants.region_size) - 1),
    _mixed_block_sets(),
    offset_draws=5,
    seed_draws=10,
)

simple_spheres = _random_shape_dataset(
    "sphere",
    "radius",
    range(1, (min(constants.region_size) // 2) - 1),
    [[block] for block in sample_blocks],
    offset_draws=10,
    seed_draws=1,
)

mixed_spheres = _random_shape_dataset(
    "sphere",
    "radius",
    range(1, (min(constants.region_size) // 2) - 1),
    _mixed_block_sets(),
    offset_draws=5,
    seed_draws=10,
)

# Only the simple-cube dataset is generated here; uncomment an entry to
# generate that dataset as well.
datasets_to_generate = {
    'simple_cubes': simple_cubes,
    # 'mixed_cubes': mixed_cubes,
    # 'simple_spheres': simple_spheres,
    # 'mixed_spheres': mixed_spheres,
}
for dataset_name, dataset_configs in datasets_to_generate.items():
    generator.generate_samples_from_configurations(dataset_configs, dataset_name)

Generating samples for simple_cubes: 100%|██████████| 227/227 [00:00<00:00, 13756.98it/s]


Prepare data

In [16]:
from data_preparer import load_schematics

# Directory holding the generated schematics and the HDF5 file they are loaded into.
schematics_dir = 'data/schematics'
hdf5_path = 'data/data.h5'
# Presumably the train / validation / test fractions, in that order —
# TODO confirm against load_schematics.
split_ratios = (0.7, 0.15, 0.15)
load_schematics(schematics_dir, hdf5_path, split_ratios)

Loading schematics from data/schematics into data/data.h5
Processing generator type: simple_cubes_3
Split data into 228 training samples, 40 validation samples, and 52 test samples.


Generating set: train:   0%|          | 0/228 [00:00<?, ?it/s]

Generating set: train: 100%|██████████| 228/228 [00:00<00:00, 235.70it/s]
Generating set: validation: 100%|██████████| 40/40 [00:00<00:00, 243.85it/s]
Generating set: test: 100%|██████████| 52/52 [00:00<00:00, 243.42it/s]


Processing generator type: simple_cubes_4
Split data into 242 training samples, 38 validation samples, and 40 test samples.


Generating set: train: 100%|██████████| 242/242 [00:00<00:00, 243.50it/s]
Generating set: validation: 100%|██████████| 38/38 [00:00<00:00, 212.25it/s]
Generating set: test: 100%|██████████| 40/40 [00:00<00:00, 245.17it/s]


Processing generator type: simple_cubes_5
Split data into 94 training samples, 21 validation samples, and 20 test samples.


Generating set: train: 100%|██████████| 94/94 [00:00<00:00, 235.10it/s]
Generating set: validation: 100%|██████████| 21/21 [00:00<00:00, 239.95it/s]
Generating set: test: 100%|██████████| 20/20 [00:00<00:00, 239.46it/s]


Processing generator type: simple_cubes_6
Split data into 24 training samples, 8 validation samples, and 8 test samples.


Generating set: train: 100%|██████████| 24/24 [00:00<00:00, 236.41it/s]
Generating set: validation: 100%|██████████| 8/8 [00:00<00:00, 225.22it/s]
Generating set: test: 100%|██████████| 8/8 [00:00<00:00, 197.54it/s]


Processing generator type: simple_cubes_7
Split data into 32 training samples, 2 validation samples, and 6 test samples.


Generating set: train: 100%|██████████| 32/32 [00:00<00:00, 197.49it/s]
Generating set: validation: 100%|██████████| 2/2 [00:00<00:00, 222.19it/s]
Generating set: test: 100%|██████████| 6/6 [00:00<00:00, 218.18it/s]

Finished updating HDF5 file.





Check data

In [17]:
import os

import h5py

from common.file_paths import BASE_DIR
import random

# Walk the HDF5 hierarchy (split -> generator type -> sample), printing counts
# at each level plus one randomly chosen sample per generator type.
# NOTE(review): the preparation cell writes to 'data/data.h5'; this assumes
# BASE_DIR resolves to that same directory — confirm.
with h5py.File(os.path.join(BASE_DIR, 'data.h5'), 'r') as hf:
    # Iterate over the dataset splits stored at the top level of the file
    print(f"Total splits: {len(hf)}")
    for split in hf:
        split_group = hf[split]
        print(f"  Split: {split}")
        print(f"  Total generator types: {len(split_group)}")

        # Iterate over generator types
        for generator_type in split_group:
            generator_group = split_group[generator_type]
            # List the sample names once; reused for the count and the random pick.
            sample_names = list(generator_group)
            total_samples = len(sample_names)
            print(f"    Generator Type: {generator_type}")
            print(f"    Total samples: {total_samples}")

            # random.choice raises IndexError on an empty sequence, so report
            # and skip a group without samples instead of aborting the walk.
            if not sample_names:
                print("    Random sample: (none)")
                continue
            print("    Random sample:")

            # Get a random sample
            name = random.choice(sample_names)
            sample_group = generator_group[name]
            prompts = sample_group['prompts']
            structure = sample_group['structure']

            # Print the name, prompts, and structure shape of the sample
            print(f"      Name: {name}")
            print("      Prompts:")
            for prompt in prompts:
                # Assumes prompts are stored as UTF-8 byte strings — TODO confirm.
                print(f"        {prompt.decode('utf-8')}")
            print(f"      Structure: {structure.shape}")

Total splits: 3
  Split: test
  Total generator types: 5
    Generator Type: simple_cubes_3
    Total samples: 52
    Random sample:
      Name: f1734f8d9939d106ea41f49873d8f96f5fb4868a9642531514cbf2fd5a7c3124
      Prompts:
        A perfect solid cube with a side length of 3 blocks. It is composed of deepslate iron ore. It is floating within an empty void.
        A perfect solid cube made of deepslate iron ore, each side precisely 3 blocks, floating immaculately in a void.
        An impeccably constructed deepslate iron ore cube, with 3 blocks along each edge, hovers in an empty void.
        A flawless deepslate iron ore cube, 3 blocks in size, suspended in a void.
        A meticulously crafted cube, perfect in geometry, made of deepslate iron ore and measuring 3 blocks per side, set against the backdrop of an empty void.
        Visualize a deepslate iron ore cube, perfect in its construction, 3 blocks to a side, ethereally floating in an expansive void.
      Structure: (8, 8, 