Generate data

In [7]:
import random

from common import constants
from schematic_generator import generator

# Seed the stdlib RNG once so that every draw made below, and every draw made
# later when the lambdas are invoked, is reproducible from a fresh kernel.
random.seed(0)

# Parameter grids for randomly sized shapes. Each key maps to a list of
# options; an option written as a lambda defers its random draw, so nothing is
# sampled while this list is being built.
#
# Size bounds are derived from min(constants.region_size), the same rule the
# simple_*/mixed_* grids further down use, so a shape also fits when the
# region is not a perfect cube (for a cubic region this is identical to using
# region_size[0]).
#
# NOTE(review): `configs` is not passed to the generator in any visible cell;
# confirm whether it is still needed.
# NOTE(review): random.randint(a, b) raises ValueError when a > b, so the
# "Filled" grids need a region large enough for their lower bounds (radius 3,
# side length 7) -- verify against constants.region_size.
configs = [
    # Simple shapes
    {
        "generator_type": ["shape"],
        "shape_type": ["sphere"],
        # Five deferred draws of a radius between 1 and half the smallest axis, minus one.
        "radius": [lambda: random.randint(1, (min(constants.region_size) // 2) - 1)] * 5,
        # Every single-block palette, plus as many random three-block palettes.
        "structure_block_types": (
            [[block] for block in constants.simple_block_types]
            + [lambda: random.sample(constants.simple_block_types, 3)] * len(constants.simple_block_types)
        ),
        "background_block_types": [["minecraft:air"]],
        "position_offset": [
            lambda: (random.randint(-100, 100), random.randint(-100, 100), random.randint(-100, 100))
        ],
        "random_seed": [lambda: random.randint(0, 2**32 - 1)],
        "region_size": [constants.region_size],
    },
    {
        "generator_type": ["shape"],
        "shape_type": ["cube"],
        # Five deferred draws of a side length between 1 and the smallest axis, minus one.
        "side_length": [lambda: random.randint(1, min(constants.region_size) - 1)] * 5,
        "structure_block_types": (
            [[block] for block in constants.simple_block_types]
            + [lambda: random.sample(constants.simple_block_types, 3)] * len(constants.simple_block_types)
        ),
        "background_block_types": [["minecraft:air"]],
        "position_offset": [
            lambda: (random.randint(-100, 100), random.randint(-100, 100), random.randint(-100, 100))
        ],
        "random_seed": [lambda: random.randint(0, 2**32 - 1)],
        "region_size": [constants.region_size],
    },
    # Filled
    # (these grids add "structure_fill_block_types" and "thickness"; presumably
    # a shell of the given thickness around a separate fill -- TODO confirm
    # against the generator)
    {
        "generator_type": ["shape"],
        "shape_type": ["sphere"],
        "radius": [lambda: random.randint(3, (min(constants.region_size) // 2) - 1)] * 3,
        # Every single-block palette, plus a third as many random three-block palettes.
        "structure_block_types": (
            [[block] for block in constants.simple_block_types]
            + [lambda: random.sample(constants.simple_block_types, 3)] * (len(constants.simple_block_types) // 3)
        ),
        # Fill options: air, one random block type, or three random block types.
        "structure_fill_block_types": [
            ["minecraft:air"],
            lambda: random.sample(constants.simple_block_types, 1),
            lambda: random.sample(constants.simple_block_types, 3),
        ],
        "thickness": [lambda: random.randint(1, 3)],
        "background_block_types": [["minecraft:air"]],
        "position_offset": [
            lambda: (random.randint(-100, 100), random.randint(-100, 100), random.randint(-100, 100))
        ],
        "random_seed": [lambda: random.randint(0, 2**32 - 1)],
        "region_size": [constants.region_size],
    },
    {
        "generator_type": ["shape"],
        "shape_type": ["cube"],
        "side_length": [lambda: random.randint(7, min(constants.region_size) - 1)] * 3,
        "structure_block_types": (
            [[block] for block in constants.simple_block_types]
            + [lambda: random.sample(constants.simple_block_types, 3)] * (len(constants.simple_block_types) // 3)
        ),
        "structure_fill_block_types": [
            ["minecraft:air"],
            lambda: random.sample(constants.simple_block_types, 1),
            lambda: random.sample(constants.simple_block_types, 3),
        ],
        "thickness": [lambda: random.randint(1, 3)],
        "background_block_types": [["minecraft:air"]],
        "position_offset": [
            lambda: (random.randint(-100, 100), random.randint(-100, 100), random.randint(-100, 100))
        ],
        "random_seed": [lambda: random.randint(0, 2**32 - 1)],
        "region_size": [constants.region_size],
    },
]

# Draw a fixed palette of block types once; every grid below picks from it.
num_blocks = 10
sample_blocks = random.sample(constants.simple_block_types, num_blocks)


def _single_block_options():
    """Return a fresh list with one single-block palette per sampled block."""
    return [[block] for block in sample_blocks]


def _mixed_block_options():
    """Return random palettes of 2-4 sampled blocks, drawn immediately.

    One palette is drawn per five sampled blocks, so the global RNG is
    consumed as soon as this function is called.
    """
    return [
        random.sample(sample_blocks, random.randint(2, 4))
        for _ in range(len(sample_blocks) // 5)
    ]


def _shape_grid(shape_type, size_key, sizes, block_options, seed_repeats):
    """Build a one-element list holding the parameter grid for one dataset.

    Args:
        shape_type: Value stored under "shape_type" ("cube" or "sphere" here).
        size_key: Name of the size parameter ("side_length" or "radius").
        sizes: Iterable of sizes stored under ``size_key``.
        block_options: List of block palettes for "structure_block_types".
        seed_repeats: How many times the deferred seed draw is repeated.

    Returns:
        A list containing a single dict whose keys are inserted in the order
        generator_type, shape_type, size, structure_block_types,
        background_block_types, position_offset, random_seed, region_size.
    """
    return [
        {
            "generator_type": ["shape"],
            "shape_type": [shape_type],
            size_key: sizes,
            "structure_block_types": block_options,
            "background_block_types": [["minecraft:air"]],
            # Five entries of the same deferred (x, y, z) offset draw.
            "position_offset": [
                lambda: tuple(random.randint(-100, 100) for _ in range(3))
            ] * 5,
            "random_seed": [lambda: random.randint(0, 2**32 - 1)] * seed_repeats,
            "region_size": [constants.region_size],
        }
    ]


# Size ranges are bounded by the smallest region axis (upper bounds exclusive).
smallest_dim = min(constants.region_size)
cube_sides = range(3, smallest_dim - 1)
sphere_radii = range(1, (smallest_dim // 2) - 1)

# Build order matters: the mixed grids draw from the RNG when they are
# created, so cubes must be built before spheres to keep results reproducible.
simple_cubes = _shape_grid("cube", "side_length", cube_sides, _single_block_options(), 1)

mixed_cubes = _shape_grid("cube", "side_length", cube_sides, _mixed_block_options(), 5)

simple_spheres = _shape_grid("sphere", "radius", sphere_radii, _single_block_options(), 1)

mixed_spheres = _shape_grid("sphere", "radius", sphere_radii, _mixed_block_options(), 5)

# Generate one dataset per parameter grid. The second argument is the dataset
# name; it is the label shown in the "Generating samples for <name>" progress
# bars recorded below.
# NOTE(review): where the generated files are written is decided inside
# `generator` and is not visible here -- confirm it matches the schematics
# directory read by the data-preparation cell.
generator.generate_samples_from_configurations(simple_cubes, 'simple_cubes')
generator.generate_samples_from_configurations(mixed_cubes, 'mixed_cubes')
generator.generate_samples_from_configurations(simple_spheres, 'simple_spheres')
generator.generate_samples_from_configurations(mixed_spheres, 'mixed_spheres')

Generating samples for simple_cubes: 100%|██████████| 100/100 [00:00<00:00, 10002.87it/s]
Generating samples for mixed_cubes: 100%|██████████| 100/100 [00:00<00:00, 15381.22it/s]
Generating samples for simple_spheres: 100%|██████████| 50/50 [00:00<00:00, 16648.03it/s]
Generating samples for mixed_spheres: 100%|██████████| 50/50 [00:00<00:00, 20009.08it/s]


Prepare data

In [1]:
from data_preparer import load_schematics

# Input directory of generated schematics and the HDF5 file to populate.
# NOTE(review): both paths are relative to the kernel's working directory,
# whereas the data-check cell opens os.path.join(BASE_DIR, 'data.h5') --
# confirm that both resolve to the same file.
schematics_dir = 'data/schematics'
hdf5_path = 'data/data.h5'
# The tuple presumably holds the train/validation/test split fractions (they
# sum to 1.0) -- TODO confirm the expected order against load_schematics.
load_schematics(schematics_dir, hdf5_path, (0.7, 0.15, 0.15))

Loading schematics from data/schematics into data/data.h5
Processing generator type: mixed_cubes
Split data into 73 training samples, 18 validation samples, and 9 test samples.


Generating set: train: 100%|██████████| 73/73 [00:00<00:00, 523.18it/s]
Generating set: validation: 100%|██████████| 18/18 [00:00<00:00, 600.04it/s]
Generating set: test: 100%|██████████| 9/9 [00:00<00:00, 562.39it/s]


Processing generator type: mixed_spheres
Split data into 42 training samples, 3 validation samples, and 5 test samples.


Generating set: train: 100%|██████████| 42/42 [00:00<00:00, 591.39it/s]
Generating set: validation: 100%|██████████| 3/3 [00:00<00:00, 599.87it/s]
Generating set: test: 100%|██████████| 5/5 [00:00<00:00, 625.16it/s]


Processing generator type: simple_cubes
Split data into 64 training samples, 16 validation samples, and 20 test samples.


Generating set: train: 100%|██████████| 64/64 [00:00<00:00, 592.47it/s]
Generating set: validation: 100%|██████████| 16/16 [00:00<00:00, 603.37it/s]
Generating set: test: 100%|██████████| 20/20 [00:00<00:00, 624.99it/s]


Processing generator type: simple_spheres
Split data into 30 training samples, 7 validation samples, and 13 test samples.


Generating set: train: 100%|██████████| 30/30 [00:00<00:00, 631.52it/s]
Generating set: validation: 100%|██████████| 7/7 [00:00<00:00, 635.25it/s]
Generating set: test: 100%|██████████| 13/13 [00:00<00:00, 619.05it/s]

Finished updating HDF5 file.





Check data

In [9]:
import os

import h5py

from common.file_paths import BASE_DIR
import random

# Print a summary of the prepared HDF5 file: the number of splits, the
# generator types inside each split, and one randomly chosen sample per
# generator type (its name, its prompts and the shape of its structure).
# The file is opened read-only and closed when the block exits.
with h5py.File(os.path.join(BASE_DIR, 'data.h5'), 'r') as hf:
    # Top level: one group per dataset split (the preparation step reports
    # them as train, validation and test).
    print(f"Total splits: {len(hf)}")
    for split in hf:
        split_group = hf[split]
        print(f"  Split: {split}")
        print(f"  Total generator types: {len(split_group)}")

        # Second level: one group per generator type.
        for generator_type in split_group:
            generator_group = split_group[generator_type]
            sample_names = list(generator_group)
            total_samples = len(sample_names)
            print(f"    Generator Type: {generator_type}")
            print(f"    Total samples: {total_samples}")
            print("    Random sample:")

            # random.choice raises IndexError on an empty sequence, so report
            # an empty group instead of aborting the whole summary.
            if not sample_names:
                print("      (no samples in this group)")
                continue

            # Pick one sample at random and look its group up once.
            name = random.choice(sample_names)
            sample_group = generator_group[name]
            prompts = sample_group['prompts']
            structure = sample_group['structure']

            # Print the name, the prompts and the structure shape of the sample.
            print(f"      Name: {name}")
            print("      Prompts:")
            for prompt in prompts:
                # h5py yields bytes or str for string data depending on its
                # version and the dataset's dtype; only bytes need decoding.
                text = prompt.decode('utf-8') if isinstance(prompt, bytes) else prompt
                print(f"        {text}")
            print(f"      Structure: {structure.shape}")

Total splits: 3
  Split: test
  Total generator types: 4
    Generator Type: mixed_cubes
    Total samples: 9
    Random sample:
      Name: ee96f2938babdfa5aa32ff3ec083ff9416e8ea73b89eb1e9664c01f320845355
      Prompts:
        A perfect solid cube with a side length of 3 blocks. It is composed of raw iron block and crying obsidian. It is floating within an empty void.
        A perfect solid cube made of raw iron block and crying obsidian, each side precisely 3 blocks, floating immaculately in a void.
        An impeccably constructed raw iron block and crying obsidian cube, with 3 blocks along each edge, hovers in an empty void.
        A flawless raw iron block and crying obsidian cube, 3 blocks in size, suspended in a void.
        A meticulously crafted cube, perfect in geometry, made of raw iron block and crying obsidian and measuring 3 blocks per side, set against the backdrop of an empty void.
        Visualize a raw iron block and crying obsidian cube, perfect in its construc