Generate data

In [1]:
import random

random.seed(0)

from common import constants
from schematic_generator import generator

# Earlier, more general shape configurations.
# NOTE(review): nothing in the visible cells passes `configs` to the generator;
# confirm whether it is still needed.
# NOTE(review): this block iterates over and samples from `constants.AIR` as if
# it were a collection of block ids, whereas the dataset configs further down
# wrap it as a single id (`[[constants.AIR]]`) — confirm which is intended.


def _legacy_shape_config(shape_type, size_key, size_sampler, size_copies, mixed_copies, filled=False):
    """Build one entry of `configs`.

    Args:
        shape_type: Value stored (as a one-element list) under "shape_type".
        size_key: Name of the size option ("radius" or "side_length").
        size_sampler: Zero-argument callable drawing the size; the same
            callable object is repeated `size_copies` times.
        size_copies: How many times `size_sampler` appears in the option list.
        mixed_copies: How many times the "sample 3 block types" callable is
            appended after the single-block entries.
        filled: When true, the "structure_fill_block_types" and "thickness"
            options are inserted right after "structure_block_types".

    Returns:
        A dict mapping option names to lists of candidate values (plain values
        or zero-argument callables), in the same key order as the original
        literals.
    """
    config = {
        "generator_type": ["shape"],
        "shape_type": [shape_type],
        size_key: [size_sampler] * size_copies,
        "structure_block_types": (
            [[block] for block in constants.AIR]
            + [lambda: random.sample(constants.AIR, 3)] * mixed_copies
        ),
    }
    if filled:
        config.update({
            "structure_fill_block_types": [
                ["minecraft:air"],
                lambda: random.sample(constants.AIR, 1),
                lambda: random.sample(constants.AIR, 3),
            ],
            "thickness": [lambda: random.randint(1, 3)],
        })
    config.update({
        "background_block_types": [["minecraft:air"]],
        "position_offset": [
            lambda: (random.randint(-100, 100), random.randint(-100, 100), random.randint(-100, 100))
        ],
        "random_seed": [lambda: random.randint(0, 2**32 - 1)],
        "region_size": [constants.REGION_SIZE],
    })
    return config


configs = [
    # Simple shapes
    _legacy_shape_config(
        "sphere",
        "radius",
        lambda: random.randint(1, (constants.REGION_SIZE[0] // 2) - 1),
        5,
        len(constants.AIR),
    ),
    _legacy_shape_config(
        "cube",
        "side_length",
        lambda: random.randint(1, constants.REGION_SIZE[0] - 1),
        5,
        len(constants.AIR),
    ),
    # Filled
    _legacy_shape_config(
        "sphere",
        "radius",
        lambda: random.randint(3, (constants.REGION_SIZE[0] // 2) - 1),
        3,
        len(constants.AIR) // 3,
        filled=True,
    ),
    _legacy_shape_config(
        "cube",
        "side_length",
        lambda: random.randint(7, constants.REGION_SIZE[0] - 1),
        3,
        len(constants.AIR) // 3,
        filled=True,
    ),
]

def _shape_dataset(shape_type, size_key, sizes, block_types, seed_copies=1):
    """Build the single-entry configuration list for one shape dataset.

    Args:
        shape_type: "cube" or "sphere"; stored under "shape_type".
        size_key: Name of the size option ("side_length" or "radius").
        sizes: Iterable of candidate sizes (plain values or zero-argument
            callables that draw one).
        block_types: List of block-type lists for "structure_block_types".
        seed_copies: Number of times the random-seed callable is repeated in
            the "random_seed" option.

    Returns:
        A one-element list holding the configuration dict, with keys in the
        same order as the hand-written literals this replaces.
    """
    return [
        {
            "generator_type": ["shape"],
            "shape_type": [shape_type],
            size_key: sizes,
            "structure_block_types": block_types,
            "background_block_types": [[constants.AIR]],
            # NOTE(review): the meaning of 4 here and 0 under "descriptions" is
            # defined by schematic_generator, not visible in this notebook.
            "position_offsets": [4],
            "random_seed": [lambda: random.randint(0, 2**32 - 1)] * seed_copies,
            "region_size": [constants.REGION_SIZE],
            "descriptions": [0],
        }
    ]


def _single_block_types():
    """Return one single-element block list per entry of SIMPLE_BLOCK_TYPES."""
    return [[block] for block in constants.SIMPLE_BLOCK_TYPES]


def _mixed_block_types():
    """Return 4 * len(SIMPLE_BLOCK_TYPES) random mixes of 2-3 block types.

    The mixes are drawn eagerly (when this function is called), so the call
    order below determines how the seeded `random` stream is consumed.
    """
    return [
        random.sample(constants.SIMPLE_BLOCK_TYPES, random.randint(2, 3))
        for _ in range(len(constants.SIMPLE_BLOCK_TYPES) * 4)
    ]


smallest_extent = min(constants.REGION_SIZE)

simple_cubes = _shape_dataset(
    "cube",
    "side_length",
    range(2, smallest_extent + 1),
    _single_block_types(),
)

mixed_cubes = _shape_dataset(
    "cube",
    "side_length",
    [lambda: random.randint(3, min(constants.REGION_SIZE))],
    _mixed_block_types(),
    seed_copies=4,
)

simple_spheres = _shape_dataset(
    "sphere",
    "radius",
    range(1, smallest_extent // 2),
    _single_block_types(),
)

mixed_spheres = _shape_dataset(
    "sphere",
    "radius",
    # random.randint includes its upper bound while range() excludes its stop,
    # so the bound is min // 2 - 1 to match the largest radius simple_spheres
    # uses. The previous bound (min // 2) could draw a radius one larger.
    # NOTE(review): the recorded run suggests that extra radius yielded no
    # samples — confirm against schematic_generator.
    [lambda: random.randint(1, min(constants.REGION_SIZE) // 2 - 1)],
    _mixed_block_types(),
    seed_copies=4,
)

# Generate every dataset, in the same order as before.
for dataset_name, dataset_configs in (
    ('simple_cubes', simple_cubes),
    ('mixed_cubes', mixed_cubes),
    ('simple_spheres', simple_spheres),
    ('mixed_spheres', mixed_spheres),
):
    generator.generate_samples_from_configurations(dataset_configs, dataset_name)

Generating samples for simple_cubes: 100%|██████████| 5950/5950 [00:16<00:00, 361.79it/s]


Removing old sample 00001af6ba3aa8883f504449019ef92a6d6007863d8744e33a5536cf90bd45e8.schem
Removing old sample 000047ccd928f6c3cfe9b47c2bb228cd4b9eacfa2823d61e6c51f891ce366ef3.schem
Removing old sample 0003db7b52b4fa08d4ce0e4a9104e57013b25d529fb4d16e3d0a893ef0138c8d.schem
Removing old sample 0006a8cf45b5e4af361ae446ac21191333e321478c0b7e7e58e5f353b1132dfc.schem
Removing old sample 000825eded7a89d12384a6d82ba119770c19101120cdd18f83132cc65c5635c1.schem
Removing old sample 0008b92290f63c6552f463e949e66845d8492ea3a3b2b924c26a72882dcdb3f3.schem
Removing old sample 000b216fff0c08141477c18eb0f9e047160c8f7af2352cf6cf94d1fce168366f.schem
Removing old sample 000b2a943783e7c25ecc5e80159082cbd97b62bced699a3fbf94727b71ced876.schem
Removing old sample 0011017807047b2334ca07e5098ab20e70e4ef437074cd95e2a76c1dfb578af8.schem
Removing old sample 0012c0b763022c07b46cf39ddd81082426cd0b5f8a7810f593842079d440736b.schem
Removing old sample 00133c45f675d8b233c7c669b5d5b00141f57876964b2576d9fec8f0a5d82e4a.schem

Generating samples for mixed_cubes: 100%|██████████| 13213/13213 [00:38<00:00, 346.51it/s]
Generating samples for simple_spheres: 100%|██████████| 2856/2856 [00:08<00:00, 351.20it/s]
Generating samples for mixed_spheres: 100%|██████████| 11532/11532 [00:32<00:00, 356.22it/s]


In [46]:
# Scratch arithmetic for sizing the dataset: compares the old per-block sample
# budget with a scheme where each of eight sizes contributes k**3 samples,
# capped at `limit`.
# Relies on `constants` imported in the first code cell.

# `num_blocks` was never defined in the notebook (the cell only ran thanks to
# leftover kernel state). 10 is the value implied by the recorded output:
# 8250 // 825 == 10 and 238 // 10 == 23.
num_blocks = 10

total_samples = 8250
print(f'Old total samples: {total_samples}')
total_per_block = total_samples // num_blocks
print(f'Old total per block: {total_per_block}')

print()
limit = 16
# k**3 for k = 1..8, each capped at `limit`.
capped_counts = [min(side**3, limit) for side in range(1, 9)]
for capped in capped_counts:
    print(capped)
total = sum(capped_counts)
print(f'Total: {total}')
print(f'Total total: {total * len(constants.SIMPLE_BLOCK_TYPES)}')
print(f'Reduction: {((total_per_block - total) / total_per_block) * 100:.2f}%')

print()
print('Ratios:')
# Share of the capped total contributed by each size.
for capped in capped_counts:
    print(f'{(capped / total) * 100:.2f}%')

print()
increase = len(constants.SIMPLE_BLOCK_TYPES) // num_blocks
print(f'Increase: {increase}')
total_new = increase * total_samples
print(f'New total samples: {total_new}')
scaling_reduction = 0.1
total_reduced = int(total_new * scaling_reduction)
print(f'Reduced total samples: {total_reduced}')

print()
print(f'New total per block: {total_reduced // len(constants.SIMPLE_BLOCK_TYPES)}')

Old total samples: 8250
Old total per block: 825

1
8
16
16
16
16
16
16
Total: 105
Total total: 24990
Reduction: 87.27%

Ratios:
0.95%
7.62%
15.24%
15.24%
15.24%
15.24%
15.24%
15.24%

Increase: 23
New total samples: 189750
Reduced total samples: 18975

New total per block: 79


In [1]:
import os
from schempy import Schematic
from pathlib import Path
import hashlib

# Text prompts describing an oak tree. The first entry is used as the schematic
# name and the full list is stored in each schematic's metadata by the loop
# that follows.
descriptions = [
    'An oak tree.',
    'A tree.',
    'A standard oak tree.',
    'A simple oak tree.',
    'A classic oak tree.',
    'A typical oak tree.',
    'A generic oak tree.',  # was misspelled "beneric"
    'A basic oak tree.',
    'A small oak tree.',
    'An unmodified oak tree.',
    'An unaltered oak tree.',
    'A naturally generated oak tree.',
]

# Re-save up to `max_schematics` tree schematics under a hashed file name, with
# the oak-tree prompts attached as name and metadata.
input_directory = 'data/schematics/trees'
output_directory = 'data/schematics/trees_pos_4'
max_schematics = 1000

# Make sure the destination exists before the first save.
os.makedirs(output_directory, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the subset that
# survives the cap reproducible between runs.
schem_filenames = sorted(
    name for name in os.listdir(input_directory) if name.endswith('.schem')
)

count = 0
for filename in schem_filenames[:max_schematics]:
    schematic = Schematic.from_file(Path(input_directory) / filename)
    # The output name is the SHA-256 of the input file NAME (not its contents).
    file_hash = hashlib.sha256(filename.encode()).hexdigest()
    schematic.name = descriptions[0]
    schematic.author = "mmmfrieddough"
    schematic.metadata = {'SchematicGenerator': {'Prompts': descriptions}}
    # NOTE(review): the second argument (2) is passed through unchanged; its
    # meaning is defined by schempy — presumably a format version, confirm.
    schematic.save_to_file(Path(output_directory) / f'{file_hash}.schem', 2)
    count += 1

In [2]:
import random
from amulet import load_level
from amulet.api.selection import SelectionBox, SelectionGroup
from amulet.api.level import World
import amulet

def generate_random_coordinates(world, section_size):
    """Pick a random (x, y, z) start coordinate inside the overworld bounds.

    For each axis the coordinate is drawn with `random.randint` between the
    bounds' minimum and the bounds' maximum minus `section_size` (both ends
    inclusive), in x, y, z order.

    Args:
        world: Object exposing `bounds(dimension)` whose result has indexable
            `min` and `max` attributes.
        section_size: Edge length subtracted from the upper bound on each axis.

    Returns:
        A 3-tuple of the drawn x, y and z coordinates.
    """
    overworld = world.bounds('minecraft:overworld')
    lower, upper = overworld.min, overworld.max
    return tuple(
        random.randint(lower[axis], upper[axis] - section_size)
        for axis in range(3)
    )


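# NOTE: unfinished draft kept for reference only; the inner loop below
# (`for bx, by, bz in selection.:`) is incomplete and will not parse if uncommented.
# def extract_cubic_section(world: World, start_coords, section_size):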
#     x_start, y_start, z_start = start_coords
#     selection = SelectionBox((x_start, y_start, z_start), (x_start +
#                              section_size, y_start + section_size, z_start + section_size))

#     # Initialize a structure to hold the blocks
#     cubic_section = {}

#     for cx, cz in selection.chunk_locations():
#         chunk = world.get_chunk(cx, cz, 'minecraft:overworld')
#         for bx, by, bz in selection.:
#             if chunk.bounds.contains_block((bx, by, bz)):
#                 block = chunk.get_block(bx, by, bz)
#                 cubic_section[(bx, by, bz)] = block

#     return cubic_section


# Path to your Minecraft world
world_path = "New World (2)"
output_path = 'test.schem'

# Load the world
world = load_level(world_path)

try:
    # Copy the box spanning (0, 0, 0) to (16, 16, 16) out of the overworld.
    selection = SelectionGroup(SelectionBox((0, 0, 0), (16, 16, 16)))
    structure = world.extract_structure(selection, 'minecraft:overworld')

    # Create the output file and save the extracted structure into it.
    wrapper = amulet.load_format(output_path)
    # NOTE(review): 3578 is passed as the version argument — presumably a Java
    # data version; confirm it matches the target game version.
    wrapper.create_and_open('java', 3578, bounds=selection, overwrite=True)
    # NOTE(review): writes a private attribute of the wrapper; kept as-is
    # because the reason for the workaround is not visible here.
    wrapper._path = output_path
    structure.save(wrapper)
finally:
    # Close the world even if extraction or saving fails.
    world.close()

# On success, the extracted region has been saved through `wrapper` to test.schem


INFO - Loading level New World (2)


: 

Prepare data

In [1]:
from data_preparer import load_schematics

# Convert the selected schematic dataset into the HDF5 file used downstream.
schematics_dir = 'data/schematics'
hdf5_path = 'data/data.h5'

# Three split fractions; the recorded log reports them as training,
# validation and test, in that order.
split_ratios = (0.7, 0.15, 0.15)
selected_datasets = ['world-11-29-2022-2']

load_schematics(
    schematics_dir,
    hdf5_path,
    split_ratios,
    dataset_names=selected_datasets,
)

Loading schematics from data/schematics into data/data.h5
Processing dataset: world-11-29-2022-2
Split data into 2484 training samples, 538 validation samples, and 549 test samples.


Generating set: train: 100%|██████████| 2484/2484 [00:19<00:00, 128.35it/s]
Generating set: validation: 100%|██████████| 538/538 [00:03<00:00, 154.97it/s]
Generating set: test: 100%|██████████| 549/549 [00:03<00:00, 160.14it/s]

Finished updating HDF5 file.





Check data

In [1]:
import os

import h5py

from common.file_paths import BASE_DIR
import random

# Summarise the HDF5 file: splits -> datasets -> samples, and show the shape of
# one randomly chosen sample per dataset.
# NOTE(review): the "Prepare data" cell writes to 'data/data.h5' while this one
# reads BASE_DIR/data.h5 — confirm both resolve to the same file.
data_path = os.path.join(BASE_DIR, 'data.h5')

with h5py.File(data_path, 'r') as hf:
    # Iterate over dataset splits (train, val, test)
    print(f"Total splits: {len(hf)}")
    for split in hf:
        split_group = hf[split]
        print(f"  Split: {split}")
        print(f"  Total datasets: {len(split_group)}")

        # Iterate over datasets
        for dataset in split_group:
            dataset_group = split_group[dataset]
            sample_names = list(dataset_group.keys())
            total_samples = len(sample_names)
            print(f"    Dataset: {dataset}")
            print(f"    Total samples: {total_samples}")

            # random.choice raises IndexError on an empty sequence, so skip
            # datasets that contain no samples.
            if not sample_names:
                print("    No samples to inspect.")
                continue

            print("    Random sample:")

            # Get a random sample
            name = random.choice(sample_names)
            structure = dataset_group[name]['structure']

            # Print the name and the structure's shape
            print(f"      Name: {name}")
            print(f"      Structure: {structure.shape}")

Total splits: 3
  Split: test
  Total datasets: 1
    Dataset: world-11-29-2022-2
    Total samples: 549
    Random sample:
      Name: f31be6e96d29cc13096b9ba881c88cd0f907ab7c32a6c008e2c3b1cf3c9fae22
      Structure: (11, 11, 11)
  Split: train
  Total datasets: 1
    Dataset: world-11-29-2022-2
    Total samples: 2484
    Random sample:
      Name: 0f1c298683e722a932233b62c9d9f0c81109f973c55596c9061deac579fb2e85
      Structure: (11, 11, 11)
  Split: validation
  Total datasets: 1
    Dataset: world-11-29-2022-2
    Total samples: 538
    Random sample:
      Name: d7b8bb917dd332b3d9a7228ab193f91db451ac493f5020ac69d79c9d888e0a52
      Structure: (11, 11, 11)
