In [1]:
import struct
import zlib
from pynbt import NBTFile
import io
import os
import math
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
import tqdm
import itertools


def translate_chunk_location(x, z, h_offset):
    region_x = x * 32
    region_z = z * 32
    
    chunk_x = int(region_x + (h_offset % 32));
    chunk_z = int(region_z + (h_offset / 32)); 
    
    return chunk_x, chunk_z


def get_chunk_offset_and_length(header, i):
    # Extract the four bytes for this chunk entry
    entry_bytes = header[i * 4: i * 4 + 4]

    # The first three bytes are the offset (big-endian)
    offset = int.from_bytes(entry_bytes[:3], 'big')

    # The fourth byte is the length
    length = entry_bytes[3]

    return offset, length
    

def read_region_file(filepath):
    # Get the filename from path.
    filename = filepath.split('/')[-1]

    # Filename format is r.x.z.mca
    filesplit = filename.split('.')

    # X and Z coords are parts 1 and 2 of the filename.
    x = int(filesplit[1])
    z = int(filesplit[2])
    
    chunks = {}
    chunk_mapping = {}
    # filename = f'data/region/r.{x}.{z}.mca'
    with open(f"{dir_path}/{filename}", 'rb') as file:
        # Region files begin with an 8192 byte header
        header = file.read(8192)

        # locations (1024 entries)
        for i in range(0, 1024):
            try:
                offset, length = get_chunk_offset_and_length(header, i)
                
                if offset == 0 and length == 0:
                    continue  # Chunk is not present
                    
                # Convert the offset to bytes (multiply by 4096)
                offset *= 4096
    
                # check past offset
                file.seek(0, 2)  # end to of file
                if file.tell() < offset: 
                    # offset is past file, no chunk exists
                    continue
    
                # goto offset and read 
                file.seek(offset)
                chunk_header = file.read(5)  # Read chunk length and compression type
                if len(chunk_header) < 5:
                    print(f"Incomplete chunk header for chunk {i}.")
                    continue
    
                chunk_length, compression_type = struct.unpack('>IB', chunk_header)
                chunk_length -= 1  # Subtract the compression type byte
    
                # Read and decompress the chunk data
                compressed_chunk_data = file.read(chunk_length)
                if len(compressed_chunk_data) < chunk_length:
                    print(f"Incomplete chunk data for chunk {i}.")
                    continue
    
                chunks[translate_chunk_location(x, z, i)] = {
                    'data': zlib.decompress(compressed_chunk_data),
                    'i': i,
                    'file': filename
                }
            except Exception as e:
                print(f"i={i}")
                print(f"chunk_length={chunk_length}")
                print(f"chunk_loc={translate_chunk_location(x, z, i)}")
                print(e)
                continue
            # chunks[(x, z, i)] = zlib.decompress(compressed_chunk_data)
    return chunks
        

In [2]:
def plot_chunks(sub_chunks):
    coords, file_indices = zip(*[(k, v+1) for k, v in sub_chunks.items()])
    print(f"coords={coords}")

    cmap = matplotlib.colormaps['nipy_spectral'](np.linspace(.5, 1, len(fnames)))

    coords = np.array(coords)
    file_indices = np.array(file_indices)

    min_coords = np.min(coords, axis=0)
    max_coords = np.max(coords, axis=0)
    dim_sizes = max_coords - min_coords + 1

    # Create a 3D numpy array of zeros
    array_3d = np.zeros(dim_sizes, dtype=int)

    print("creating array_3d")
    # Set the values using advanced indexing
    array_3d[coords[:, 0] - min_coords[0], 
             coords[:, 1] - min_coords[1], 
             coords[:, 2] - min_coords[2]] = file_indices

    # Create a mapping from integer values to colors
    value_to_color = {i+1: cmap[i] for i, _ in enumerate(fnames)}

    # Plotting
    fig = plt.figure(figsize=(40, 8), constrained_layout=True)
    ax = fig.add_subplot(111, projection='3d')

    for val, _ in tqdm.tqdm(enumerate(fnames)):
        voxelarray = array_3d == val+1
        ax.voxels(voxelarray, facecolors=value_to_color[val+1], edgecolor='k')  # edgecolor is the color of the edges

    handles = [plt.Line2D([0], [0], marker='o', color='w', label=label,
                          markerfacecolor=value_to_color[val+1], markersize=10) 
               for val, label in enumerate(fnames)]
    ax.legend(handles=handles, title="Labels", loc='center left', bbox_to_anchor=(1, 0.5), fontsize='small')
    
    plt.show()

In [3]:
def create_coords_tuple(idx, section_size):
    y = idx % section_size
    z = (idx // section_size) % section_size
    x = idx // (section_size * section_size)
    return (y, z, x)

In [4]:
def read_biome_section(biome_section, biome_full_palette):
    # Create an empty dict to store the biome
    biomes = {}
    
    # Need size of palette to calculate bit-size
    palette_size = len(biome_section['palette'])

    # Iterate over the palette to add them to the full palette
    for palette in biome_section['palette']:
        # If there is no entry for the palette then add one
        biome_full_palette.setdefault(palette.value, len(biome_full_palette))

    # If there is only 1 entry in the palette
    # Tne entire section is that entry
    if palette_size == 1:
        for yxz_coords in list(itertools.product(range(0,4), repeat=3)):
            # # adjust for chunk offset
            # yxz_coords = tuple(a + (b*4) for a, b in zip(yxz_coords, chunk_offset))
            biomes[yxz_coords] = biome_full_palette[biome_section['palette'][0].value]
        return biomes

    # Bit size is least number of bits required to represent largest index 
    bit_size = math.ceil(math.log2(palette_size)) 
    current_idx = 0

    # Get biome data
    for element in biome_section['data'].value:
        # We start reading from 0 
        bits_read = 0
        
        # Keep reading until we get to 64 bits
        while bits_read + bit_size <= 64:
            # Shift element to the bits_read, then mask so we only read bit_size
            index = (element >> bits_read) & ((1 << bit_size) - 1)
            # Convert current idx into yzx coords
            yxz_coords = create_coords_tuple(current_idx, 4)
            # The index we want is the index of the palette from the full palette
            full_palette_index = biome_full_palette[biome_section['palette'][index].value]
            
            # adjust for chunk offset
            # yxz_coords = tuple(a + (b*4) for a, b in zip(yxz_coords, chunk_offset))
            # Store the value of the index in the multi dim array
            biomes[yxz_coords] = full_palette_index
            
            # Step forward to read next bits
            bits_read += bit_size
            # Next idx
            current_idx += 1
    
    return biomes

In [5]:
def read_block_section(block_states_section, block_type_full_palette, biomes, combined_block_palette):
    # Need size of palette to calculate bit-size
    palette_size = len(block_states_section['palette'])

    # Iterate over the palette to add them to the full palette
    for palette in block_states_section['palette']:
        # If there is no entry for the palette then add one
        block_type_full_palette.setdefault(palette['Name'].value, len(block_type_full_palette))

    # If there is only 1 entry in the palette
    # Tne entire section is that entry
    if palette_size == 1:
        # Create an array that contains the single value from the palette
        block_type_array = np.full((16,16,16), block_type_full_palette[block_states_section['palette'][0]['Name'].value])
    else:
        # Bit size is 4 or least number of bits required to represent largest index 
        bit_size = max(4, math.ceil(math.log2(palette_size)))
        
        # Create an empty array to store the block states
        block_type_array = np.empty((16,16,16), dtype=int)
    
        
        # element = block_states_section['data'].value[0]
        # print(block_states_section['data'])
        current_idx = 0
        
        # Get block data
        for element in block_states_section['data'].value:
            # We start reading from 0 
            bits_read = 0
            
            # Keep reading until we get to 64 bits
            while bits_read + bit_size <= 64:
                # Shift element to the bits_read, then mask so we only read bit_size
                index = (element >> bits_read) & ((1 << bit_size) - 1)
                # Insert into the array the index of the palette from the full biome palette
                # Convert current idx into yzx coords
                yxz_coords = get_coords(current_idx, 16)
                # The index we want is the index of the palette from the full palette
                full_palette_index = block_type_full_palette[block_states_section['palette'][index]['Name'].value]
                # Store the value of the index in the multi dim array
                try:
                    block_type_array[*yxz_coords] = full_palette_index
                except Exception as e:
                    break
                    print(e)
                    print(f"current_idx={current_idx}")
                    print(f"yxz_coords={yxz_coords}")
                # Step forward to read next bits
                bits_read += bit_size
                # Next idx
                current_idx += 1
                
            if current_idx >= 4095:
                break

    combined_block_type = np.empty((16,16,16), dtype=int)
    
    for x in range(0, 16):
        for y in range(0, 16):
            for z in range(0, 16):
                biome_palette_idx = biomes[(y//4,x//4,z//4)]
                # get combined paletee index or create if it doesn't exist
                combined_palette_idx = combined_block_palette.setdefault((biome_palette_idx, block_type_array[y,x,z]), len(combined_block_palette))
                combined_block_type[y][x][z] = biome_palette_idx
                
    
    return combined_block_type

In [6]:
def extract_sub_chunks(all_chunks):
    biome_palette = {}
    block_type_palette = {}
    combined_block_biome_palette = {}
    all_biomes = {}
    combined_block_biome = {}
    
    all_sub_chunks = {}
    for x_z, chunk in list(all_chunks.items()):
    # for x_z, chunk in all_chunks.items():
        chunk_data = io.BytesIO(chunk['data'])
        nbt = NBTFile(chunk_data)
    
        for idx, section in enumerate(nbt['sections']):
            print(idx)
            if plot_y:
                y_coord = section['Y'].value
            else:
                y_coord = 1
            # chunk_offset = (*(x_z), y_coord)
            chunk_offset = (*(x_z), y_coord)
            # parse biome
            biome = read_biome_section(section['biomes'], biome_palette)
            print(biome)
            combined_block_biome[chunk_offset] = read_block_section(section['block_states'], block_type_palette, biome, combined_block_biome_palette)
            if not plot_y:
                break
        break
    # return all_sub_chunks
    return combined_block_biome, combined_block_biome_palette

In [7]:
def plot_blocks(sub_chunks, biome_palette):
    coords, file_indices = zip(*[(k, v+1) for k, v in sub_chunks.items()])

    cmap = matplotlib.colormaps['nipy_spectral'](np.linspace(.5, 1, len(biome_palette)))

    coords = np.array(coords)
    file_indices = np.array(file_indices)

    min_coords = np.min(coords, axis=0)
    max_coords = np.max(coords, axis=0)
    dim_sizes = max_coords - min_coords + 1

    print(f"min_coords={min_coords}")
    print(f"max_coords={max_coords}")
    print(f"dim_sizes={dim_sizes}")

    # Create a 3D numpy array of zeros
    array_3d = np.zeros(dim_sizes, dtype=int)

    # Set the values using advanced indexing
    array_3d[coords[:, 0] - min_coords[0], 
             coords[:, 1] - min_coords[1], 
             coords[:, 2] - min_coords[2]] = file_indices

    # Create a mapping from integer values to colors
    value_to_color = {i+1: cmap[i] for i, _ in enumerate(biome_palette.values())}

    # Plotting
    fig = plt.figure(figsize=(50, 8))
    ax = fig.add_subplot(111, projection='3d')
    print(biome_palette)

    # for val, _ in tqdm.tqdm(enumerate(biome_palette.values())):
    # for val in range(5, 8): 
    for val in [2]:
        voxelarray = array_3d == val+1
        ax.voxels(voxelarray, facecolors=value_to_color[val+1], edgecolor='k')  # edgecolor is the color of the edges

    x_labels = np.arange(min_coords[0], max_coords[0] + 1)
    y_labels = np.arange(min_coords[1], max_coords[1] + 1)
    z_labels = np.arange(min_coords[2], max_coords[2] + 1)
    ax.set_xticks(np.arange(len(x_labels)))
    ax.set_yticks(np.arange(len(y_labels)))
    ax.set_zticks(np.arange(len(z_labels)))
    
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(y_labels)
    ax.set_zticklabels(z_labels)
    ax.autoscale(False)

    handles = [plt.Line2D([0], [0], marker='o', color='w', label=label,
                          markerfacecolor=value_to_color[val+1], markersize=10) 
               for label, val in biome_palette.items()]
    ax.legend(handles=handles, title="Labels", loc='center left', bbox_to_anchor=(1, 0.5), fontsize='small')
    
    plt.show()

In [8]:
fnames = [fn for fn in os.listdir('data/region/') if '.mca' in fn]
dir_path = 'data/region/'
plot_y = True
fnames = [fnames[0]]

all_chunks = {}
all_sub_chunks = {}
biome_full_palette = {}
block_full_palette = {}

for fname in fnames:
    print(f"{fname}")
    file_chunks = read_region_file(f'data/region/{fname}')
    
    print(f"Loaded {len(file_chunks)} chunks.")
    all_chunks.update(file_chunks)

all_sub_chunks, full_palette = extract_sub_chunks(all_chunks)
# plot_blocks(all_sub_chunks, biome_full_palette)

print(len(full_palette))
print(all_sub_chunks.keys())

r.-1.-1.mca
Loaded 559 chunks.
0
{(0, 0, 0): 0, (0, 0, 1): 0, (0, 0, 2): 0, (0, 0, 3): 0, (0, 1, 0): 0, (0, 1, 1): 0, (0, 1, 2): 0, (0, 1, 3): 0, (0, 2, 0): 0, (0, 2, 1): 0, (0, 2, 2): 0, (0, 2, 3): 0, (0, 3, 0): 0, (0, 3, 1): 0, (0, 3, 2): 0, (0, 3, 3): 0, (1, 0, 0): 0, (1, 0, 1): 0, (1, 0, 2): 0, (1, 0, 3): 0, (1, 1, 0): 0, (1, 1, 1): 0, (1, 1, 2): 0, (1, 1, 3): 0, (1, 2, 0): 0, (1, 2, 1): 0, (1, 2, 2): 0, (1, 2, 3): 0, (1, 3, 0): 0, (1, 3, 1): 0, (1, 3, 2): 0, (1, 3, 3): 0, (2, 0, 0): 0, (2, 0, 1): 0, (2, 0, 2): 0, (2, 0, 3): 0, (2, 1, 0): 0, (2, 1, 1): 0, (2, 1, 2): 0, (2, 1, 3): 0, (2, 2, 0): 0, (2, 2, 1): 0, (2, 2, 2): 0, (2, 2, 3): 0, (2, 3, 0): 0, (2, 3, 1): 0, (2, 3, 2): 0, (2, 3, 3): 0, (3, 0, 0): 0, (3, 0, 1): 0, (3, 0, 2): 0, (3, 0, 3): 0, (3, 1, 0): 0, (3, 1, 1): 0, (3, 1, 2): 0, (3, 1, 3): 0, (3, 2, 0): 0, (3, 2, 1): 0, (3, 2, 2): 0, (3, 2, 3): 0, (3, 3, 0): 0, (3, 3, 1): 0, (3, 3, 2): 0, (3, 3, 3): 0}
1
{(0, 0, 0): 0, (0, 0, 1): 0, (0, 0, 2): 0, (0, 0, 3): 0, (0, 1, 0): 