In [1]:
import struct
import zlib
from pynbt import NBTFile
import io
import os
import math


def translate_chunk_location(x, z, h_offset):
    """Convert a header entry index into region-local chunk coordinates.

    Header entries are laid out row by row with 32 entries per row, so entry
    ``h_offset`` sits at column ``h_offset % 32`` and row ``h_offset // 32``.

    Args:
        x: Region X coordinate. Accepted for interface compatibility; it does
            not influence the result.
        z: Region Z coordinate. Accepted for interface compatibility; it does
            not influence the result.
        h_offset: Index of the chunk's entry in the region header.

    Returns:
        Tuple ``(c_x, c_z)`` with the chunk's coordinates inside its region.
    """
    # The block-extent values (x_min/x_max/z_min/z_max) that used to be
    # computed here were never read, so that dead code has been dropped.
    c_z, c_x = divmod(h_offset, 32)
    return c_x, c_z


def get_chunk_offset_and_length(header, i):
    """Decode the ``i``-th four-byte entry of a region header.

    Args:
        header: Bytes-like region header.
        i: Zero-based index of the entry to decode.

    Returns:
        Tuple ``(offset, length)``: the entry's first three bytes read as a
        big-endian integer, and its fourth byte.
    """
    start = i * 4
    entry = header[start: start + 4]

    # Bytes 0-2 hold the big-endian offset; byte 3 holds the length.
    return int.from_bytes(entry[:3], 'big'), entry[3]
    

def read_region_file(filepath):
    """Read and decompress every chunk stored in a region (``.mca``) file.

    Args:
        filepath: Path to a region file whose base name has the form
            ``r.<x>.<z>.mca``.

    Returns:
        Dict mapping the ``(c_x, c_z)`` tuple returned by
        ``translate_chunk_location`` to the decompressed chunk bytes. Entries
        that are absent, truncated, have an invalid length or use an
        unsupported compression type are skipped (a message is printed for
        all but absent entries).

    Raises:
        ValueError, IndexError: If the file name does not carry integer region
            coordinates in its second and third dot-separated parts.
        zlib.error: If a chunk's compressed payload is corrupt.
    """
    # Filename format is r.x.z.mca; basename() copes with any path separator.
    name_parts = os.path.basename(filepath).split('.')
    x = int(name_parts[1])
    z = int(name_parts[2])

    chunks = {}
    # Open the path the caller supplied instead of rebuilding one under a
    # hard-coded directory, so files outside data/region/ are read correctly.
    with open(filepath, 'rb') as file:
        # Region files begin with an 8192 byte header.
        header = file.read(8192)
        if len(header) < 8192:
            # Empty or truncated file: there is no complete location table.
            print(f"Incomplete region header in {filepath}.")
            return chunks

        # The file size does not change while reading, so measure it once.
        file.seek(0, 2)
        file_size = file.tell()

        # Locations (1024 entries).
        for i in range(1024):
            offset, length = get_chunk_offset_and_length(header, i)

            if offset == 0 and length == 0:
                continue  # Chunk is not present

            # Convert the offset to bytes (multiply by 4096).
            offset *= 4096
            if file_size < offset:
                # Offset is past the end of the file, no chunk exists.
                continue

            file.seek(offset)
            chunk_header = file.read(5)  # Chunk length and compression type
            if len(chunk_header) < 5:
                print(f"Incomplete chunk header for chunk {i}.")
                continue

            chunk_length, compression_type = struct.unpack('>IB', chunk_header)
            chunk_length -= 1  # Subtract the compression type byte
            if chunk_length < 0:
                # A stored length of 0 would turn into read(-1), which reads
                # the whole remainder of the file.
                print(f"Invalid chunk length for chunk {i}.")
                continue

            compressed_chunk_data = file.read(chunk_length)
            if len(compressed_chunk_data) < chunk_length:
                print(f"Incomplete chunk data for chunk {i}.")
                continue

            # Honour the compression type byte instead of assuming zlib.
            if compression_type == 1:
                # gzip container: wbits with +16 selects gzip decoding.
                chunk_data = zlib.decompress(
                    compressed_chunk_data, wbits=zlib.MAX_WBITS | 16
                )
            elif compression_type == 2:
                chunk_data = zlib.decompress(compressed_chunk_data)
            elif compression_type == 3:
                # Stored uncompressed.
                chunk_data = compressed_chunk_data
            else:
                print(
                    f"Unsupported compression type {compression_type} "
                    f"for chunk {i}."
                )
                continue

            chunks[translate_chunk_location(x, z, i)] = chunk_data
    return chunks
        

In [2]:
def read_chunk(the_chunk):
    """Parse raw chunk NBT bytes and print a short summary of each section.

    For every entry of the chunk's ``sections`` list this prints, in order,
    the section's ``Y`` tag, its block-state palette tag and the number of
    entries in that palette.

    Args:
        the_chunk: Decompressed chunk data as bytes.
    """
    # NBTFile reads from a file-like object, so wrap the bytes in a stream.
    root = NBTFile(io.BytesIO(the_chunk))

    for section in root['sections']:
        print(section['Y'])
        palette = section['block_states']['palette']
        print(palette)
        print(len(palette))
    

In [3]:
# Directory holding the region files to load.
REGION_DIR = 'data/region'

# os.listdir() returns entries in arbitrary order; sort them so the load order
# (and therefore which chunk wins on a key collision below) is reproducible,
# and skip anything that is not a region file.
fnames = sorted(
    fname for fname in os.listdir(REGION_DIR) if fname.endswith('.mca')
)

all_chunks = {}

for fname in fnames:
    print(fname)
    file_chunks = read_region_file(f'{REGION_DIR}/{fname}')
    print(f"Loaded {len(file_chunks)} chunks.")
    # NOTE(review): the keys are region-local (c_x, c_z) tuples, so chunks
    # from different region files that share local coordinates overwrite each
    # other here. Switch to region-qualified keys if every chunk must be kept.
    all_chunks.update(file_chunks)

r.-1.-1.mca
Loaded 288 chunks.
r.-1.0.mca
Loaded 142 chunks.
r.-2.-1.mca
Loaded 209 chunks.
r.-2.0.mca
Loaded 117 chunks.


In [4]:
# Inspect a single chunk. The key is the (c_x, c_z) tuple that
# translate_chunk_location derives from the chunk's header entry index.
read_chunk(all_chunks[(3, 26)])

TAG_Byte(-4, 'Y')
TAG_List(8 entries, 'palette')
8
TAG_Byte(-3, 'Y')
TAG_List(6 entries, 'palette')
6
TAG_Byte(-2, 'Y')
TAG_List(8 entries, 'palette')
8
TAG_Byte(-1, 'Y')
TAG_List(8 entries, 'palette')
8
TAG_Byte(0, 'Y')
TAG_List(16 entries, 'palette')
16
TAG_Byte(1, 'Y')
TAG_List(23 entries, 'palette')
23
TAG_Byte(2, 'Y')
TAG_List(11 entries, 'palette')
11
TAG_Byte(3, 'Y')
TAG_List(17 entries, 'palette')
17
TAG_Byte(4, 'Y')
TAG_List(23 entries, 'palette')
23
TAG_Byte(5, 'Y')
TAG_List(14 entries, 'palette')
14
TAG_Byte(6, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(7, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(8, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(9, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(10, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(11, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(12, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(13, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(14, 'Y')
TAG_List(1 entries, 'palette')
1
TAG_Byte(15, 'Y')
TAG_List(1 