<a href="https://colab.research.google.com/github/lardratboy/vomitpngs_playground/blob/main/vomitpngs_playground.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# VOMIT PNG's 1.0 - CHUNKY DATA EXPLORER

This tool dumps IFF-like chunk files. Each chunk starts with a 4-byte ASCII id, followed by a big-endian 32-bit size, followed by the contained data. The size field counts both the data and the 8-byte header, which makes these chunks slightly different from standard IFF/RIFF chunks.

Step 0. (optional) Connect to Google Drive

In [None]:
# Tick the form checkbox (or set `mount = True`) to mount Google Drive at
# /content/drive before running the remaining steps.
mount = False #@param {type:"boolean"}
if mount:
  # google.colab is only imported when mounting was actually requested.
  from google.colab import drive
  drive.mount('/content/drive')

Step 1. Set up what you want to dump and where to put it

In [None]:
# Form values consumed by the Step 4 cell, which forwards them to inner_main().

# Path of the chunk file to read (opened in binary mode).
test_filename = '/content/something.he1' #@param {type:"string"}
# %-style pattern for output names; it is filled with (chunk id, number).
test_output_format = '%4s_%08d.png' #@param {type:"string"}
# Every input byte is XORed with this value before parsing; a falsy value
# (0 / None) leaves the data untouched.
test_xor_key = 105 #@param {type:"number"}
# Page size handed to the collector for bundled output.
test_page_width = 1024 #@param {type:"number"}
test_page_height = 1024 #@param {type:"number"}
# True: pack the per-chunk images onto shared pages; False: save each image
# to its own file.
test_bundle = True #@param {type:"boolean"}
# True: one data byte per pixel; False: three data bytes per pixel.
test_gray = False #@param {type:"boolean"}

Step 2. Define a rectangle packing system

In [None]:
# https://gist.github.com/lardratboy/847d8901f4ca60c70677f07f2edc4d66

class Rect:
    """Axis-aligned rectangle stored as its four edge coordinates."""

    def __init__(self, left, top, right, bottom):
        # Horizontal edges first, then vertical ones.
        self.left, self.right = left, right
        self.top, self.bottom = top, bottom

    def width(self):
        """Return the horizontal extent, ``right - left``."""
        extent = self.right - self.left
        return extent

    def height(self):
        """Return the vertical extent, ``bottom - top``."""
        extent = self.bottom - self.top
        return extent

class Page:
    """A fixed-size packing area that tracks free and occupied rectangles."""

    def __init__(self, width, height):
        self.width = width
        self.height = height
        # The whole page starts out as one free rectangle.
        self.free_rects = [Rect(0, 0, width, height)]
        self.occupied_rects = []

    @staticmethod
    def external_clipped_rects(a, b):
        """Yield the parts of ``a`` that lie outside ``b``.

        Strips above and below ``b`` span the full width of ``a``; strips to
        the left and right are limited vertically to the band between them,
        so the yielded rectangles do not overlap each other.

        NOTE(review): no overlap test is performed here; the only caller in
        this class always passes a ``b`` anchored at the top-left of ``a``.
        """
        top, bottom = a.top, a.bottom
        if a.top < b.top:
            top = b.top
            yield Rect(a.left, a.top, a.right, b.top)
        if a.bottom > b.bottom:
            bottom = b.bottom
            yield Rect(a.left, b.bottom, a.right, a.bottom)
        if a.left < b.left:
            yield Rect(a.left, top, b.left, bottom)
        if a.right > b.right:
            yield Rect(b.right, top, a.right, bottom)

    def insert(self, width, height):
        """Place a ``width`` x ``height`` rectangle in the first free slot that fits.

        Returns:
            The occupied ``Rect``, or ``None`` when no free rectangle is
            large enough.
        """
        for free_rect in self.free_rects:
            if free_rect.width() < width or free_rect.height() < height:
                continue

            rect = Rect(
                free_rect.left,
                free_rect.top,
                free_rect.left + width,
                free_rect.top + height,
            )
            self.occupied_rects.append(rect)

            # Mutating free_rects inside the loop is safe only because we
            # return immediately afterwards and never resume iterating.
            self.free_rects.remove(free_rect)
            leftovers = list(Page.external_clipped_rects(free_rect, rect))
            if leftovers:
                self.free_rects.extend(leftovers)
                # Keep the shortest free rectangles first so they are tried
                # before taller ones on the next insert.
                self.free_rects.sort(key=lambda r: r.height())
            return rect

        return None

    def calculate_efficency(self):
        """Return the occupied fraction of the page area (0.0 for an empty-area page)."""
        total_area = self.width * self.height
        if total_area == 0:
            # A degenerate page cannot hold anything; avoid dividing by zero.
            return 0.0
        used_area = sum(rect.width() * rect.height() for rect in self.occupied_rects)
        return used_area / total_area

class Packer:
    """Places rectangles on a growing list of equally sized pages."""

    def __init__(self, width, height):
        self.page_width = width
        self.page_height = height
        # Start with a single empty page; more are appended on demand.
        self.pages = [Page(width, height)]

    def insert(self, width, height):
        """Place a rectangle on the first page that accepts it.

        Existing pages are tried in order. If none has room, a new page is
        appended and the rectangle is inserted there.

        Returns:
            A ``(page, rect)`` tuple. ``rect`` is whatever ``Page.insert``
            returned for the chosen page, so it may be ``None`` when the
            rectangle does not fit even on the freshly created page.
        """
        for candidate in self.pages:
            placed = candidate.insert(width, height)
            if placed:
                return candidate, placed

        fresh_page = Page(self.page_width, self.page_height)
        self.pages.append(fresh_page)
        placed = fresh_page.insert(width, height)
        return fresh_page, placed

Step 3. Chunk collecting and parsing classes

In [None]:
import io, struct, argparse, math
from PIL import Image
import numpy as np
from tqdm import tqdm

class DataChunkCollector:
    """Render selected chunks as PNG images, optionally bundled onto pages.

    The raw bytes of each collected chunk are written into a square image:
    one byte per pixel in grayscale mode, three bytes per pixel otherwise.
    With ``bundle`` enabled the images are kept in memory grouped by chunk id
    and later packed onto fixed-size pages by ``save_bundles``; without it
    every image is saved to its own file as soon as it is collected.
    """

    # Chunk ids that are rendered; chunks with any other id are ignored.
    COLLECTED_IDS = ('AWIZ', 'AKOS', 'SDAT', 'SOUN', 'DIGI', 'RMIM', 'COST', 'FORM')

    def __init__(self, output_formatter=None, save_as_grayscale=False, bundle=False,
                 page_width=1024, page_height=1024):
        """
        Args:
            output_formatter: %-style pattern filled with ``(id, number)`` to
                build output file names. ``None`` disables all image output.
            save_as_grayscale: store one data byte per pixel instead of three.
            bundle: defer saving and pack images onto pages in ``save_bundles``.
            page_width: width of each bundle page.
            page_height: height of each bundle page.
        """
        self.output_formatter = output_formatter
        self.next_number = 1
        self.save_as_grayscale = save_as_grayscale
        self.save_png = output_formatter is not None
        self.bundle = bundle
        self.images_by_types = {}
        self.bundled_images = []
        self.page_width = page_width
        self.page_height = page_height

    def _chunk_to_image(self, data):
        """Build the smallest square image that holds ``data``, zero padded."""
        actual_size = len(data)
        # Three bytes share one RGB pixel, so round the pixel count up.
        pixel_count = actual_size if self.save_as_grayscale else (actual_size + 2) // 3
        side = max(math.ceil(math.sqrt(pixel_count)), 1)
        shape = (side, side) if self.save_as_grayscale else (side, side, 3)

        pixels = np.zeros(shape, dtype=np.uint8)
        # reshape on a freshly allocated array is a view, so this writes
        # straight into ``pixels`` without a per-byte Python list.
        flat = pixels.reshape(-1)
        flat[:actual_size] = np.frombuffer(data, dtype=np.uint8)
        return Image.fromarray(pixels)

    def collect(self, chunk):
        """Convert ``chunk`` to an image and either save it or queue it for bundling.

        Chunks whose id is not in ``COLLECTED_IDS`` are ignored, and nothing
        happens at all when no output formatter was supplied.
        """
        if chunk.id not in self.COLLECTED_IDS:
            return
        if not self.save_png:
            return

        # The running number advances for every collected chunk, even when
        # bundling, where the per-chunk file name itself is not used.
        filename = self.output_formatter % (chunk.id, self.next_number)
        self.next_number += 1

        img = self._chunk_to_image(chunk.data)

        if self.bundle:
            img.chunk = chunk
            self.images_by_types.setdefault(chunk.id, []).append(img)
        else:
            img.save(filename)

    def save_bundles(self):
        """Pack the queued images by chunk id and save one PNG per page.

        Does nothing unless bundling is enabled. Every saved page is appended
        to ``bundled_images`` with ``filename`` and ``page`` attributes
        attached. Images that cannot fit on a page are reported and skipped.
        """
        if not self.bundle:
            return

        packs = []

        # pack images by type

        for chunk_id, images in tqdm(self.images_by_types.items()):

            # Tallest first, so large images claim space before small ones.
            images.sort(key=lambda im: im.height, reverse=True)

            # Fix: the page height was previously passed as page_width.
            packer = Packer(self.page_width, self.page_height)
            packs.append(packer)
            packer.rects_by_image = {}
            packer.id = chunk_id

            for img in images:
                # Reject oversized images up front; asking the packer would
                # only make it append an empty page for nothing.
                if img.width > self.page_width or img.height > self.page_height:
                    print(f'ERROR:: failed to insert {img.width=}, {img.height=} ::ERROR')
                    continue
                page, rect = packer.insert(img.width, img.height)
                if not rect:
                    # Fix: skip instead of dereferencing the missing rect.
                    print(f'ERROR:: failed to insert {img.width=}, {img.height=} ::ERROR')
                    continue
                rect.img = img

        # build and save bundles for each packer

        for packer in packs:
            for i, page in enumerate(packer.pages, 1):
                canvas = Image.new('RGB', (packer.page_width, packer.page_height), color=(0, 0, 0))
                for rect in tqdm(page.occupied_rects):
                    canvas.paste(rect.img, (rect.left, rect.top))
                filename = self.output_formatter % (packer.id, i)
                canvas.save(filename)
                canvas.filename = filename
                canvas.page = page
                self.bundled_images.append(canvas)


Main logic for traversing the nested chunks

In [None]:
class Chunk:
    """One node of the parsed chunk tree."""

    def __init__( self, id, offset, depth=0 ):
        # Identity and position, as supplied by the parser.
        self.id, self.offset, self.depth = id, offset, depth
        # Filled in later: raw bytes for leaf chunks, child nodes for containers.
        self.data = None
        self.children = list()

def skip_possible_garbage_data( input, chunk, remaining_data ):
    """Skip bytes after a ``DIGI`` chunk until the next 4 bytes decode as ASCII.

    Chunks with any other id are left alone. For ``DIGI`` the stream is
    probed four bytes at a time; each time those bytes are not valid ASCII
    the start position is moved forward by one byte, up to
    ``remaining_data`` bytes in total.

    Args:
        input: seekable binary stream positioned just after ``chunk``.
        chunk: the chunk that was just read; only ``chunk.id`` is used.
        remaining_data: upper bound on how many bytes may be skipped.

    Returns:
        The number of bytes skipped. On return the stream is positioned that
        many bytes past where it started, so the caller can subtract the
        value from its own remaining-byte count.
    """
    if chunk.id not in ( 'DIGI', ):
        return 0

    start_offset = input.tell()
    skipped = 0

    while skipped < remaining_data:
        try:
            # Only the decode outcome matters; the text itself is discarded.
            input.read( 4 ).decode( 'ascii' )
        except UnicodeDecodeError:
            print( 'possible garbage trying to skip' )
            skipped += 1
            input.seek( start_offset + skipped )
        else:
            break

    # Rewind the probe read. Fix: report the bytes actually skipped rather
    # than 0, otherwise the caller's accounting drifts from the stream.
    input.seek( start_offset + skipped )
    return skipped

def process_chunk( input, parent_chunk, remaining_data, depth = 0, collector = None, bundle = False ):
    """Recursively walk the chunk tree rooted at ``parent_chunk``.

    A chunk whose id is not one of the known container ids is treated as a
    leaf: its 8-byte header plus payload are read into ``parent_chunk.data``
    and offered to ``collector``. The root (id ``None``) and container
    chunks are scanned for child chunks, which are appended to
    ``parent_chunk.children`` and processed one level deeper.

    Args:
        input: seekable binary stream positioned just after the header of
            ``parent_chunk`` (or at the start of the data for the root).
        parent_chunk: node to fill in.
        remaining_data: number of payload bytes belonging to ``parent_chunk``.
        depth: nesting level, used for log indentation and child depth.
        collector: optional object whose ``collect(chunk)`` receives leaves.
        bundle: not used here; forwarded unchanged to recursive calls.
    """
    container_ids = ( 'MULT', 'WRAP', 'TALK', 'TLKB', 'LECF', 'LFLF', 'SONG', 'NEST', 'RMDA', 'OBIM', 'ROOM' )

    if parent_chunk.id is not None and parent_chunk.id not in container_ids:

        print( ' ' * depth + f'{parent_chunk.id} size={remaining_data}' )

        # Step back over the header so the stored data includes it.
        input.seek( input.tell() - 8 )
        parent_chunk.data = input.read( remaining_data + 8 )
        if collector: collector.collect( parent_chunk )
        return

    print( ' ' * depth + f'processing {parent_chunk.id}' )

    while 8 < remaining_data:

        offset = input.tell()
        header = input.read( 8 )
        if len( header ) < 8:
            # The declared sizes promised more data than the stream holds.
            print( ' ' * depth + f'truncated chunk header at offset {offset}, stopping' )
            break

        chunk_id = header[ :4 ].decode( 'ascii' )
        size = struct.unpack( '>I', header[ 4: ] )[0]
        if size < 8:
            # The size includes the 8-byte header, so anything smaller is
            # corrupt; a size of 0 used to make this loop spin forever.
            print( ' ' * depth + f'invalid size {size} for chunk {chunk_id!r} at offset {offset}, stopping' )
            break

        chunk = Chunk( chunk_id, offset, depth=depth )
        parent_chunk.children.append( chunk )
        process_chunk( input, chunk, size - 8, depth + 1, collector=collector, bundle=bundle )
        remaining_data -= size
        skipped = skip_possible_garbage_data( input, chunk, remaining_data )
        remaining_data -= skipped

def parse_chunks_for_file( filename, xor_key = None, collector = None, bundle = False):
    """Read ``filename``, optionally XOR-decode it, and parse its chunk tree.

    Args:
        filename: path of the file to read in binary mode.
        xor_key: byte value XORed with every input byte; a falsy value
            (``None`` or 0) leaves the data unchanged.
        collector: optional collector passed through to ``process_chunk``.
        bundle: passed through to ``process_chunk``.

    Returns:
        The root ``Chunk`` (id ``None``) whose ``children`` hold the parsed tree.
    """
    with open( filename, 'rb' ) as f:
        raw_data = f.read()

    if xor_key:
        # One 256-entry lookup table lets bytes.translate decode the whole
        # buffer at C speed instead of XORing byte by byte in Python.
        xor_table = bytes( value ^ xor_key for value in range( 256 ) )
        raw_data = raw_data.translate( xor_table )

    stream = io.BytesIO( raw_data )
    root_chunk = Chunk( None, stream.tell() )
    process_chunk( stream, root_chunk, len( raw_data ), collector=collector, bundle=bundle )
    return root_chunk

def inner_main(args):
    """Dump the file described by ``args`` and return the collector used.

    ``args`` must provide ``png``, ``gray``, ``bundle``, ``page_width``,
    ``page_height``, ``filename`` and ``xor`` attributes.
    """
    chunk_collector = DataChunkCollector(
        args.png,
        save_as_grayscale=args.gray,
        bundle=args.bundle,
        page_width=args.page_width,
        page_height=args.page_height,
    )

    # The parsed tree itself is not needed here; the collector gathers the
    # images as a side effect of the walk.
    parse_chunks_for_file(
        args.filename,
        xor_key=args.xor,
        collector=chunk_collector,
        bundle=args.bundle,
    )

    chunk_collector.save_bundles()
    return chunk_collector


Step 4. Perform the operation by using the test settings from above

In [None]:
class args_helper:
  """Plain attribute bag holding the settings that inner_main() reads."""

  def __init__( self, filename, gray, png, xor, bundle, page_width, page_height ):
    # Input selection and decoding.
    self.filename, self.xor = filename, xor
    # Output naming and pixel layout.
    self.png, self.gray = png, gray
    # Bundling switch and page dimensions.
    self.bundle = bundle
    self.page_width, self.page_height = page_width, page_height

# Run the dump with the form values chosen in Step 1.
collector = inner_main(
    args_helper(
        filename=test_filename,
        gray=test_gray,
        png=test_output_format,
        xor=test_xor_key,
        bundle=test_bundle,
        page_width=test_page_width,
        page_height=test_page_height,
    )
)

Step 5. Display the page output filenames and the packing efficiency


In [None]:
# Summary line first, then one line per saved bundle page.
print(f'Total number of bundles = {len(collector.bundled_images)}')

for canvas in collector.bundled_images:
  print( f'page {canvas.filename} efficency={canvas.page.calculate_efficency()}')