# PNG Parser in pure Python

[libpng file specification](http://www.libpng.org/pub/png/spec/1.2/PNG-Structure.html)

In [179]:
from pathlib import Path
from dataclasses import dataclass, field
from io import BytesIO

In [180]:
# When true, the file-reading loop and PNGImage.parse_all() print progress lines.
VERBOSE = True

In [181]:
def _p_uint(b):
    """Decode the bytes-like ``b`` as an unsigned big-endian integer.

    An empty input decodes to 0.
    """
    return int.from_bytes(b, 'big')

def _p_int(b):
    """Decode the bytes-like ``b`` as a signed (two's complement) big-endian integer.

    An empty input decodes to 0.
    """
    return int.from_bytes(b, 'big', signed=True)

In [182]:
def at_eof(fo):
    """Return True when the seekable binary stream ``fo`` has no more data.

    The check peeks one byte. When a byte is available the stream is moved
    back so the caller's position is unchanged. When nothing could be read
    the position is left alone: seeking back at that point would rewind the
    stream by one byte (or try to move before the start of an empty file).
    """
    probe = fo.read(1)
    if not probe:
        return True
    # Undo the one-byte peek (whence=1 means "relative to current position").
    fo.seek(-1, 1)
    return False

In [183]:
# Collect every path matching *.png directly inside the current directory.
cwd = Path('.')
target_files = list(cwd.glob('*.png'))

# Bare expression so the notebook displays the collected paths as cell output.
target_files

[WindowsPath('palette-test.png'),
 WindowsPath('Screenshot 2023-04-01 183103.png')]

In [184]:
# The eight signature bytes compared against the first eight bytes of each file
# (equal to b'\x89PNG\r\n\x1a\n').
PNG_SIGNATURE = bytearray([137, 80, 78, 71, 13, 10, 26, 10])
# Single NUL byte used as the field separator by the text chunk parsers.
NULL_SEP = b'\0' # or b'\x00'

In [185]:
class _BaseEnum: pass

def enum_fetch_all(enum: _BaseEnum):
    """Return the values of every member attribute of the class ``enum``.

    ``enum`` is the class itself (e.g. ``ENUM_CT_PNG``), not an instance.
    Attributes are visited in ``dir()`` order, i.e. sorted by name, and
    inherited members are included.

    Every dunder attribute (``__name__`` style) is skipped. A fixed list of
    reserved names is not enough: classes can carry further
    interpreter-provided dunders such as ``__annotations__`` or, on newer
    Python versions, ``__firstlineno__`` and ``__static_attributes__``, and
    those would otherwise be returned as if they were enum members.
    """
    return [
        getattr(enum, name)
        for name in dir(enum)
        if not (name.startswith('__') and name.endswith('__'))
    ]

class ENUM_CT_PNG(_BaseEnum):
    """Four-byte chunk type tags recognised by this parser.

    Each attribute holds the raw ``bytes`` tag as it is read from the file.
    The trailing comments record the ordering constraint noted for each
    ancillary chunk relative to PLTE and IDAT.
    """

    # Critical chunk types
    IHDR = b'IHDR'
    PLTE = b'PLTE'
    IDAT = b'IDAT'
    IEND = b'IEND'

    # Ancillary chunk types
    cHRM = b'cHRM'    # Before PLTE and IDAT
    gAMA = b'gAMA'    # Before PLTE and IDAT
    iCCP = b'iCCP'    # Before PLTE and IDAT
    sBIT = b'sBIT'    # Before PLTE and IDAT
    sRGB = b'sRGB'    # Before PLTE and IDAT
    bKGD = b'bKGD'    # After PLTE; before IDAT
    hIST = b'hIST'    # After PLTE; before IDAT
    tRNS = b'tRNS'    # After PLTE; before IDAT
    pHYs = b'pHYs'    # Before IDAT
    sPLT = b'sPLT'    # Before IDAT
    tIME = b'tIME'
    iTXt = b'iTXt'
    tEXt = b'tEXt'
    zTXt = b'zTXt'

In [186]:
enum_fetch_all(ENUM_CT_PNG)

[b'IDAT',
 b'IEND',
 b'IHDR',
 b'PLTE',
 b'bKGD',
 b'cHRM',
 b'gAMA',
 b'hIST',
 b'iCCP',
 b'iTXt',
 b'pHYs',
 b'sBIT',
 b'sPLT',
 b'sRGB',
 b'tEXt',
 b'tIME',
 b'tRNS',
 b'zTXt']

In [187]:
# Human-readable names for the sRGB rendering intent byte; the list index is
# the numeric intent value read from the chunk (see T_sRGB.parse).
LS_CT_sRGB_INTENT_V = [
    'Perceptual',
    'Relative colorimetric',
    'Saturation',
    'Absolute colorimetric'
]

In [188]:
# Forward declarations.
#
# These empty placeholder classes only make the names exist before the real
# definitions further down, so that annotations and base-class references
# evaluated at class-creation time (e.g. the ``PNGImage`` annotation on
# I_ChunkTemplate) resolve. Each name is rebound to its full definition later.
class PNGImage: pass
class I_ChunkTemplate: pass

# Critical chunk types
class T_IHDR(I_ChunkTemplate): pass
class T_PLTE(I_ChunkTemplate): pass
class T_IDAT(I_ChunkTemplate): pass
class T_IEND(I_ChunkTemplate): pass

# Ancillary chunk types
class T_cHRM(I_ChunkTemplate): pass
class T_gAMA(I_ChunkTemplate): pass
class T_iCCP(I_ChunkTemplate): pass
class T_sBIT(I_ChunkTemplate): pass
class T_sRGB(I_ChunkTemplate): pass
class T_bKGD(I_ChunkTemplate): pass
class T_hIST(I_ChunkTemplate): pass
class T_tRNS(I_ChunkTemplate): pass
class T_pHYs(I_ChunkTemplate): pass
class T_sPLT(I_ChunkTemplate): pass
class T_tIME(I_ChunkTemplate): pass
class T_iTXt(I_ChunkTemplate): pass
class T_tEXt(I_ChunkTemplate): pass
class T_zTXt(I_ChunkTemplate): pass

class C_RGB: pass

In [189]:
@dataclass
class C_RGB:
    """One RGB colour; T_PLTE.parse fills each channel from a single byte."""
    r: int = 0  # red channel
    g: int = 0  # green channel
    b: int = 0  # blue channel

In [190]:
@dataclass
class I_ChunkTemplate:
    """Common record for one chunk read from a PNG file.

    Subclasses add the decoded fields of a specific chunk type and override
    parse() to fill them from ``chunk_data``.
    """
    _png_instance   : PNGImage = None   # owning image, set by set_pngImageInstance()
    chunk_size      : int = 0           # length field read from the file
    chunk_type      : bytes = b''       # four-byte type tag
    chunk_data      : BytesIO = None    # payload stream (None when the loader saw size 0)
    chunk_crc       : bytes = b''       # four CRC bytes as read; not verified here

    def set_pngImageInstance(self, instance):
        """Remember ``instance`` as the image this chunk belongs to."""
        self._png_instance = instance

    def parse(self):
        """Fallback parser: print a warning and decode nothing."""
        warning = f'WARNING! chunk type {self.chunk_type} parser method is not yet overridden. skipping procedure.'
        print(warning)


In [191]:
@dataclass
class T_IHDR(I_ChunkTemplate):
    """Image header chunk: dimensions and pixel format fields."""
    width               : int = None
    height              : int = None
    bit_depth           : int = None
    color_type          : int = None
    compression_method  : int = None
    filter_method       : int = None
    interlace_method    : int = None

    def parse(self):
        """Decode the header fields from ``chunk_data``.

        Reads two 4-byte integers (width, height) followed by five 1-byte
        integers, in the order the attributes are declared.

        All fields are decoded as unsigned. Decoding the single-byte fields
        as signed would report any byte of 0x80 or above as a negative
        number.

        Raises:
            AssertionError: if width or height is zero.
        """
        self.width              = _p_uint(self.chunk_data.read(4))
        self.height             = _p_uint(self.chunk_data.read(4))
        self.bit_depth          = _p_uint(self.chunk_data.read(1))
        self.color_type         = _p_uint(self.chunk_data.read(1))
        self.compression_method = _p_uint(self.chunk_data.read(1))
        self.filter_method      = _p_uint(self.chunk_data.read(1))
        self.interlace_method   = _p_uint(self.chunk_data.read(1))

        assert self.width != 0, 'invalid value: width is zero'
        assert self.height != 0, 'invalid value: height is zero'

@dataclass
class T_PLTE(I_ChunkTemplate):
    """Palette chunk: a sequence of RGB entries."""
    entries : list = field(default_factory=list)

    def parse(self):
        """Append one C_RGB per three bytes of chunk data to ``entries``.

        The number of entries is derived from ``chunk_size``.

        Raises:
            AssertionError: if ``chunk_size`` is not a multiple of 3.
        """
        entry_count, leftover = divmod(self.chunk_size, 3)
        assert leftover == 0, 'invalid value: chunk length is not divisible by 3'

        for _ in range(entry_count):
            # Keyword arguments are evaluated left to right, so the bytes are
            # consumed in r, g, b order.
            colour = C_RGB(
                r=_p_uint(self.chunk_data.read(1)),
                g=_p_uint(self.chunk_data.read(1)),
                b=_p_uint(self.chunk_data.read(1)),
            )
            self.entries.append(colour)
    
@dataclass
class T_IDAT(I_ChunkTemplate):
    """Image data chunk; the payload is kept in ``chunk_data`` undecoded."""

    def parse(self):
        """No-op override: nothing is decoded and no warning is printed."""
        return None

@dataclass
class T_IEND(I_ChunkTemplate):
    """Image trailer chunk; it has no fields of its own."""

    def parse(self):
        """No-op override: nothing is decoded and no warning is printed."""
        return None

@dataclass
class T_cHRM(I_ChunkTemplate):
    """Chromaticity chunk: white point and red/green/blue x, y values.

    Values are stored as the raw unsigned integers read from the chunk; no
    scaling is applied.
    """
    white_point_x : int = 0
    white_point_y : int = 0
    red_x : int = 0
    red_y : int = 0
    green_x : int = 0
    green_y : int = 0
    blue_x : int = 0
    blue_y : int = 0

    def parse(self):
        """Read eight consecutive 4-byte unsigned integers into the fields."""
        field_order = (
            'white_point_x', 'white_point_y',
            'red_x', 'red_y',
            'green_x', 'green_y',
            'blue_x', 'blue_y',
        )
        for attr_name in field_order:
            setattr(self, attr_name, _p_uint(self.chunk_data.read(4)))

@dataclass
class T_gAMA(I_ChunkTemplate):
    """Gamma chunk; ``gamma`` holds the raw unsigned integer from the chunk."""
    gamma: int = 0

    def parse(self):
        """Read the single 4-byte unsigned gamma value."""
        raw_gamma = self.chunk_data.read(4)
        self.gamma = _p_uint(raw_gamma)

@dataclass
class T_iCCP(I_ChunkTemplate):
    """Placeholder for iCCP chunks: no fields yet; parse() is inherited from the base class."""
    ...

@dataclass
class T_sBIT(I_ChunkTemplate):
    """Placeholder for sBIT chunks: no fields yet; parse() is inherited from the base class."""
    ...

@dataclass
class T_sRGB(I_ChunkTemplate):
    """sRGB chunk: rendering intent as a number and, when known, as a name."""
    rendering_intent : int = None
    rendering_intent_value : str = None

    def parse(self):
        """Read the 1-byte rendering intent and look up its name.

        ``rendering_intent_value`` is taken from ``LS_CT_sRGB_INTENT_V``.
        When the intent byte is outside that table the name is set to None
        instead of raising IndexError, so one unexpected byte does not abort
        parsing of the remaining chunks.
        """
        self.rendering_intent = _p_uint(self.chunk_data.read(1))
        if self.rendering_intent < len(LS_CT_sRGB_INTENT_V):
            self.rendering_intent_value = LS_CT_sRGB_INTENT_V[self.rendering_intent]
        else:
            self.rendering_intent_value = None

@dataclass
class T_bKGD(I_ChunkTemplate):
    """Placeholder for bKGD chunks: no fields yet; parse() is inherited from the base class."""
    ...

@dataclass
class T_hIST(I_ChunkTemplate):
    """Placeholder for hIST chunks: no fields yet; parse() is inherited from the base class."""
    ...

@dataclass
class T_tRNS(I_ChunkTemplate):
    """Placeholder for tRNS chunks: no fields yet; parse() is inherited from the base class."""
    ...

@dataclass
class T_pHYs(I_ChunkTemplate):
    """pHYs chunk: pixels per unit on each axis plus a unit specifier byte."""
    pixel_per_unit_x: int = None
    pixel_per_unit_y: int = None
    unit_specifier: int = None

    def parse(self):
        """Read two 4-byte unsigned integers followed by one unsigned byte."""
        layout = (
            ('pixel_per_unit_x', 4),
            ('pixel_per_unit_y', 4),
            ('unit_specifier', 1),
        )
        for attr_name, width in layout:
            setattr(self, attr_name, _p_uint(self.chunk_data.read(width)))

@dataclass
class T_sPLT(I_ChunkTemplate):
    """Placeholder for sPLT chunks: no fields yet; parse() is inherited from the base class."""
    ...

@dataclass
class T_tIME(I_ChunkTemplate):
    """Placeholder for tIME chunks: no fields yet; parse() is inherited from the base class."""
    ...

@dataclass
class T_iTXt(I_ChunkTemplate):
    """International text chunk.

    ``text`` is stored exactly as found in the chunk; it is not decompressed
    or decoded here, whatever ``compression_flag`` says.
    """
    keyword             : bytes = b''
    compression_flag    : int = None
    compression_method  : int = None
    language_tag        : bytes = b''
    translated_keyword  : bytes = b''
    text                : bytes = b''

    def parse(self):
        """Split the chunk into its fields.

        Layout: keyword, NUL, compression flag (1 byte), compression method
        (1 byte), language tag, NUL, translated keyword, NUL, text.

        The fields are peeled off one at a time. Splitting the whole payload
        on NUL does not work: the flag and method bytes are themselves
        usually zero, and the text may contain zero bytes, so the number of
        pieces is unpredictable.

        Raises:
            ValueError: if a separator or the two compression bytes are
                missing.
        """
        chunk = self.chunk_data.read()

        keyword, sep, rest = chunk.partition(NULL_SEP)
        if not sep or len(rest) < 2:
            raise ValueError('invalid iTXt chunk: missing keyword separator or compression bytes')
        compression_flag, compression_method = rest[0], rest[1]

        language_tag, sep, rest = rest[2:].partition(NULL_SEP)
        if not sep:
            raise ValueError('invalid iTXt chunk: missing language tag separator')

        translated_keyword, sep, text = rest.partition(NULL_SEP)
        if not sep:
            raise ValueError('invalid iTXt chunk: missing translated keyword separator')

        # Assign only after the whole chunk validated, so a malformed chunk
        # does not leave the instance half-filled.
        self.keyword = keyword
        self.compression_flag = compression_flag
        self.compression_method = compression_method
        self.language_tag = language_tag
        self.translated_keyword = translated_keyword
        self.text = text

@dataclass
class T_tEXt(I_ChunkTemplate):
    """Textual data chunk: a keyword and its text, both kept as raw bytes."""
    # Defaults are bytes so they match the annotations and the parsed values.
    keyword : bytes = b''
    text    : bytes = b''

    def parse(self):
        """Split the chunk at the first NUL into ``keyword`` and ``text``.

        Only the first NUL separates the fields; any later NUL byte stays in
        ``text`` instead of breaking the unpacking.

        Raises:
            ValueError: if the chunk contains no NUL separator.
        """
        chunk = self.chunk_data.read()
        self.keyword, self.text = chunk.split(NULL_SEP, 1)


@dataclass
class T_zTXt(I_ChunkTemplate):
    """Compressed text chunk.

    ``compressed_text`` is stored as found; it is not decompressed here.
    """
    keyword             : bytes = b''
    compression_method  : int = None
    compressed_text     : bytes = b''

    def parse(self):
        """Split the chunk into keyword, compression method and compressed data.

        Layout: keyword, NUL, compression method (1 byte), compressed data.

        Only the first NUL is a separator. The method byte is normally zero
        and the compressed data may contain zero bytes, so splitting on
        every NUL would yield an unpredictable number of pieces.

        Raises:
            ValueError: if the keyword separator or the compression method
                byte is missing.
        """
        chunk = self.chunk_data.read()
        keyword, sep, compression_info = chunk.partition(NULL_SEP)
        if not sep or not compression_info:
            raise ValueError('invalid zTXt chunk: missing keyword separator or compression method')

        self.keyword = keyword
        self.compression_method = compression_info[0]
        self.compressed_text = compression_info[1:]


In [192]:
class PNGImage:
    """In-memory representation of one PNG file: its name and its chunks."""
    # signature   : bytes
    # _chunks     : list[I_ChunkTemplate]

    def __init__(self):
        self.signature = PNG_SIGNATURE   # expected leading bytes of the file
        self._chunks   = []              # chunk objects in file order
        self._fname    = ''
        self._ext      = '.png'          # required (lower-case) filename suffix

    def set_filename(self, filename: str):
        """Store ``filename`` after checking that it ends in ``.png``.

        The comparison ignores case so that names such as ``IMAGE.PNG`` are
        accepted; such files can be picked up by globbing on
        case-insensitive filesystems.

        Raises:
            AssertionError: if the name does not end with the extension.
        """
        assert filename.lower().endswith(self._ext), 'invalid value: filename extension is not .png'
        self._fname = filename

    def get_filename(self):
        """Return the name stored by set_filename() ('' if never set)."""
        return self._fname

    def check_signature(self, signature: bytes):
        """Return True when ``signature`` equals the expected PNG signature."""
        return signature == self.signature

    def append_chunk(self, ct: I_ChunkTemplate):
        """Attach this image to ``ct`` and add ``ct`` to the end of the chunk list."""
        ct.set_pngImageInstance(self)
        self._chunks.append(ct)

    def get_chunks(self, chunk_type_enum = None):
        """Return the stored chunks, optionally filtered by type.

        With a truthy ``chunk_type_enum`` a new list of the chunks whose
        ``chunk_type`` equals it is returned. Otherwise the internal list
        itself is returned (not a copy).
        """
        if chunk_type_enum:
            return [c for c in self._chunks if c.chunk_type == chunk_type_enum]
        return self._chunks

    def parse_all(self):
        """Call parse() on every stored chunk, in order."""
        for c in self._chunks:
            if VERBOSE: print(f'parsing {c.chunk_type}; {c.chunk_size} bytes')
            c.parse()

    def __repr__(self):
        # Shorter than the default object repr: class name and id only,
        # without the module path.
        return f'<{self.__class__.__name__} {hex(id(self))}>'

In [193]:
# Maps each chunk type tag to the class used to represent that chunk.
# The file-reading loop looks the tag up here and calls the class with no
# arguments to create the chunk object.
headers_struct_constructor_dict = {
    # Critical chunk types
    ENUM_CT_PNG.IHDR: T_IHDR,
    ENUM_CT_PNG.IDAT: T_IDAT,
    ENUM_CT_PNG.IEND: T_IEND,
    ENUM_CT_PNG.PLTE: T_PLTE,

    # Ancillary chunk types
    ENUM_CT_PNG.cHRM: T_cHRM,
    ENUM_CT_PNG.gAMA: T_gAMA,
    ENUM_CT_PNG.iCCP: T_iCCP,
    ENUM_CT_PNG.sBIT: T_sBIT,
    ENUM_CT_PNG.sRGB: T_sRGB,
    ENUM_CT_PNG.bKGD: T_bKGD,
    ENUM_CT_PNG.hIST: T_hIST,
    ENUM_CT_PNG.tRNS: T_tRNS,
    ENUM_CT_PNG.pHYs: T_pHYs,
    ENUM_CT_PNG.sPLT: T_sPLT,
    ENUM_CT_PNG.tIME: T_tIME,
    ENUM_CT_PNG.iTXt: T_iTXt,
    ENUM_CT_PNG.tEXt: T_tEXt,
    ENUM_CT_PNG.zTXt: T_zTXt,
}

In [194]:
# Read every target file into a PNGImage: check the signature, collect the
# raw chunks, then run each chunk's parser.
pngs = []

# The set of recognised chunk types does not change while reading, so build
# it once instead of once per chunk.
known_chunk_types = set(enum_fetch_all(ENUM_CT_PNG))

for target_file_path in target_files:

    if VERBOSE: print('target file:', target_file_path)

    png = PNGImage()
    png.set_filename(target_file_path.name)

    with open(target_file_path, 'rb') as fo:
        png_signature = fo.read(8)
        if VERBOSE: print('signature:', png_signature)

        assert png.check_signature(png_signature), f'file does not contain a correct PNG signature: {png_signature}'

        # Each chunk is: 4-byte length, 4-byte type, <length> data bytes,
        # 4-byte CRC. The CRC is stored but not verified.
        while not at_eof(fo):
            chunk_size = _p_uint(fo.read(4))
            chunk_type = fo.read(4)
            # Zero-length chunks (e.g. IEND) carry no data stream.
            chunk_data = BytesIO(fo.read(chunk_size)) if chunk_size else None
            chunk_crc = fo.read(4)

            if chunk_type not in known_chunk_types:
                raise ValueError(f'chunk type is not present in ENUM_CT_PNG: {chunk_type}')

            elif chunk_type not in headers_struct_constructor_dict:
                print(f'WARNING! chunk type {chunk_type} parser is not yet implemented')
                continue

            t = headers_struct_constructor_dict[chunk_type]()
            t.chunk_size = chunk_size
            t.chunk_data = chunk_data
            t.chunk_type = chunk_type
            t.chunk_crc = chunk_crc

            png.append_chunk(t)

        png.parse_all()
        pngs.append(png)

    if VERBOSE: print()

target file: palette-test.png
signature: b'\x89PNG\r\n\x1a\n'
parsing b'IHDR'; 13 bytes
parsing b'tEXt'; 25 bytes
parsing b'PLTE'; 30 bytes
parsing b'tRNS'; 10 bytes
parsing b'IDAT'; 5266 bytes
parsing b'IEND'; 0 bytes

target file: Screenshot 2023-04-01 183103.png
signature: b'\x89PNG\r\n\x1a\n'
parsing b'IHDR'; 13 bytes
parsing b'sRGB'; 1 bytes
parsing b'gAMA'; 4 bytes
parsing b'pHYs'; 9 bytes
parsing b'IDAT'; 4445 bytes
parsing b'IEND'; 0 bytes



In [195]:
# Dump the result: for each loaded image print its filename, then one line per
# chunk (the dataclass repr), then a blank line.
for png in pngs:
    print(png.get_filename())
    for c in png.get_chunks():
        print(c)
    print()


palette-test.png
T_IHDR(_png_instance=<PNGImage 0x2a31c177cd0>, chunk_size=13, chunk_type=b'IHDR', chunk_data=<_io.BytesIO object at 0x000002A31C1075B0>, chunk_crc=b'\xdb\xce\x9c\xbd', width=1080, height=1080, bit_depth=8, color_type=3, compression_method=0, filter_method=0, interlace_method=0)
T_tEXt(_png_instance=<PNGImage 0x2a31c177cd0>, chunk_size=25, chunk_type=b'tEXt', chunk_data=<_io.BytesIO object at 0x000002A319FB7650>, chunk_crc=b'q\xc9e<', keyword=b'Software', text=b'Adobe ImageReady')
T_PLTE(_png_instance=<PNGImage 0x2a31c177cd0>, chunk_size=30, chunk_type=b'PLTE', chunk_data=<_io.BytesIO object at 0x000002A319FC5350>, chunk_crc=b'T\xba\x08\x96', entries=[C_RGB(r=255, g=255, b=255), C_RGB(r=0, g=45, b=115), C_RGB(r=75, g=255, b=46), C_RGB(r=139, g=255, b=243), C_RGB(r=0, g=0, b=0), C_RGB(r=240, g=255, b=46), C_RGB(r=0, g=129, b=255), C_RGB(r=208, g=0, b=76), C_RGB(r=191, g=191, b=191), C_RGB(r=255, g=255, b=255)])
T_tRNS(_png_instance=<PNGImage 0x2a31c177cd0>, chunk_size=10