# Advanced example: parsing PLY files

PLY is a file format storing 3D polygonal data that has support for both ASCII and binary formats. Some references: [Wikipedia entry](https://en.wikipedia.org/wiki/PLY_(file_format)) and [Paul Bourke's pages](http://paulbourke.net/dataformats/ply/).

In [1]:
from __future__ import annotations
from typing import Optional
from dataclasses import dataclass
from functools import partial
import numpy as np
import pprint

from byteparsing import (parse_bytes)
from byteparsing.trampoline import (Parser, parser)
from byteparsing.parsers import (
    sequence, named_sequence, choice, optional, value, repeat_n,
    text_literal, char, text_one_of, text_end_by,
    byte_none_of, byte_one_of,
    flush, flush_decode,
    push, pop,
    many, some, many_char, some_char, many_char_0, some_char_0,
    ascii_alpha, ascii_underscore, ascii_alpha_num,
    integer, scientific_number, array, binary_value,
    fmap, construct)

# Shared pretty-printer used to display parse results in the cells below.
pp = pprint.PrettyPrinter(width=80, indent=2)

## Header

The header starts with a "magic number", a line containing `ply`.

In [2]:
# Line terminator: either "\n\r" or a bare "\n".
# The longer alternative is listed first. Assuming `choice` commits to the
# first alternative that succeeds, trying "\n" first would always win on a
# "\n\r" ending (it is a prefix of it) and leave a stray "\r" behind for the
# next parser to fail on.
eol = choice(text_literal("\n\r"), text_literal("\n"))
# The "magic number": a line consisting of just `ply`.
ply_magic_number = sequence(text_literal("ply"), eol)

The second line indicates which variation of the PLY format this is.

In [3]:
from enum import Enum

class PlyFormat(Enum):
    """The encodings a PLY file can declare on its `format` line."""
    ASCII = 1
    BINARY_LE = 2
    BINARY_BE = 3

    @staticmethod
    def from_string(s: str) -> PlyFormat:
        """Translate a header keyword into the matching enum member.

        Args:
            s: the keyword as it appears in the header.

        Returns:
            The member for "ascii", "binary_little_endian" or
            "binary_big_endian".

        Raises:
            ValueError: if `s` is none of the three known keywords.
        """
        # Kept local: a class-level table would be turned into an enum member.
        keywords = (
            ("ascii", PlyFormat.ASCII),
            ("binary_little_endian", PlyFormat.BINARY_LE),
            ("binary_big_endian", PlyFormat.BINARY_BE),
        )
        for keyword, member in keywords:
            if s == keyword:
                return member
        raise ValueError(f"Unrecognized format string: {s}")

In [4]:
def tokenize(p: Parser) -> Parser:
    """Wrap `p` so that it also consumes the spaces that follow it.

    The result of `p` is stashed with `push`, a run of space characters is
    consumed, and `pop()` then brings the stashed result back, so the spaces
    do not end up in the returned value.
    """
    trailing_spaces = many_char(text_one_of(" "))
    return sequence(p >> push, trailing_spaces, pop())

# Characters allowed after the first one: letters, digits (presumably, going
# by the name `ascii_alpha_num`) and underscores.
_word_tail = many_char_0(choice(ascii_alpha_num, ascii_underscore))

# An identifier-like word: starts with a letter, continues with `_word_tail`.
# The leading flush() discards whatever was collected before the word, and
# flush_decode() hands back the collected text (as `str`, see the 'x' name in
# the parsed properties further down).
word = sequence(flush(), ascii_alpha, _word_tail, flush_decode())

In [5]:
# Pieces of the format line: `format <encoding> 1.0`.
_format_keyword = tokenize(text_literal("format"))
_format_encoding = tokenize(word) >> fmap(PlyFormat.from_string)
_format_version = tokenize(text_literal("1.0"))

# Only the encoding is kept: it is pushed when read and popped again at the
# end, so the parser's value is the PlyFormat member.
ply_format = sequence(
    _format_keyword, _format_encoding >> push, _format_version, eol, pop())

In [6]:
# Try the format-line parser on a little-endian format line.
parse_bytes(ply_format, b"format binary_little_endian 1.0\n")

<PlyFormat.BINARY_LE: 2>

Comments may be placed in the header by starting a line with the word `comment`. Everything from there until the end of the line should then be ignored.

In [7]:
# Pieces of a comment line: the `comment` keyword, then free text up to "\n".
_comment_keyword = tokenize(text_literal("comment"))
_comment_text = text_end_by("\n")

# The flush() after the keyword drops what was collected so far, so only the
# comment text itself is kept. An optional "\r" after the newline is skipped
# before the stashed text is popped back as the result.
ply_comment = sequence(
    _comment_keyword, flush(),
    _comment_text >> push, optional(char("\r")), pop())

In [8]:
# Maps the type keywords used in PLY headers onto NumPy dtype names.
ply_type_table = dict(
    char="int8",
    uchar="uint8",
    short="int16",
    ushort="uint16",
    int="int32",
    uint="uint32",
    float="float32",
    double="float64",
)


class PlyType:
    """Common base class of the property types that occur in a PLY header."""


@dataclass
class PlyPrimitiveType(PlyType):
    """A scalar property type, represented by a NumPy dtype."""
    dtype: np.dtype

    @staticmethod
    def from_string(s: str) -> PlyPrimitiveType:
        """Build the type from a header keyword.

        Keywords listed in `ply_type_table` are translated first; any other
        name is handed to `np.dtype` unchanged.
        """
        numpy_name = ply_type_table[s] if s in ply_type_table else s
        return PlyPrimitiveType(np.dtype(numpy_name))

    @property
    def byte_size(self) -> int:
        """Size in bytes of one value of this type."""
        size = self.dtype.itemsize
        return size

    def ascii(self) -> Parser:
        """Parser for one value of this type in an ASCII data section.

        Collects a run of bytes that are neither newline nor space, followed
        by the newline/space separators after it, and converts the collected
        bytes with the dtype's scalar type.
        """
        token = some_char_0(byte_none_of(b"\n "))
        separators = many_char_0(byte_one_of(b"\n "))
        to_scalar = self.dtype.type
        return sequence(flush(), token, separators, flush(to_scalar))

    def binary(self) -> Parser:
        """Parser for one value of this type in a binary data section."""
        value_parser = binary_value(self.dtype)
        return value_parser
    
@dataclass
class PlyListType(PlyType):
    """A variable-length list property: a count followed by that many values."""
    size_type: PlyPrimitiveType   # type of the leading element count
    value_type: PlyPrimitiveType  # type of each list element

    def ascii(self) -> Parser:
        """Parser for an ASCII list: read the count, then that many values."""
        item = self.value_type.ascii()
        return self.size_type.ascii() >> (lambda count: repeat_n(item, count))

    def binary(self) -> Parser:
        """Parser for a binary list: read the count, then an array of values."""
        element_dtype = self.value_type.dtype
        return self.size_type.binary() >> (
            lambda count: array(element_dtype, count))

In [9]:
# A primitive type is a single keyword, converted to a PlyPrimitiveType.
primitive_type = tokenize(word) >> fmap(PlyPrimitiveType.from_string)

# A list type is `list <size type> <value type>`. The `_1` entry only matches
# the keyword; PlyListType is built from `size_type` and `value_type`.
_list_fields = named_sequence(
    _1=tokenize(text_literal("list")),
    size_type=primitive_type,
    value_type=primitive_type)
list_type = _list_fields >> construct(PlyListType)

# The list form is listed first, ahead of the plain primitive form.
ply_type = choice(list_type, primitive_type)

In [10]:
# Only the leading type keyword is consumed: the result is one primitive type.
parse_bytes(ply_type, b"float float")

PlyPrimitiveType(dtype=dtype('float32'))

In [11]:
# A `list` type: the size type comes first, then the element type.
pp.pprint(parse_bytes(ply_type, b"list uint8 float"))

PlyListType(size_type=PlyPrimitiveType(dtype=dtype('uint8')), value_type=PlyPrimitiveType(dtype=dtype('float32')))


In [12]:
@dataclass
class PlyProperty:
    """One `property` line of the header: a type and the property's name."""
    dtype: PlyType
    name: str


# Grammar of a property line: `property <type> <name>` plus a line ending.
# The underscore-named entries only match text; PlyProperty is built from
# `dtype` and `name`.
_property_fields = named_sequence(
    _1=tokenize(text_literal("property")),
    dtype=ply_type,
    name=tokenize(word),
    _2=eol)
ply_property = _property_fields >> construct(PlyProperty)

In [13]:
# Parse a single property line declaring a float property named `x`.
parse_bytes(
    ply_property,
    b"property float x\n")

PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name='x')

In [14]:
# Marks the end of the header: the literal `end_header` followed by a line ending.
end_header = sequence(text_literal("end_header"), eol)

In [15]:
@dataclass
class PlyElement:
    """One `element` declaration of the header together with its properties.

    Attributes are the element name, the number of records of this element in
    the data section, and the property declarations in header order.
    """
    name: str
    size: int
    # Builtin generic instead of `List`: `typing.List` is never imported in
    # this file, so the old annotation could not be resolved.
    properties: list[PlyProperty]

    def ascii(self) -> Parser:
        """Parser for this element's records in an ASCII data section.

        Each record is parsed property by property and keyed by property
        name; the record parser is repeated `size` times.
        """
        single_item = named_sequence(
            **{p.name: p.dtype.ascii() for p in self.properties})
        return repeat_n(single_item, self.size)

    @property
    def afine(self) -> bool:
        """True when every property is a `PlyPrimitiveType` (no lists)."""
        return all(isinstance(p.dtype, PlyPrimitiveType)
                   for p in self.properties)

    def binary(self) -> Parser:
        """Parser for this element's records in a binary data section.

        When all properties are primitive, the whole element is read with a
        single `array` call using (name, dtype) pairs as the compound type.
        Otherwise each record is parsed property by property and the record
        parser is repeated `size` times.
        """
        if self.afine:
            compound_type = [(p.name, p.dtype.dtype) for p in self.properties]
            return array(compound_type, self.size)

        # At least one list property: record length varies, parse one by one.
        single_item = named_sequence(
            **{p.name: p.dtype.binary() for p in self.properties})
        return repeat_n(single_item, self.size)
    

# The declaration line itself: `element <name> <count>` plus a line ending.
_element_line = dict(
    _1=tokenize(text_literal("element")),
    name=tokenize(word),
    size=tokenize(integer),
    _2=eol)

# An element is its declaration line followed by its property lines; the
# named results are used to build a PlyElement.
ply_element = named_sequence(
    **_element_line,
    properties=some(ply_property)) >> construct(PlyElement)

In [16]:
# Two consecutive element declarations: one with three scalar properties,
# one with a single list property.
parse_bytes(
    some(ply_element), 
    b"element vertex 8\nproperty float x\nproperty float y\nproperty float z\n" +
    b"element face 6\nproperty list uchar int vertex_index\n")

[PlyElement(name='vertex', size=8, properties=[PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name='x'), PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name='y'), PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name='z')]),
 PlyElement(name='face', size=6, properties=[PlyProperty(dtype=PlyListType(size_type=PlyPrimitiveType(dtype=dtype('uint8')), value_type=PlyPrimitiveType(dtype=dtype('int32'))), name='vertex_index')])]

In [17]:
# A single element declaration with one list property.
parse_bytes(ply_element, b"element face 6\nproperty list uchar int vertex_index\n")

PlyElement(name='face', size=6, properties=[PlyProperty(dtype=PlyListType(size_type=PlyPrimitiveType(dtype=dtype('uint8')), value_type=PlyPrimitiveType(dtype=dtype('int32'))), name='vertex_index')])

In [18]:
@dataclass
class PlyHeader:
    """The parsed header of a PLY file.

    Holds the declared encoding, the text of the header's comment lines and
    the element declarations in header order.
    """
    format: PlyFormat
    # Builtin generics instead of `List`: `typing.List` is never imported in
    # this file, so the old annotations could not be resolved.
    comment: list[str]
    elements: list[PlyElement]

    def parser(self) -> Parser:
        """Build the parser for the data section this header describes.

        Returns:
            A parser yielding the parsed data of each element, keyed by
            element name, in header order.

        Raises:
            NotImplementedError: for any format other than ASCII and
                little-endian binary.
        """
        if self.format == PlyFormat.ASCII:
            return named_sequence(
                **{e.name: e.ascii() for e in self.elements})
        if self.format == PlyFormat.BINARY_LE:
            # NOTE(review): the property dtypes are created from plain names
            # without an explicit byte order, so this presumably relies on a
            # little-endian host -- confirm.
            return named_sequence(
                **{e.name: e.binary() for e in self.elements})
        raise NotImplementedError(
            f"No data parser for PLY format {self.format!r}")

# Complete header grammar, in file order: magic number, format line, comment
# lines, element declarations, end-of-header marker. The underscore-named
# entries only match text; PlyHeader is built from the remaining three.
ply_header = named_sequence(
    _1=ply_magic_number,
    format=ply_format,
    comment=many(ply_comment),
    elements=some(ply_element),
    # Reuse the `end_header` parser defined earlier instead of rebuilding it.
    _2=end_header) >> construct(PlyHeader)

def ply_data(header):
    """Build the parser for the data section described by `header`.

    The result is a named sequence with two entries: `header`, which yields
    the given header unchanged, and `data`, which is the parser the header
    generates for its own elements.
    """
    body = header.parser()
    return named_sequence(header=value(header), data=body)


# A whole file: parse the header, then feed it to `ply_data` to parse the rest.
ply_file = ply_header >> ply_data

The following ASCII example is given on Paul Bourke's page.

In [19]:
# A cube in ASCII PLY: 8 vertices (x, y, z) followed by 6 faces, each given
# as a count of 4 and four vertex indices.
ascii_example = b"""ply
format ascii 1.0
comment made by Greg Turk
comment this file is a cube
element vertex 8
property float x
property float y
property float z
element face 6
property list uchar int vertex_index
end_header
0 0 0
0 0 1
0 1 1
0 1 0
1 0 0
1 0 1
1 1 1
1 1 0
4 0 1 2 3
4 7 6 5 4
4 0 4 5 1
4 1 5 6 2
4 2 6 7 3
4 3 7 4 0
"""

In [20]:
# Parse only the header of the example; the data section is left unread.
pp.pprint(parse_bytes(ply_header, ascii_example))

PlyHeader(format=<PlyFormat.ASCII: 1>, comment=['made by Greg Turk', 'this file is a cube'], elements=[PlyElement(name='vertex', size=8, properties=[PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name='x'), PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name='y'), PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name='z')]), PlyElement(name='face', size=6, properties=[PlyProperty(dtype=PlyListType(size_type=PlyPrimitiveType(dtype=dtype('uint8')), value_type=PlyPrimitiveType(dtype=dtype('int32'))), name='vertex_index')])])


Now for the fun part! The header that we read actually encodes the parser for the rest of the file!

In [21]:
# Parse the complete example: the header first, then the data it describes.
pp.pprint(parse_bytes(ply_file, ascii_example))

{ 'data': { 'face': [ {'vertex_index': [0, 1, 2, 3]},
                      {'vertex_index': [7, 6, 5, 4]},
                      {'vertex_index': [0, 4, 5, 1]},
                      {'vertex_index': [1, 5, 6, 2]},
                      {'vertex_index': [2, 6, 7, 3]},
                      {'vertex_index': [3, 7, 4, 0]}],
            'vertex': [ {'x': 0.0, 'y': 0.0, 'z': 0.0},
                        {'x': 0.0, 'y': 0.0, 'z': 1.0},
                        {'x': 0.0, 'y': 1.0, 'z': 1.0},
                        {'x': 0.0, 'y': 1.0, 'z': 0.0},
                        {'x': 1.0, 'y': 0.0, 'z': 0.0},
                        {'x': 1.0, 'y': 0.0, 'z': 1.0},
                        {'x': 1.0, 'y': 1.0, 'z': 1.0},
                        {'x': 1.0, 'y': 1.0, 'z': 0.0}]},
  'header': PlyHeader(format=<PlyFormat.ASCII: 1>, comment=['made by Greg Turk', 'this file is a cube'], elements=[PlyElement(name='vertex', size=8, properties=[PlyProperty(dtype=PlyPrimitiveType(dtype=dtype('float32')), name

## The Stanford Bunny
Now that we have the capability to parse binary PLY files, we can load the Stanford Bunny. For visualisation purposes it helps to know that all faces in this file are triangles.

In [22]:
from pathlib import Path

# read_bytes() opens, reads and closes the file in one call, so no file
# handle is left open (the previous open(...).read() never closed it).
bunny = parse_bytes(ply_file, Path("_static/stanford_bunny.ply").read_bytes())

In [25]:
import ipyvolume as ipv

# Vertex data; indexed by property name below (presumably a NumPy structured
# array, since the vertex element has only scalar properties -- confirm).
v = bunny["data"]["vertex"]
# All faces are triangles, so the per-face index lists stack into one 2-D array.
t = np.array([f["vertex_indices"] for f in bunny["data"]["face"]])
ipv.figure()
mesh = ipv.plot_trisurf(v["x"], v["y"], v["z"], triangles=t, color="#5588ee")
# Fixed axis limits, presumably chosen to frame the model.
ipv.xlim(-0.1, 0.05)
ipv.zlim(-0.075, 0.075)
ipv.ylim(0.03, 0.18)
# Turn off the bounding box and the axes before showing the figure.
ipv.pylab.style.box_off()
ipv.pylab.style.axes_off()
ipv.show()

VBox(children=(Figure(camera=PerspectiveCamera(fov=46.0, position=(0.0, 0.0, 2.0), projectionMatrix=(1.0, 0.0,…

![The bunny](_static/stanford_bunny.png)