In [53]:
# TODO: make a proper module and remove this hack
import os
import sys
sys.path.append("/home/robbe/open-hardware-definitions/")

from parsers.common import DATASHEET_DIR

from open_hardware_definitions import *

from py_pdf_parser import tables
from py_pdf_parser.common import BoundingBox
from py_pdf_parser.loaders import load_file
from py_pdf_parser.visualise import visualise

from collections import namedtuple

from pprint import pprint

In [34]:
# One row of the module base-address table (Table A-1).
Module = namedtuple(
    'Module',
    [
        'name',           # module name as printed in the table
        'base_addr',      # base address parsed to an int
        'base_addr_str',  # base address exactly as printed (e.g. with '_' separators)
        'ref_page',       # manual page reference, the text following "Page "
    ],
)

# One register row of the detailed register map (Table A-4).
Register = namedtuple(
    'Register',
    [
        'name',           # text before the first em dash in the name/description column
        'module',         # name of the owning Module
        'addr',           # absolute address: register offset + module base address
        'description',    # text after the first em dash in the name/description column
        'read_allowed',   # True when the access column contains 'R'
        'write_allowed',  # True when the access column contains 'W'
        'default',        # reset value as an int, or None when not a plain hex value
        'ref_page',       # manual page reference column
    ],
)

In [25]:
# Load with lower char_margin to fix concatenation of columns, and also specify a custom fuzzy orderer

# Grid size used to snap coordinates so that nearly-equal values compare equal.
FUZZY_ROUNDING = 5


def fr(val):
    """Round ``val`` to the nearest multiple of ``FUZZY_ROUNDING``.

    Uses the built-in ``round``, so exact halves follow its
    round-half-to-even rule.
    """
    steps = round(val / FUZZY_ROUNDING)
    return steps * FUZZY_ROUNDING

def orderer(elements):
    """Return ``elements`` as a list sorted on fuzzily-rounded positions.

    The primary key is the rounded negated ``y0`` (larger ``y0`` first) and
    the secondary key is the rounded ``x0`` (smaller ``x0`` first).
    Rounding via ``fr`` lets elements whose coordinates differ only
    slightly share the same key.
    """
    def position(elem):
        return (fr(-elem.y0), fr(elem.x0))

    return sorted(elements, key=position)

# Parse the reference manual with a reduced char_margin and the custom
# element ordering, then keep a handle on the full element list.
doc = load_file(
    os.path.join(DATASHEET_DIR, "MPC5668xRM.pdf"),
    la_params={'char_margin': 1},
    element_ordering=orderer,
)
content = doc.elements

In [26]:
# Filter out header and footer elements
# NOTE(review): BoundingBox arguments are presumably (x0, x1, y0, y1), i.e. the
# header box is the strip with y in [740, 1000] and the footer box the strip
# with y in [0, 80] -- confirm against the py_pdf_parser docs.
header_box = BoundingBox(0, 1000, 740, 1000)
footer_box = BoundingBox(0, 1000, 0, 80)

# The loop starts at page 1, so the range must end at number_of_pages + 1 to
# include the last page; range(1, number_of_pages) left it unfiltered.
for page in range(1, doc.number_of_pages + 1):
    content -= content.filter_partially_within_bounding_box(header_box, page)
    content -= content.filter_partially_within_bounding_box(footer_box, page)

In [27]:
# Extract module base addresses (Table A-1).
# The table is delimited by its own caption and the caption of the next table.
mod_base_addr_before = content.filter_by_text_contains("Table A-1. Module Base Addresses")[0]
mod_base_addr_after = content.filter_by_text_contains("Table A-2. MPC5668x System Memory Map")[0]

# Keep only elements set in the two table fonts, then narrow to the regular
# (non-bold) font for the data cells.
mod_base_addr_table_elements = (
    content
    .between(mod_base_addr_before, mod_base_addr_after)
    .filter_by_fonts("IOCJHP+Helvetica-Bold,9.0", "IOCJJB+Helvetica,9.0")
)
mod_base_addr_table_data = mod_base_addr_table_elements.filter_by_font("IOCJJB+Helvetica,9.0")

t = tables.extract_simple_table(mod_base_addr_table_data, as_text=True)

# Each row is used as: [0] module name, [1] base address text, [2] "Page <ref>".
modules = []
for row in t:
    addr_text = row[1]
    modules.append(
        Module(
            row[0],
            int(addr_text.replace("_", ""), 16),  # drop '_' separators before parsing hex
            addr_text,
            row[2].split("Page ")[1],
        )
    )

for m in modules:
    print(f"Module '{m.name}': Base: {hex(m.base_addr)}, Page: {m.ref_page}")

Program/Data Flash
0x0000_3FFF
Page A-3
Flash Emulation Mapping
0x0100_0000
Page A-3
SRAM
0x4000_0000
Page A-3
MLB_DIM Configuration
0xC3F8_4000
Page A-8
I2C_C
0xC3F8_8000
Page A-11
I2C_D
0xC3F8_C000
Page A-11
DSPI_C
0xC3F9_0000
Page A-12
DSPI_D
0xC3F9_4000
Page A-13
eSCI_J
0xC3FA_0000
Page A-14
eSCI_K
0xC3FA_4000
Page A-14
eSCI_L
0xC3FA_8000
Page A-15
eSCI_M
0xC3FA_C000
Page A-16
FlexRay
0xC3FD_C000
Page A-16
AXBS
0xFFF0_4000
Page A-38
Sema4
0xFFF1_0000
Page A-39
MPU
0xFFF1_4000
Page A-40
SWT
0xFFF3_8000
Page A-42
STM
0xFFF3_C000
Page A-42
ECSM
0xFFF4_0000
Page A-43
eDMA
0xFFF4_4000
Page A-43
INTC
0xFFF4_8000
Page A-46
FEC
0xFFF4_C000
Page A-49
ADC_A
0xFFF8_0000
Page A-51
I2C_A
0xFFF8_8000
Page A-55
I2C_B
0xFFF8_C000
Page A-56
DSPI_A
0xFFF9_0000
Page A-56
DSPI_B
0xFFF9_4000
Page A-57
eSCI_A
0xFFFA_0000
Page A-58
eSCI_B
0xFFFA_4000
Page A-58
eSCI_C
0xFFFA_8000
Page A-59
eSCI_D
0xFFFA_C000
Page A-59
eSCI_E
0xFFFB_0000
Page A-60
eSCI_F
0xFFFB_4000
Page A-60
eSCI_G
0xFFFB_8000
Page A-61
e

In [48]:
# Register map (table A-4)
# The table runs from its caption up to the footnote that explains the access column.
reg_map_before = content.filter_by_text_contains("Table A-4. MPC5668x Detailed Register Map")[0]
reg_map_after = content.filter_by_text_contains("In this column, R/W indicates a read/write register")[0]
reg_map_elements = content.between(reg_map_before, reg_map_after)

# Find start elements of each module in this table: the first element inside
# the table whose text contains the module's base address string.
module_elements = {}
for m in modules:
    tmp_list = reg_map_elements.filter_by_text_contains(m.base_addr_str)
    if len(tmp_list) > 0:
        module_elements[m.name] = tmp_list[0]

# Only modules that actually have a heading in the register map take part.
# Each section ends at the heading of the next module that has one; looking
# up modules[i+1] directly raised KeyError when that module had no heading.
mapped_modules = [m for m in modules if m.name in module_elements]

# Extract all registers for the module
registers = []
for i, m in enumerate(mapped_modules):
    if i + 1 < len(mapped_modules):
        section_end = module_elements[mapped_modules[i + 1].name]
    else:
        # Last section runs to the end of the table
        section_end = reg_map_after
    tbl_contents = content.between(module_elements[m.name], section_end).filter_by_font("IOCJJB+Helvetica,9.0")

    if len(tbl_contents) == 0:
        continue

    # Get rid of "reserved" lines: drop everything on the same line as a
    # "Reserved" element, then the "Reserved" elements themselves.
    res_elements = tbl_contents.filter_by_text_contains("Reserved")
    for res_element in res_elements:
        tbl_contents -= tbl_contents.horizontally_in_line_with(res_element)
    tbl_contents -= res_elements

    # Extract table
    count = 0
    tbl = tables.extract_simple_table(tbl_contents, as_text=True, allow_gaps=True)
    for r in tbl:
        # Row columns as used below:
        #   r[0] register offset (hex text), r[1] "NAME—description",
        #   r[2] access (R / W), r[3] reset value, r[4] page reference
        try:
            # Errata! Fix offsets that come out of the PDF garbled.
            if r[0] == "00x040":
                r[0] = "0x0040"
            if r[0] == "8C0x03":
                r[0] = "0x038C"
            r[0] = r[0].replace('h', '')
            r[0] = r[0].replace('O', '0')

            # Quirks: a dash, a 'U' or an empty cell means there is no
            # parseable reset value.
            if '—' in r[3] or 'U' in r[3] or len(r[3]) == 0:
                default = None
            else:
                default = int(r[3].replace('_', ''), 16)

            # Rows describing the FlexRay memory area are not registers
            if r[1] == 'FlexRay memory':
                continue

            # Offset range: keep only its start
            if r[0] == '0x0D78–0x0D7B':
                r[0] = '0x0D78'

            # Other parsing
            name = r[1].split('—')[0]
            addr = int(r[0], 16) + m.base_addr
            description = '—'.join(r[1].split('—')[1:])
            read_allowed = 'R' in r[2]
            write_allowed = 'W' in r[2]

            # Phew, we have everything
            registers.append(Register(name, m.name, addr, description, read_allowed, write_allowed, default, r[4]))
            count += 1
        except Exception as e:
            # Best effort: report the row that could not be parsed and carry on
            print(f"CANNOT HANDLE: {r}", e)
    print(f"Module '{m.name}': {count} registers")
print(f"Total: {len(registers)} registers")
            

Module 'MLB_DIM Configuration': 91 registers
Module 'I2C_C': 6 registers
Module 'I2C_D': 6 registers
Module 'DSPI_C': 28 registers
Module 'DSPI_D': 28 registers
Module 'eSCI_J': 12 registers
Module 'eSCI_K': 12 registers
Module 'eSCI_L': 12 registers
Module 'eSCI_M': 12 registers
Module 'FlexRay': 624 registers
Module 'AXBS': 13 registers
Module 'Sema4': 22 registers
Module 'MPU': 41 registers
Module 'SWT': 7 registers
Module 'STM': 14 registers
Module 'ECSM': 15 registers
Module 'eDMA': 79 registers
Module 'INTC': 88 registers
Module 'FEC': 23 registers
Module 'ADC_A': 136 registers
Module 'I2C_A': 6 registers
Module 'I2C_B': 6 registers
Module 'DSPI_A': 22 registers
Module 'DSPI_B': 22 registers
Module 'eSCI_A': 12 registers
Module 'eSCI_B': 12 registers
Module 'eSCI_C': 12 registers
Module 'eSCI_D': 12 registers
Module 'eSCI_E': 12 registers
Module 'eSCI_F': 12 registers
Module 'eSCI_G': 12 registers
Module 'eSCI_H': 12 registers
Module 'FlexCan_A': 140 registers
Module 'FlexCan_B':