In [3]:
%load_ext autoreload
%autoreload 2

import os
import sys

# TODO: remove hack
sys.path.append("/home/robbe/open-hardware-definitions/")
from parsers.common import DATASHEET_DIR

from open_hardware_definitions import *

from py_pdf_parser import tables
from py_pdf_parser.common import BoundingBox
from py_pdf_parser.loaders import load_file
from py_pdf_parser.visualise import visualise

from collections import namedtuple

from pprint import pprint

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
# Module = namedtuple('Module', ['name', 'base_addr', 'base_addr_str', 'ref_page'])
# Register = namedtuple('Register', ['name', 'module', 'addr', 'description', 'read_allowed', 'write_allowed', 'default', 'ref_page'])

In [23]:
# Start Processor definition

proc = Processor(
    manufacturer='NXP',
    part_number='MPC5668x',
    architecture="PowerPC",
    bit_width=32,
    endianness=Endianness.LITTLE   
)


In [7]:
# Load with lower char_margin to fix concatenation of columns, and also specify a custom fuzzy orderer

FUZZY_ROUNDING = 5
def fr(val):
    return round(val / FUZZY_ROUNDING) * FUZZY_ROUNDING

orderer = lambda elements: sorted(
    elements, key=lambda elem: (fr(-elem.y0), fr(elem.x0))
)

doc = load_file(os.path.join(DATASHEET_DIR, "MPC5668xRM.pdf"), {'char_margin': 1}, element_ordering=orderer)
content = doc.elements

In [8]:
# Filter out header and footer elements
header_box = BoundingBox(0, 1000, 740, 1000)
footer_box = BoundingBox(0, 1000, 0, 80)
for page in range(1, doc.number_of_pages):
    content -= content.filter_partially_within_bounding_box(header_box, page)
    content -= content.filter_partially_within_bounding_box(footer_box, page)

In [24]:
# Extract module base addresses
mod_base_addr_before = content.filter_by_text_contains("Table A-1. Module Base Addresses")[0]
mod_base_addr_after = content.filter_by_text_contains("Table A-2. MPC5668x System Memory Map")[0]

mod_base_addr_table_elements = content.between(mod_base_addr_before, mod_base_addr_after).filter_by_fonts("IOCJHP+Helvetica-Bold,9.0", "IOCJJB+Helvetica,9.0")
mod_base_addr_table_data = mod_base_addr_table_elements.filter_by_font("IOCJJB+Helvetica,9.0")

t = tables.extract_simple_table(mod_base_addr_table_data, as_text=True)

base_addr_strings = {}
for d in t:
    proc.modules.append(Module(
        name=d[0],
        base_addr=int(d[1].replace("_", ""), 16)
    ))
    base_addr_strings[d[0]] = d[1]

for m in proc.modules:
    print(m)

Module 'Program/Data Flash': Base address: 0x3fff
Module 'Flash Emulation Mapping': Base address: 0x1000000
Module 'SRAM': Base address: 0x40000000
Module 'MLB_DIM Configuration': Base address: 0xc3f84000
Module 'I2C_C': Base address: 0xc3f88000
Module 'I2C_D': Base address: 0xc3f8c000
Module 'DSPI_C': Base address: 0xc3f90000
Module 'DSPI_D': Base address: 0xc3f94000
Module 'eSCI_J': Base address: 0xc3fa0000
Module 'eSCI_K': Base address: 0xc3fa4000
Module 'eSCI_L': Base address: 0xc3fa8000
Module 'eSCI_M': Base address: 0xc3fac000
Module 'FlexRay': Base address: 0xc3fdc000
Module 'AXBS': Base address: 0xfff04000
Module 'Sema4': Base address: 0xfff10000
Module 'MPU': Base address: 0xfff14000
Module 'SWT': Base address: 0xfff38000
Module 'STM': Base address: 0xfff3c000
Module 'ECSM': Base address: 0xfff40000
Module 'eDMA': Base address: 0xfff44000
Module 'INTC': Base address: 0xfff48000
Module 'FEC': Base address: 0xfff4c000
Module 'ADC_A': Base address: 0xfff80000
Module 'I2C_A': Base

In [25]:
# Register map (table A-4)
reg_map_before = content.filter_by_text_contains("Table A-4. MPC5668x Detailed Register Map")[0]
reg_map_after = content.filter_by_text_contains("In this column, R/W indicates a read/write register")[0]
reg_map_elements = content.between(reg_map_before, reg_map_after)

# Find start elements of each module in this table
module_elements = {}
for m in proc.modules:
    tmp_list = reg_map_elements.filter_by_text_contains(base_addr_strings[m.name])
    if len(tmp_list) > 0:
        module_elements[m.name] = tmp_list[0]

# Extract all registers for the module
for i, m in enumerate(proc.modules):
    print(i, m)
    if m.name not in module_elements.keys():
        continue
    if m.name in module_elements.keys() and i < (len(proc.modules) - 1):
        tbl_contents = content.between(module_elements[m.name], module_elements[proc.modules[i+1].name]).filter_by_font("IOCJJB+Helvetica,9.0")
    elif m.name in module_elements.keys() and i == len(proc.modules) - 1:
        tbl_contents = content.between(module_elements[m.name], reg_map_after).filter_by_font("IOCJJB+Helvetica,9.0")
    
    if len(tbl_contents) == 0:
        continue
    
    # Get rid of "reserved" lines
    res_elements = tbl_contents.filter_by_text_contains("Reserved")
    for res_element in res_elements:
        tbl_contents -= tbl_contents.horizontally_in_line_with(res_element)
    tbl_contents -= res_elements
    
    # Extract table
    tbl = tables.extract_simple_table(tbl_contents, as_text=True, allow_gaps=True)    
    for r in tbl:                
        try:
            # Errata!
            if r[0] == "00x040":
                r[0] = "0x0040"
            if r[0] == "8C0x03":
                r[0] = "0x038C"
            r[0] = r[0].replace('h', '')
            r[0] = r[0].replace('O', '0')
            
                
            # Quirks
            if '—' in r[3] or 'U' in r[3] or len(r[3]) == 0:
                default = None
            else:
                default = int(r[3].replace('_', ''), 16)

            if r[1] == 'FlexRay memory':
                continue
                        
            if r[0] == '0x0D78–0x0D7B':
                r[0] = '0x0D78'
            
            # Other parsing
            name = r[1].split('—')[0]
            addr = int(r[0], 16) + m.base_addr
            description = '—'.join(r[1].split('—')[1:])
            read_allowed = 'R' in r[2]
            write_allowed = 'W' in r[2]

            # Phew, we have everything
            proc.modules[i].registers.append(Register(
                name,
                addr,
                description,
                read_allowed,
                write_allowed,
                default, 
                extras={'description_page': r[4]}
            ))
        except Exception as e:
            print(f"CANNOT HANDLE: {r}", e)
    print(f"Module '{m.name}': {len(m.registers)} registers")

print(f"Total: {sum(map(lambda m: len(m.registers), proc.modules))} registers")
            

0 Module 'Program/Data Flash': Base address: 0x3fff
1 Module 'Flash Emulation Mapping': Base address: 0x1000000
2 Module 'SRAM': Base address: 0x40000000
3 Module 'MLB_DIM Configuration': Base address: 0xc3f84000
Module 'MLB_DIM Configuration': 91 registers
4 Module 'I2C_C': Base address: 0xc3f88000
Module 'I2C_C': 6 registers
5 Module 'I2C_D': Base address: 0xc3f8c000
Module 'I2C_D': 6 registers
6 Module 'DSPI_C': Base address: 0xc3f90000
Module 'DSPI_C': 28 registers
7 Module 'DSPI_D': Base address: 0xc3f94000
Module 'DSPI_D': 28 registers
8 Module 'eSCI_J': Base address: 0xc3fa0000
Module 'eSCI_J': 12 registers
9 Module 'eSCI_K': Base address: 0xc3fa4000
Module 'eSCI_K': 12 registers
10 Module 'eSCI_L': Base address: 0xc3fa8000
Module 'eSCI_L': 12 registers
11 Module 'eSCI_M': Base address: 0xc3fac000
Module 'eSCI_M': 12 registers
12 Module 'FlexRay': Base address: 0xc3fdc000
Module 'FlexRay': 624 registers
13 Module 'AXBS': Base address: 0xfff04000
Module 'AXBS': 13 registers
14 Mo

In [26]:
print(proc.dump())

!Processor
manufacturer: NXP
part_number: MPC5668x
architecture: PowerPC
bit_width: 32
endianness: little
modules:
- !Module
  name: Program/Data Flash
  base_addr: 0x3fff
- !Module
  name: Flash Emulation Mapping
  base_addr: 0x1000000
- !Module
  name: SRAM
  base_addr: 0x40000000
- !Module
  name: MLB_DIM Configuration
  base_addr: 0xc3f84000
  registers:
  - !Register
    name: DCCR
    addr: 0xc3f84000
    description: Device Control Configuration Register
    read_allowed: true
    write_allowed: true
    description_page: 26.3.2.1/26-8
  - !Register
    name: SSCR
    addr: 0xc3f84004
    description: System Status Configuration Register
    read_allowed: true
    write_allowed: true
    description_page: 26.3.2.2/26-10
  - !Register
    name: SDCR
    addr: 0xc3f84008
    description: System Data Configuration Register
    read_allowed: true
    description_page: 26.3.2.3/26-11
  - !Register
    name: SMCR
    addr: 0xc3f8400c
    description: System Mask Configuration Register