In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

from open_hardware_definitions import *
from open_hardware_definitions.common import DATASHEET_DIR, DEFINITIONS_DIR

from py_pdf_parser import tables
from py_pdf_parser.common import BoundingBox
from py_pdf_parser.loaders import load_file
from py_pdf_parser.visualise import visualise

from collections import namedtuple

from pprint import pprint

In [2]:
# Start Device definition

dev = Device(
    manufacturer='NXP',
    part_number='MPC5775',
    architecture="PowerPC",
    bit_width=32,
    endianness=Endianness.BIG
)


In [3]:
# Load with lower char_margin to fix concatenation of columns, and also specify a custom fuzzy orderer

FUZZY_ROUNDING = 5
def fr(val):
    return round(val / FUZZY_ROUNDING) * FUZZY_ROUNDING

orderer = lambda elements: sorted(
    elements, key=lambda elem: (fr(-elem.y0), fr(elem.x0))
)

doc = load_file(os.path.join(DATASHEET_DIR, "MPC5775KRM.pdf"), {'char_margin': 1}, element_ordering=orderer)
content = doc.elements

In [10]:
# Filter out header and footer elements
header_box = BoundingBox(0, 1000, 740, 1000)
footer_box = BoundingBox(0, 1000, 0, 50)
for page in range(1, doc.number_of_pages):
    content -= content.filter_partially_within_bounding_box(header_box, page)
    content -= content.filter_partially_within_bounding_box(footer_box, page)

In [None]:
# Extract module base addresses
mod_base_addr_before = content.filter_by_text_contains("Peripheral Bridge 0 slot assignments")[0]
mod_base_addr_after = content.filter_by_text_contains("PMC register base address is 0xFFFA0400")[0]

mod_base_addr_table_elements = content.between(mod_base_addr_before, mod_base_addr_after).filter_by_fonts("PHUAOA+HelveticaLTStd-Bold,9.0", "PHUAOA+HelveticaLTStd-Roman,9.0")
print(mod_base_addr_before, mod_base_addr_after)
print(mod_base_addr_table_elements)

mod_base_addr_table_data = mod_base_addr_table_elements.filter_by_font("PHUAOA+HelveticaLTStd-Roman,9.0")[:-1]
visualise(doc, 372, elements=content)#, elements=mod_base_addr_table_data)

t = tables.extract_simple_table(mod_base_addr_table_data, as_text=True)

base_addr_strings = {}
modules = []
for d in t:
    modules.append(Module(
        name=d[0],
        base_addr=int(d[1].replace("_", ""), 16)
    ))
    base_addr_strings[d[0]] = d[1]

for m in modules:
    print(m)

<PDFElement tags: set(), font: 'PHUAOA+HelveticaLTStd-Bold,12.0'> <PDFElement tags: set(), font: 'PHUAOA+HelveticaLTStd-Roman,9.0'>
<ElementList of 516 elements>


TableExtractionError: Element not found, there appears to be a gap in the table. If this is expected, pass allow_gaps=True.

In [29]:
# Add regions (manual input for now)
dev.regions = [
    Region("Program/Data Flash", 0x3fff, 0x1000000 - 0x3fff, True, True, True, False),
    Region("Flash Emulation Mapping", 0x1000000, 0x40000000 - 0x1000000,True, True, True, False),
    Region("SRAM", 0x40000000, 0xC0000000 - 0x40000000, True, True, False, False),
]

In [30]:
# Register map (table A-4)
reg_map_before = content.filter_by_text_contains("Table A-4. MPC5668x Detailed Register Map")[0]
reg_map_after = content.filter_by_text_contains("In this column, R/W indicates a read/write register")[0]
reg_map_elements = content.between(reg_map_before, reg_map_after)

# Find start elements of each module in this table
module_elements = {}
for m in modules:
    tmp_list = reg_map_elements.filter_by_text_contains(base_addr_strings[m.name])
    if len(tmp_list) > 0:
        module_elements[m.name] = tmp_list[0]

dev.modules = []        
# Extract all registers for the module
for i, m in enumerate(modules):
    # Skip non-peripherals
    if i <= 2:
        continue
    
    print(i, m)
    if m.name not in module_elements.keys():
        continue
    if m.name in module_elements.keys() and i < (len(modules) - 1):
        tbl_contents = content.between(module_elements[m.name], module_elements[modules[i+1].name]).filter_by_font("IOCJJB+Helvetica,9.0")
    elif m.name in module_elements.keys() and i == len(modules) - 1:
        tbl_contents = content.between(module_elements[m.name], reg_map_after).filter_by_font("IOCJJB+Helvetica,9.0")
    
    if len(tbl_contents) == 0:
        continue
    
    # Get rid of "reserved" lines
    res_elements = tbl_contents.filter_by_text_contains("Reserved")
    for res_element in res_elements:
        tbl_contents -= tbl_contents.horizontally_in_line_with(res_element)
    tbl_contents -= res_elements
    
    # Extract table
    tbl = tables.extract_simple_table(tbl_contents, as_text=True, allow_gaps=True)    
    LCBCR15_seen = False
    MBCCFR13_seen = False
    MBCCFR14_seen = False
    MBCCFR26_seen = False
    for i, r in enumerate(tbl):
        try:
            # Errata!
            if r[0] == "00x040":
                r[0] = "0x0040"
            if r[0] == "8C0x03":
                r[0] = "0x038C"
            r[0] = r[0].replace('h', '')
            r[0] = r[0].replace('O', '0')
                        
            # Quirks
            if r[1] == 'FlexRay memory':
                continue
                        
            if r[0] == '0x0D78–0x0D7B':
                r[0] = '0x0D78'
            
            # Parse addr
            addr = int(r[0], 16) + m.base_addr
            
            if 'EMIOS_CCNTR' in r[1] and addr % 16 == 6:
                addr += 2
                
            # Quirks
            bit_width = 32
            if '—' in r[3] or not 'x' in r[3] or len(r[3]) == 0:
                if i + 1 < len(tbl):
                    next_addr = int(tbl[i+1][0], 16) + m.base_addr
                    if next_addr - addr in [1, 2, 4]:
                        bit_width = {1: 8, 2: 16, 4: 32}[next_addr - addr]
            else:
                clean_r3 = r[3].replace('_', '')
                bit_width = {10: 32, 6: 16, 4:8}[len(clean_r3)]
            
            if '—' in r[3] or 'U' in r[3] or len(r[3]) == 0:
                default = None
                
            else:
                clean_r3 = r[3].replace('_', '')                
                default = int(clean_r3, 16)

            
            # Other parsing
            name_desc = r[1].replace('–', '—')            
            name = name_desc.split('—')[0]
            
            # Quirk
            if name == "LCBCR15":
                if LCBCR15_seen:
                    continue
                LCBCR15_seen = True
                
            # Errata
            if name == "EMIOS_CCNTR[11]":
                addr += 0x20

            if name == "MBCCFR13":
                if MBCCFR13_seen:
                    name = "MBCCFR113"
                MBCCFR13_seen = True
            if name == "MBCCFR14":
                if MBCCFR14_seen:
                    name = "MBCCFR114"
                MBCCFR14_seen = True
            if name == "MBCCFR26":
                if MBCCFR26_seen:
                    name = "MBCCFR126"
                MBCCFR26_seen = True

                
            description = '—'.join(name_desc.split('—')[1:])
            
            # Errata
            if name == 'UT0':
                if '1' in description:
                    name = 'UT1'
                if '2' in description:
                    name = 'UT2'
            
            read_allowed = 'R' in r[2]
            write_allowed = 'W' in r[2]

            # Phew, we have everything
            m.registers.append(Register(
                name.strip(),
                addr,
                description=description.strip(),
                read_allowed=read_allowed,
                write_allowed=write_allowed,
                reset_value=default,
                size_bits=bit_width,
                extras={'description_page': r[4]}
            ))
        except Exception as e:
            print(f"CANNOT HANDLE: {r}", str(e))
    
    dev.modules.append(m)
    print(f"Module '{m.name}': {len(m.registers)} registers")

print(f"Total: {sum(map(lambda m: len(m.registers), dev.modules))} registers")            

3 Module 'MLB_DIM Configuration': Base address: 0xc3f84000
Module 'MLB_DIM Configuration': 90 registers
4 Module 'I2C_C': Base address: 0xc3f88000
Module 'I2C_C': 6 registers
5 Module 'I2C_D': Base address: 0xc3f8c000
Module 'I2C_D': 6 registers
6 Module 'DSPI_C': Base address: 0xc3f90000
Module 'DSPI_C': 28 registers
7 Module 'DSPI_D': Base address: 0xc3f94000
Module 'DSPI_D': 28 registers
8 Module 'eSCI_J': Base address: 0xc3fa0000
Module 'eSCI_J': 12 registers
9 Module 'eSCI_K': Base address: 0xc3fa4000
Module 'eSCI_K': 12 registers
10 Module 'eSCI_L': Base address: 0xc3fa8000
Module 'eSCI_L': 12 registers
11 Module 'eSCI_M': Base address: 0xc3fac000
Module 'eSCI_M': 12 registers
12 Module 'FlexRay': Base address: 0xc3fdc000
Module 'FlexRay': 624 registers
13 Module 'AXBS': Base address: 0xfff04000
Module 'AXBS': 13 registers
14 Module 'Sema4': Base address: 0xfff10000
CANNOT HANDLE: ['0x0104', 'SEMA4_RSTNTF—Semaphores reset IRQ notification', 'R/W', '0x00000', '14.3.2.5/14-8'] 7
Mo

In [8]:
print(dev.dump())

!Device
manufacturer: NXP
part_number: MPC5668x
architecture: PowerPC
bit_width: 32
endianness: little
regions:
- !Region
  name: Program/Data Flash
  base_addr: 0x3fff
  size: 0xffc001
- !Region
  name: Flash Emulation Mapping
  base_addr: 0x1000000
  size: 0x3f000000
- !Region
  name: SRAM
  base_addr: 0x40000000
  size: 0x80000000
modules:
- !Module
  name: MLB_DIM Configuration
  base_addr: 0xc3f84000
  registers:
  - !Register
    name: DCCR
    addr: 0xc3f84000
    size_bits: 32
    description: Device Control Configuration Register
    read_allowed: true
    write_allowed: true
    description_page: 26.3.2.1/26-8
  - !Register
    name: SSCR
    addr: 0xc3f84004
    size_bits: 32
    description: System Status Configuration Register
    read_allowed: true
    write_allowed: true
    description_page: 26.3.2.2/26-10
  - !Register
    name: SDCR
    addr: 0xc3f84008
    size_bits: 32
    description: System Data Configuration Register
    read_allowed: true
    write_allowed: fals

In [31]:
# Commit to file
with open(DEFINITIONS_DIR + f"/{dev.manufacturer}/{dev.part_number}.yaml", 'w') as f:
    f.write(dev.dump())