In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

from open_hardware_definitions import *
from open_hardware_definitions.common import DATASHEET_DIR, DEFINITIONS_DIR

from py_pdf_parser import tables
from py_pdf_parser.common import BoundingBox
from py_pdf_parser.loaders import load_file
from py_pdf_parser.visualise import visualise

from collections import namedtuple

from pprint import pprint

In [2]:
# Start Device definition

dev = Device(
    manufacturer='NXP',
    part_number='SPC5604',
    architecture="PowerPC",
    bit_width=32,
    endianness=Endianness.LITTLE   
)


In [3]:
# Load with lower char_margin to fix concatenation of columns, and also specify a custom fuzzy orderer

FUZZY_ROUNDING = 5
def fr(val):
    return round(val / FUZZY_ROUNDING) * FUZZY_ROUNDING

orderer = lambda elements: sorted(
    elements, key=lambda elem: (fr(-elem.y0), fr(elem.x0))
)

doc = load_file(os.path.join(DATASHEET_DIR, "SPC5604BF2CLL6_ref_manual.pdf"), {'char_margin': 1}, element_ordering=orderer)
content = doc.elements

In [4]:
# Filter out header and footer elements
header_box = BoundingBox(0, 1000, 740, 1000)
footer_box = BoundingBox(0, 1000, 0, 80)
for page in range(1, doc.number_of_pages):
    content -= content.filter_partially_within_bounding_box(header_box, page)
    content -= content.filter_partially_within_bounding_box(footer_box, page)

In [9]:
print(content.filter_by_text_contains("Table A-2. Detailed register map"))

<ElementList of 80 elements>


In [49]:
# Extract module base addresses
mod_base_addr_before = content.filter_by_text_contains("Table A-1. Module base addresses")[0]
mod_base_addr_after = content.after(mod_base_addr_before).filter_by_text_contains("Table A-2. Detailed register map")[0]

mod_base_addr_table_elements = content.between(mod_base_addr_before, mod_base_addr_after).filter_by_fonts("JEIOCK+Helvetica-Bold,9.0", "JEIODL+Helvetica,9.0")
mod_base_addr_table_data = mod_base_addr_table_elements.filter_by_font("JEIODL+Helvetica,9.0")

t = tables.extract_simple_table(mod_base_addr_table_data, as_text=True)

base_addr_strings = {}
modules = []
for d in t:
    name = d[0]
    if '(' in name:
        name = name.split('(')[1].split(')')[0]
    modules.append(Module(
        name=name,
        base_addr=int(d[1].replace("_", ""), 16)
    ))
    base_addr_strings[name] = d[1]

for m in modules:
    print(m)

Module 'Code Flash A Configuration': Base address: 0xc3f88000
Module 'Data Flash A Configuration': Base address: 0xc3f8c000
Module 'SIUL': Base address: 0xc3f90000
Module 'WakeUp Unit': Base address: 0xc3f94000
Module 'eMIOS_0': Base address: 0xc3fa0000
Module 'eMIOS_1': Base address: 0xc3fa4000
Module 'SSCM': Base address: 0xc3fd8000
Module 'MC_ME': Base address: 0xc3fdc000
Module 'FXOSC': Base address: 0xc3fe0000
Module 'SXOSC': Base address: 0xc3fe0040
Module 'FIRC': Base address: 0xc3fe0060
Module 'SIRC': Base address: 0xc3fe0080
Module 'FMPLL': Base address: 0xc3fe00a0
Module 'CMU': Base address: 0xc3fe0100
Module 'MC_CGM': Base address: 0xc3fe0370
Module 'MC_RGM': Base address: 0xc3fe4000
Module 'MC_PCU': Base address: 0xc3fe8000
Module 'RTC/API': Base address: 0xc3fec000
Module 'PIT': Base address: 0xc3ff0000
Module 'ADC': Base address: 0xffe00000
Module 'I2C': Base address: 0xffe30000
Module 'LINFlex_0': Base address: 0xffe40000
Module 'LINFlex_1': Base address: 0xffe44000
Modu

In [46]:
# Add regions (manual input for now)
dev.regions = [
    Region("Code Flash", 0x0000, 0x80000, True, True, True, False),
    Region("Code Flash Shadow", 0x200000, 0x204000 - 0x200000, True, True, True, False),
    Region("Code Flash Test", 0x400000, 0x404000 - 0x400000, True, True, True, False),
    Region("Data Flash", 0x800000, 0x810000 - 0x800000, True, True, False, False),
    Region("Data Flash Test", 0xC00000, 0xC04000 - 0xC00000, True, True, False, False),
    Region("Flash Emulation Mapping", 0x1000000, 0x20000000 - 0x1000000,True, True, True, False),
    Region("SRAM", 0x40000000, 0x4000C000 - 0x40000000, True, True, False, False),
]

In [47]:
# Register map (table A-4)
reg_map_before = content.filter_by_text_contains("Table A-2. Detailed register map")[0]
reg_map_after = content.after(reg_map_before).filter_by_text_contains("Appendix B")[0]
reg_map_elements = content.between(reg_map_before, reg_map_after)
print(len(reg_map_elements))
for e in reg_map_elements[:20]:
    print(e.text())

8977
Register description
Register name
Address
Used 
size
Code Flash A Configuration
0xC3F8_8000
Module Configuration Register
CFLASH_MCR
32-bit
Base + 0x0000
Low/Mid Address Space Block Locking Register
CFLASH_LML
32-bit
Base + 0x0004
High Address Space Block Locking Register
CFLASH_HBL
32-bit
Base + 0x0008
Secondary Low/Mid Address Space Block Locking Register
CFLASH_SLL


In [54]:
# Find start elements of each module in this table
module_elements = {}
for m in modules:
    tmp_list = reg_map_elements.filter_by_text_contains(base_addr_strings[m.name])
    if len(tmp_list) > 0:
        module_elements[m.name] = tmp_list[0]
        
for m in module_elements:
    print(m)

Code Flash A Configuration
Data Flash A Configuration
SIUL
WakeUp Unit
eMIOS_0
eMIOS_1
SSCM
MC_ME
FXOSC
SXOSC
FIRC
SIRC
FMPLL
CMU
MC_CGM
MC_RGM
MC_PCU
RTC/API
PIT
ADC
I2C
LINFlex_0
LINFlex_1
LINFlex_2
LINFlex_3
CTU
CAN sampler
MPU
SWT
STM
ECSM
INTC
DSPI_0
DSPI_1
DSPI_2
CAN0
CAN1
CAN2
CAN3
CAN4
CAN5


In [60]:
dev.modules = []        
# Extract all registers for the module
for i, m in enumerate(modules):    
    print(i, m)
    if m.name not in module_elements.keys():
        continue
    if m.name in module_elements.keys() and i < (len(modules) - 1):
        tbl_contents = content.between(module_elements[m.name], module_elements[modules[i+1].name]).filter_by_font("JEIODL+Helvetica,9.0")
    elif m.name in module_elements.keys() and i == len(modules) - 1:
        tbl_contents = content.between(module_elements[m.name], reg_map_after).filter_by_font("JEIODL+Helvetica,9.0")
    
    if len(tbl_contents) == 0:
        print("Nothing for", m.name)
        continue
    
    # Get rid of "reserved" lines
    res_elements = tbl_contents.filter_by_text_contains("Reserved")
    for res_element in res_elements:
        tbl_contents -= tbl_contents.horizontally_in_line_with(res_element)
    tbl_contents -= res_elements
    
    # Extract table
    tbl = tables.extract_simple_table(tbl_contents, as_text=True, allow_gaps=True)    
    for i, r in enumerate(tbl):
        try:            
            # Parse addr
            addr = int(r[3].replace('Base + ', ''), 16) + m.base_addr
                
            # Quirks
            if '8' in r[2]:
                bit_width = 8
            elif '16' in r[2]:
                bit_width = 16
            elif '32' in r[2]:
                bit_width = 32
            elif '128' in r[2]:
                bit_width = 128
            else:
                print("NO MATCH", r[2])
            
            # Other parsing
            name = r[1].strip()
            description = r[0].strip()

            # Phew, we have everything
            m.registers.append(Register(
                name,
                addr,
                description=description,
                size_bits=bit_width,
            ))
        except Exception as e:
            print(f"CANNOT HANDLE: {r}", str(e))
            break
    
    dev.modules.append(m)
    print(f"Module '{m.name}': {len(m.registers)} registers")

print(f"Total: {sum(map(lambda m: len(m.registers), dev.modules))} registers")            

0 Module 'Code Flash A Configuration': Base address: 0xc3f88000
Module 'Code Flash A Configuration': 18 registers
1 Module 'Data Flash A Configuration': Base address: 0xc3f8c000
Module 'Data Flash A Configuration': 15 registers
2 Module 'SIUL': Base address: 0xc3f90000
Module 'SIUL': 233 registers
3 Module 'WakeUp Unit': Base address: 0xc3f94000
Module 'WakeUp Unit': 9 registers
4 Module 'eMIOS_0': Base address: 0xc3fa0000
Module 'eMIOS_0': 104 registers
5 Module 'eMIOS_1': Base address: 0xc3fa4000
Module 'eMIOS_1': 104 registers
6 Module 'SSCM': Base address: 0xc3fd8000
Module 'SSCM': 5 registers
7 Module 'MC_ME': Base address: 0xc3fdc000
Module 'MC_ME': 62 registers
8 Module 'FXOSC': Base address: 0xc3fe0000
Module 'FXOSC': 1 registers
9 Module 'SXOSC': Base address: 0xc3fe0040
Module 'SXOSC': 1 registers
10 Module 'FIRC': Base address: 0xc3fe0060
Module 'FIRC': 1 registers
11 Module 'SIRC': Base address: 0xc3fe0080
Module 'SIRC': 1 registers
12 Module 'FMPLL': Base address: 0xc3fe00

In [61]:
print(dev.dump())

!Device
manufacturer: NXP
part_number: SPC5604
architecture: PowerPC
bit_width: 32
endianness: little
regions:
- !Region
  name: Code Flash
  base_addr: 0x0
  size: 0x80000
  readable: true
  writable: true
  executable: true
  volatile: false
- !Region
  name: Code Flash Shadow
  base_addr: 0x200000
  size: 0x4000
  readable: true
  writable: true
  executable: true
  volatile: false
- !Region
  name: Code Flash Test
  base_addr: 0x400000
  size: 0x4000
  readable: true
  writable: true
  executable: true
  volatile: false
- !Region
  name: Data Flash
  base_addr: 0x800000
  size: 0x10000
  readable: true
  writable: true
  executable: false
  volatile: false
- !Region
  name: Data Flash Test
  base_addr: 0xc00000
  size: 0x4000
  readable: true
  writable: true
  executable: false
  volatile: false
- !Region
  name: Flash Emulation Mapping
  base_addr: 0x1000000
  size: 0x1f000000
  readable: true
  writable: true
  executable: true
  volatile: false
- !Region
  name: SRAM
  base_add

In [62]:
# Commit to file
with open(DEFINITIONS_DIR + f"/{dev.manufacturer}/{dev.part_number}.yaml", 'w') as f:
    f.write(dev.dump())