This Jupyter notebook handles the generation of all register enum values based on a "Modbus Interface Definitions"-PDF file provided by Huawei and the existing register enum class content for remapping already existing values.

Input parameters:

- `pdf_path`: Path to the "Modbus Interface Definitions"-PDF file
- `register_pages`: Page range where the tables of one register (e.g. "Inverter Equipment Register") are located in the PDF
- `old_registers_file_path`: Path of the text file containing the current register enum values
- `output_file_path`: Path of the resulting text-file containing the generated register enum values

The process flow consists of the following steps:

1. Parse the "Modbus Interface Definitions"-PDF into a pandas DataFrame
    1. The pages set in `register_pages` of the "Modbus Interface Definitions"-PDF file set in `pdf_path` are read by tabula to extract the contained table contents
    2. All found tables are merged into one, unnecessary columns are dropped, all columns are named accordingly and empty rows are dropped
    3. Carriage-returns are removed from the _name_-column
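    4. The values in the columns _type_, _unit_ and _gain_ are mapped to their target representations, which are stored in the new columns _parsed_type_, _parsed_unit_ and _parsed_gain_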
    5. The table is sorted along the _address_ values
    6. The table is displayed for examination
2. Parse the current register enum values into a pandas DataFrame
    1. The file set in `old_registers_file_path` is opened for reading
    2. Each line is manipulated to represent a CSV-line
    3. All CSV-lines are read into a pandas DataFrame, all columns are named accordingly
    4. The table is displayed for examination
3. DataFrames are joined
    1. The two DataFrames from steps 1 and 2 are left-joined on their _address_ columns
    2. The table is displayed for examination
4. Generated register enum values are exported to output file
    1. Each row in the joined table is converted to the target format for a register enum value
    2. All rows are exported to the file set in `output_file_path`

The content of the output file can be copied over to the respective register enum class in `registers.py` (e.g. `InverterEquipmentRegister`). Some manual post-processing might be required.

In [21]:
import pandas as pd
import tabula
from IPython.display import display

# Input parameters of the notebook.
pdf_path = '../docs/modbus.pdf'  # PDF the register tables are extracted from
register_pages = '78-80'  # page range handed to tabula
old_registers_file_path = 'input/registers.txt'  # current register enum values
output_file_path = 'output/powermeter.txt'  # destination of the generated values

#------------------------------------------------
# Data type spelling found in the PDF -> DataType member name used in the
# generated ``datatypes.DataType.<member>`` expression.
datatype_mapping = {
    'STR': 'STRING',
    'String': 'STRING',
    'STRING': 'STRING',
    'U16': 'UINT16_BE',
    'UINT16': 'UINT16_BE',
    'ENUM16': 'UINT16_BE',
    'U32': 'UINT32_BE',
    'UINT32': 'UINT32_BE',
    'EPOCHTIME': 'UINT32_BE',
    'I16': 'INT16_BE',
    'INT16': 'INT16_BE',
    'I32': 'INT32_BE',
    'INT32': 'INT32_BE',
    'Bitfield16': 'BITFIELD16',
    'Bitfield32': 'BITFIELD32',
    'DBitfield32': 'BITFIELD32',
    'MLD/Bytes': 'MULTIDATA',
    'MULTIDATA': 'MULTIDATA',
    'BYTES': 'MULTIDATA',
    'Bytes': 'MULTIDATA',
}


# Unit spelling found in the PDF -> source-code text emitted for the unit
# argument. Values are code snippets, hence the nested quotes and the
# string "None". Units without an entry are quoted verbatim by
# process_unit_column.
unit_mapping = {
    'N/A': "None",
    'NA': "None",
    'Var': "'var'",
    'kVar': "'kvar'",
    'kVarh': "'kvarh'",
    # Megaohm. The omega is written with escapes so the key survives any
    # re-encoding of this file; both code points that render as an omega
    # are accepted.
    'M\u03a9': "'MOhm'",  # GREEK CAPITAL LETTER OMEGA
    'M\u2126': "'MOhm'",  # OHM SIGN
    # The key previously stored here was the UTF-8 bytes of the omega
    # mis-decoded as Latin-1; kept so input in that form still maps.
    'M\u00ce\u00a9': "'MOhm'",
    # kW is emitted as W; process_gain_column sets the gain to 1 for these.
    'kW': "'W'",
    'Kw': "'W'",
}


# Gain spelling found in the PDF -> source-code text emitted for the gain
# argument. Gains without an entry are passed through unchanged.
gain_mapping = {
    'N/A': "None",
    'NA': "None",
}


def process_name_column(merged_table: pd.DataFrame) -> pd.DataFrame:
    """Replace every carriage return in the ``name`` column with a space.

    The frame is modified in place; the same object is returned so the
    call can be used in an assignment.

    :param merged_table: table containing a string column ``name``
    :return: ``merged_table`` itself, with the cleaned ``name`` column
    """
    cleaned_names = merged_table['name'].str.replace('\r', ' ')
    merged_table['name'] = cleaned_names
    return merged_table


def process_type_column(merged_table: pd.DataFrame) -> pd.DataFrame:
    """Clean the ``type`` column and derive ``parsed_type`` from it.

    Carriage returns are removed from ``type``; each cleaned value is then
    looked up in ``datatype_mapping``. Values without an entry in the
    mapping yield a missing value in ``parsed_type``.

    The frame is modified in place and returned.

    :param merged_table: table containing a string column ``type``
    :return: ``merged_table`` itself, with ``type`` cleaned and
        ``parsed_type`` added
    """
    cleaned_types = merged_table['type'].str.replace('\r', '')
    merged_table['type'] = cleaned_types
    merged_table['parsed_type'] = cleaned_types.map(datatype_mapping)
    return merged_table


def process_unit_column(merged_table: pd.DataFrame) -> pd.DataFrame:
    """Clean the ``unit`` column and derive ``parsed_unit`` from it.

    Carriage returns are removed from ``unit``. ``parsed_unit`` holds the
    text from ``unit_mapping`` when the unit has an entry there; otherwise
    it holds the unit itself wrapped in single quotes.

    The frame is modified in place and returned.

    :param merged_table: table containing a string column ``unit``
    :return: ``merged_table`` itself, with ``unit`` cleaned and
        ``parsed_unit`` added
    """

    def to_unit_text(unit):
        # Fallback for units not listed in the mapping: quote them verbatim.
        quoted = f"'{unit}'"
        return unit_mapping.get(unit, quoted)

    cleaned_units = merged_table['unit'].str.replace('\r', '')
    merged_table['unit'] = cleaned_units
    merged_table['parsed_unit'] = cleaned_units.apply(to_unit_text)
    return merged_table


def process_gain_column(merged_table: pd.DataFrame) -> pd.DataFrame:
    """Derive the ``parsed_gain`` column from ``unit`` and ``gain``.

    Rows whose unit is ``kW`` or ``Kw`` get the gain ``1``. All other rows
    get the entry from ``gain_mapping`` for their gain, or the gain itself
    when the mapping has no entry for it.

    The frame is modified in place and returned.

    :param merged_table: table containing the columns ``unit`` and ``gain``
    :return: ``merged_table`` itself, with ``parsed_gain`` added
    """

    def resolve_gain(row):
        # NOTE(review): presumably gain 1 because unit_mapping rewrites
        # these units to 'W' -- confirm against the PDF's gain for kW rows.
        if row['unit'] in ['kW', 'Kw']:
            return 1
        raw_gain = row['gain']
        return gain_mapping.get(raw_gain, raw_gain)

    merged_table['parsed_gain'] = merged_table.apply(resolve_gain, axis=1)
    return merged_table


# Display all rows for examination of data
pd.set_option('display.max_rows', None)

# Extract every table on the configured pages. All cells are read as strings
# (dtype=str) so addresses, gains and quantities keep their textual form.
tables = tabula.read_pdf(pdf_path, pages=register_pages, multiple_tables=True, lattice=True, pandas_options={'dtype': str})
if not tables:
    print('No tables found')
    # Raise SystemExit directly: exit() is a helper installed for interactive
    # use and is not guaranteed to exist or to behave the same everywhere.
    raise SystemExit(1)

# Stack the per-page tables into one frame with a fresh consecutive index.
merged_table = pd.concat(objs=tables, ignore_index=True)

# Discard the first and the tenth extracted column; the eight columns that
# remain are renamed below. A list keeps the order of the labels explicit.
merged_table.drop(columns=[merged_table.columns[0], merged_table.columns[9]], inplace=True)

# Name the first eight remaining columns by position.
column_names = ['index', 'name', 'mode', 'type', 'unit', 'gain', 'address', 'quantity']
column_mapping = dict(zip(merged_table.columns, column_names))
merged_table.rename(columns=column_mapping, inplace=True)

# Drop every row that has a missing value in any column.
merged_table.dropna(inplace=True)

merged_table = process_name_column(merged_table)
merged_table = process_type_column(merged_table)
merged_table = process_unit_column(merged_table)
merged_table = process_gain_column(merged_table)

# The address column holds strings, so a plain sort would be lexicographic
# (e.g. '100000' before '37100'). Sort by numeric value instead; addresses
# that cannot be parsed as numbers end up last. The column itself stays a
# string column so the later join on 'address' is unaffected.
merged_table.sort_values(
    by=['address'],
    key=lambda column: pd.to_numeric(column, errors='coerce'),
    inplace=True,
)

# Examination step
display(merged_table)

Unnamed: 0,index,name,mode,type,unit,gain,address,quantity,parsed_type,parsed_unit,parsed_gain
0,1,Meter status,RO,UINT16,,1,37100,1,UINT16_BE,,1
1,2,Grid voltage (A phase),RO,INT32,V,10,37101,2,INT32_BE,'V',10
2,3,B phase voltage,RO,INT32,V,10,37103,2,INT32_BE,'V',10
3,4,C phase voltage,RO,INT32,V,10,37105,2,INT32_BE,'V',10
4,5,Grid current(A phase),RO,INT32,A,100,37107,2,INT32_BE,'A',100
5,6,B phase current,RO,INT32,A,100,37109,2,INT32_BE,'A',100
6,7,C phase current,RO,INT32,A,100,37111,2,INT32_BE,'A',100
7,8,Active power,RO,INT32,W,1,37113,2,INT32_BE,'W',1
8,9,Reactive power,RO,INT32,Var,1,37115,2,INT32_BE,'var',1
9,10,Power factor,RO,INT16,,1000,37117,1,INT16_BE,,1000


In [22]:
from io import StringIO
import pandas

# Turns a line of the form ``Name = Register(a, b, c)`` into the CSV line
# ``Name,Register,a,b,c``: '=' and '(' become separators, ')' and blanks are
# removed. One translate pass replaces the four chained replace() calls.
register_line_to_csv = str.maketrans({'=': ',', '(': ',', ')': None, ' ': None})

# Explicit encoding so the result does not depend on the platform default.
# NOTE(review): assumes the file is UTF-8 (it is meant to hold content copied
# from a Python source file) -- confirm.
with open(old_registers_file_path, 'r', encoding='utf-8') as input_file:
    csv_lines = ''.join(line.translate(register_line_to_csv) for line in input_file)

# All columns are read as strings so 'address' can be joined against the
# string addresses extracted from the PDF.
old_registers = pandas.read_csv(StringIO(csv_lines), header=None, names=['name', 'register', 'address', 'quantity', 'type', 'gain', 'unit', 'mode', 'mapping'], dtype=str)

# Examination step
display(old_registers)

Unnamed: 0,name,register,address,quantity,type,gain,unit,mode,mapping
0,MeterType,Register,37125,1,datatypes.DataType.UINT16_BE,1,,AccessType.RO,mappings.MeterType
1,MeterStatus,Register,37100,1,datatypes.DataType.UINT16_BE,1,,AccessType.RO,mappings.MeterStatus
2,MeterModelDetectionResult,Register,37138,1,datatypes.DataType.UINT16_BE,1,,AccessType.RO,mappings.MeterModelDetectionResult
3,APhaseVoltage,Register,37101,2,datatypes.DataType.INT32_BE,10,'V',AccessType.RO,
4,BPhaseVoltage,Register,37103,2,datatypes.DataType.INT32_BE,10,'V',AccessType.RO,
5,CPhaseVoltage,Register,37105,2,datatypes.DataType.INT32_BE,10,'V',AccessType.RO,
6,APhaseCurrent,Register,37107,2,datatypes.DataType.INT32_BE,100,'A',AccessType.RO,
7,BPhaseCurrent,Register,37109,2,datatypes.DataType.INT32_BE,100,'A',AccessType.RO,
8,CPhaseCurrent,Register,37111,2,datatypes.DataType.INT32_BE,100,'A',AccessType.RO,
9,ActivePower,Register,37113,2,datatypes.DataType.INT32_BE,1,'W',AccessType.RO,


In [23]:
# Left join: every register extracted from the PDF is kept; the columns of
# the existing enum value are attached where the address matches. Columns
# present in both frames receive the suffixes _x (PDF) and _y (old values).
joined = merged_table.merge(
    right=old_registers,
    on='address',
    how='left',
)

# Examination step
display(joined)

Unnamed: 0,index,name_x,mode_x,type_x,unit_x,gain_x,address,quantity_x,parsed_type,parsed_unit,parsed_gain,name_y,register,quantity_y,type_y,gain_y,unit_y,mode_y,mapping
0,1,Meter status,RO,UINT16,,1,37100,1,UINT16_BE,,1,MeterStatus,Register,1,datatypes.DataType.UINT16_BE,1,,AccessType.RO,mappings.MeterStatus
1,2,Grid voltage (A phase),RO,INT32,V,10,37101,2,INT32_BE,'V',10,APhaseVoltage,Register,2,datatypes.DataType.INT32_BE,10,'V',AccessType.RO,
2,3,B phase voltage,RO,INT32,V,10,37103,2,INT32_BE,'V',10,BPhaseVoltage,Register,2,datatypes.DataType.INT32_BE,10,'V',AccessType.RO,
3,4,C phase voltage,RO,INT32,V,10,37105,2,INT32_BE,'V',10,CPhaseVoltage,Register,2,datatypes.DataType.INT32_BE,10,'V',AccessType.RO,
4,5,Grid current(A phase),RO,INT32,A,100,37107,2,INT32_BE,'A',100,APhaseCurrent,Register,2,datatypes.DataType.INT32_BE,100,'A',AccessType.RO,
5,6,B phase current,RO,INT32,A,100,37109,2,INT32_BE,'A',100,BPhaseCurrent,Register,2,datatypes.DataType.INT32_BE,100,'A',AccessType.RO,
6,7,C phase current,RO,INT32,A,100,37111,2,INT32_BE,'A',100,CPhaseCurrent,Register,2,datatypes.DataType.INT32_BE,100,'A',AccessType.RO,
7,8,Active power,RO,INT32,W,1,37113,2,INT32_BE,'W',1,ActivePower,Register,2,datatypes.DataType.INT32_BE,1,'W',AccessType.RO,
8,9,Reactive power,RO,INT32,Var,1,37115,2,INT32_BE,'var',1,ReactivePower,Register,2,datatypes.DataType.INT32_BE,1,'var',AccessType.RO,
9,10,Power factor,RO,INT16,,1000,37117,1,INT16_BE,,1000,PowerFactor,Register,1,datatypes.DataType.INT16_BE,1000,,AccessType.RO,


In [24]:
import csv


def _format_register(row) -> str:
    """Render one row of the joined table as a register enum value line.

    The enum member name is taken from the existing register (``name_y``)
    when the address was matched; otherwise the name from the PDF
    (``name_x``) is emitted in double quotes as a placeholder. The mapping
    argument is ``None`` when the existing register had none.

    :param row: one row of ``joined``
    :return: text of the form ``Name = Register(address, quantity, ...)``
    """
    if pandas.isnull(row['name_y']):
        member_name = '"' + row['name_x'] + '"'
    else:
        member_name = row['name_y']

    mapping = None if pandas.isnull(row['mapping']) else row['mapping']

    # Double-quoted f-strings with single-quoted keys: nesting the same quote
    # character inside a replacement field only parses on Python 3.12+.
    return (
        f"{member_name} = Register({row['address']}, {row['quantity_x']}, "
        f"datatypes.DataType.{row['parsed_type']}, {row['parsed_gain']}, "
        f"{row['parsed_unit']}, AccessType.{row['mode_x']}, {mapping})"
    )


joined['new_register'] = joined.apply(_format_register, axis=1)

# One generated line per row, written verbatim: no header, no index and no
# quoting, so the file content can be pasted into the enum class.
joined.to_csv(output_file_path, columns=['new_register'], index=False, header=False, quoting=csv.QUOTE_NONE, sep=';')