In [None]:
import fixedfieldreader as ffr
import pandas as pd

First we define the field descriptors, based on the documentation.
The format is
```
(name, width, type)
```
where `name` is the field name, `width` is the field width, and `type` is the field type (`s` for string or `x` for omitted/ignored).
Names for non-omitted fields must be distinct. Names for omitted fields are not used, and can be repeated (e.g., "reserved").

In [None]:
# Fixed-width record layout: one (name, width, type) tuple per field, listed in
# file order. Type 's' keeps the field as a string; type 'x' skips it.
# The trailing comments give the 1-based column span of each field, obtained by
# accumulating the widths from column 1 (the widths total 488).
#
# NOTE(review): the published NCHS multiple-cause mortality layout appears to
# carry a 2-character education item at columns 61-62 that is not listed here,
# and a 1-character final recode at column 488. If that is right, every field
# from 'edu2003' through 'hispanic_origin' is read 2 columns early and the
# 3-wide last field only masks the shortfall -- confirm against the official
# record layout before relying on those columns.
fields = [
    ('reserved',                     18, 'x'),  # 1-18
    ('record_type',                   1, 's'),  # 19
    ('resident_status',               1, 's'),  # 20
    ('state_of_occurrence_fips',      2, 's'),  # 21-22
    ('county_of_occurrence_fips',     3, 's'),  # 23-25
    ('reserved',                      2, 'x'),  # 26-27
    ('county_of_occurrence_pop',      1, 'x'),  # 28
    ('state_of_residence_fips',       2, 's'),  # 29-30
    ('reserved',                      2, 'x'),  # 31-32
    ('state_of_residence_recode',     2, 's'),  # 33-34
    ('county_of_residence_fips',      3, 's'),  # 35-37
    ('reserved',                     13, 'x'),  # 38-50
    ('county_of_residence_pop',       1, 'x'),  # 51
    ('reserved',                      7, 'x'),  # 52-58
    ('state_of_birth_recode',         2, 's'),  # 59-60
    ('edu2003',                       1, 's'),  # 61
    ('eduflag',                       1, 's'),  # 62
    ('death_mon',                     2, 's'),  # 63-64
    ('reserved',                      2, 'x'),  # 65-66
    ('sex',                           1, 's'),  # 67
    ('detailed_age',                  4, 's'),  # 68-71
    ('age_sub_flag',                  1, 's'),  # 72
    ('age_recode_52',                 2, 's'),  # 73-74
    ('age_recode_27',                 2, 's'),  # 75-76
    ('age_recode_12',                 2, 's'),  # 77-78
    ('infant_age_recode_22',          2, 's'),  # 79-80
    ('place_of_death_status',         1, 's'),  # 81
    ('marital_status',                1, 's'),  # 82
    ('death_day_of_week',             1, 's'),  # 83
    ('reserved',                     16, 'x'),  # 84-99
    ('data_year',                     4, 's'),  # 100-103
    ('work_injury',                   1, 's'),  # 104
    ('death_manner',                  1, 's'),  # 105
    ('disposition_manner',            1, 's'),  # 106
    ('autopsy',                       1, 's'),  # 107
    ('reserved',                     34, 'x'),  # 108-141
    ('activity_code',                 1, 's'),  # 142
    ('place_of_injury',               1, 's'),  # 143
    ('icd10',                         4, 's'),  # 144-147
    ('358_cause_recode',              3, 's'),  # 148-150
    ('reserved',                      1, 'x'),  # 151
    ('113_cause_recode',              3, 's'),  # 152-154
    ('130_infant_cause_recode',       3, 's'),  # 155-157
    ('39_cause_recode',               2, 's'),  # 158-159
    ('reserved',                      1, 'x'),  # 160
    ('multiple_conditions',         281, 's'),  # 161-441
    ('reserved',                      1, 'x'),  # 442
    ('race',                          2, 's'),  # 443-444
    ('race_bridged',                  1, 's'),  # 445
    ('race_imputed',                  1, 's'),  # 446
    ('race_recode_3',                 1, 's'),  # 447
    ('race_recode_5',                 1, 's'),  # 448
    ('reserved',                     33, 'x'),  # 449-481
    ('hispanic_origin',               3, 's'),  # 482-484
    ('reserved',                      1, 'x'),  # 485
    ('hispanic_origin_race_recode',   3, 's'),  # 486-488
]

The fields are used to initialize a factory class instance.

In [None]:
# Unpack the descriptor list so that each (name, width, type) tuple is passed
# to the factory constructor as its own positional argument.
factory = ffr.FixedFieldReaderFactory(*fields)

And the factory is used to make a reader for a file (any iterable of lines) in the specified format.

In [None]:
# Read all records while the file is still open: list() drains the reader
# inside the `with` block, so `data` stays usable after the file is closed.
# NOTE(review): the file is opened in binary mode ('rb'), so the reader is fed
# bytes lines -- confirm the fields come back as str, since a later cell calls
# `.str.split(' +')` on one of the resulting columns.
with open('vs16mort-subset.duscmcpub', 'rb') as file:
    # usedict=False: each record is returned as a tuple of fields, not a dict.
    reader = factory.reader(file, usedict=False)
    data = list(reader)

The reader can (must) be iterated to retrieve tuples of fields (if `usedict` is `False`) or dicts of fields (if `usedict` is `True`). Below, we use the resulting list of tuples to initialize a dataframe, and further process one of the columns by splitting it into a list.

In [None]:
# Build the frame from the parsed tuples. Column labels are the names of the
# descriptors that are not skipped (type other than 'x'), in file order.
# The 'multiple_conditions' column is then replaced by its split form: each
# string is cut on runs of one or more spaces, giving a list per row.
df = pd.DataFrame(
    data,
    columns=[name for name, _, kind in fields if kind != 'x'],
).assign(
    multiple_conditions=lambda frame: frame['multiple_conditions'].str.split(' +'),
)

In [None]:
# Display the frame transposed: one row per field, one column per record.
df.T