In [146]:
# Parse the BATERR.FMT layout file into {field name: raw format string}.
#
# Only lines whose first character is a letter are read as field definitions.
# Field names listed in REPEATED_FIELDS get a zero-based running suffix
# (LITERAL0, LITERAL1, ...) so every occurrence keeps its own entry; the dict's
# insertion order therefore follows the order of the lines in the file.
REPEATED_FIELDS = ("BATCH.BATCH_NUMBER", "STORE.STORE_NUMBER", "LITERAL", "CRLF")

result_dict = {}
seen_counts = dict.fromkeys(REPEATED_FIELDS, 0)

# `with` closes the handle even if parsing raises part-way through.
with open("BATERR.FMT", "r") as f:
    for line in f:
        # Anything that does not start with a letter is ignored.
        if not line[0].isalpha():
            continue

        line = line.strip()

        # This exact line is skipped rather than stored
        # (presumably a record header, not a field -- TODO confirm).
        if line == "RECORD_TYPE BATCH_LINE":
            continue

        # Cut off everything from the first '#' onwards.
        # NOTE(review): a '#' inside a quoted literal would be cut as well.
        pound_loc = line.find("#")
        if pound_loc != -1:
            line = line[:pound_loc]

        # First token is the field name, the remainder is its format.
        # Splitting once (instead of deleting the name with str.replace) keeps
        # the format intact when it happens to contain the field name.
        parts = line.split(None, 1)
        key = parts[0]
        value = parts[1].strip() if len(parts) > 1 else ""

        # Number the repeated field names so they do not overwrite each other.
        if key in seen_counts:
            occurrence = seen_counts[key]
            seen_counts[key] = occurrence + 1
            key = f"{key}{occurrence}"

        result_dict[key] = value

result_dict

{'BATCH.BATCH_NUMBER0': '9',
 'LITERAL0': '"      "',
 'LITERAL1': '"HDR"',
 'LITERAL2': '"    "',
 'STORE.DIVISION_NUMBER': '3',
 'LITERAL3': '"00"',
 'STORE.STORE_NUMBER0': '4',
 'LITERAL4': '"                "',
 'LITERAL5': '"    "',
 'CRLF0': '2',
 'BATCH.BATCH_NUMBER1': '9',
 'BATCH.HOST_SEQ': '9',
 'LITERAL6': '"0"',
 'BATCH_LINE.TABLE': '2',
 'BATCH_OP': '1',
 'LITERAL7': '"0"',
 'BATCH_LINE.ERR_CODE': '5',
 'LITERAL8': '"00"',
 'STORE.STORE_NUMBER1': '4',
 'PRIMARY_KEY': '21',
 'CRLF1': '2'}

In [147]:
# Turn each raw format string in result_dict into an integer:
#   * a double-quoted literal -> number of characters left after stripping the quotes
#   * an all-digit string     -> that number as an int
# Any other value is left unchanged.
for key, value in result_dict.items():
    # Entries converted by an earlier run of this cell are already ints;
    # skipping non-strings makes the cell safe to execute more than once.
    if not isinstance(value, str):
        continue
    if value.startswith('"') and value.endswith('"'):
        result_dict[key] = len(value.strip('"'))
    elif value.isdigit():
        result_dict[key] = int(value)

In [148]:
# Fields selected for the header layout, in the order they are sliced from a line.
hdr_keys = [
    "BATCH.BATCH_NUMBER0",
    "LITERAL0",
    "LITERAL1",
    "LITERAL2",
    "STORE.DIVISION_NUMBER",
    "LITERAL3",
    "STORE.STORE_NUMBER0",
    "LITERAL4",
    "LITERAL5",
    # "CRLF0"
]

# Build a fresh mapping (not an alias of result_dict) so the override below
# cannot leak back into the parsed layout.
hdr_dict = {key: result_dict[key] for key in hdr_keys}

# Manual width override for the batch number.
# NOTE(review): presumably the data file stores it in 6 characters although the
# layout file declares a different width -- confirm against the data spec.
hdr_dict["BATCH.BATCH_NUMBER0"] = 6
hdr_dict

{'BATCH.BATCH_NUMBER0': 6,
 'LITERAL0': 6,
 'LITERAL1': 3,
 'LITERAL2': 4,
 'STORE.DIVISION_NUMBER': 3,
 'LITERAL3': 2,
 'STORE.STORE_NUMBER0': 4,
 'LITERAL4': 16,
 'LITERAL5': 4}

In [149]:
# Fields selected for the table (non-header) layout, in the order they are
# sliced from a line.
table_keys = [
    "BATCH.BATCH_NUMBER1",
    "BATCH.HOST_SEQ",
    "LITERAL6",
    "BATCH_LINE.TABLE",
    "BATCH_OP",
    "LITERAL7",
    "BATCH_LINE.ERR_CODE",
    "LITERAL8",
    "STORE.STORE_NUMBER1",
    "PRIMARY_KEY",
    # "CRLF1"
]

# Build a fresh mapping (not an alias of result_dict) so the overrides below
# cannot leak back into the parsed layout.
table_dict = {key: result_dict[key] for key in table_keys}

# Manual width overrides.
# NOTE(review): presumably the data file stores both values in 6 characters
# although the layout file declares a different width -- confirm.
table_dict["BATCH.BATCH_NUMBER1"] = 6
table_dict["BATCH.HOST_SEQ"] = 6
# table_dict["PRIMARY_KEY"] = 20
table_dict

{'BATCH.BATCH_NUMBER1': 6,
 'BATCH.HOST_SEQ': 6,
 'LITERAL6': 1,
 'BATCH_LINE.TABLE': 2,
 'BATCH_OP': 1,
 'LITERAL7': 1,
 'BATCH_LINE.ERR_CODE': 5,
 'LITERAL8': 2,
 'STORE.STORE_NUMBER1': 4,
 'PRIMARY_KEY': 21}

In [150]:
import pandas as pd

pd.set_option('display.colheader_justify', 'center')

# Slice every line of BATERRCT.DAT that contains "HDR" into fixed-width fields,
# using the widths in hdr_dict (in dict order), and collect one record per line.
hdr_rows = []
with open("BATERRCT.DAT", "r") as f:  # closed automatically, even on error
    for line in f:
        # Lines are selected by the text "HDR" appearing anywhere in them.
        if "HDR" not in line:
            continue
        row = {}
        offset = 0
        for key, field_len in hdr_dict.items():
            value = line[offset:offset + field_len]
            offset += field_len
            # Wrap whitespace-only fields in quotes so their width stays visible.
            if value.isspace():
                value = f'"{value}"'
            row[key] = value
        hdr_rows.append(row)

# Build the frame once instead of concatenating and re-sorting per line.
# Rows are fed in reverse file order so that, with a stable sort, lines sharing
# the same batch number list the later line first.
hdr_df = (
    pd.DataFrame(hdr_rows[::-1], columns=list(hdr_dict))
    .sort_values(by=["BATCH.BATCH_NUMBER0"], ascending=True, kind="mergesort")
    .reset_index(drop=True)
)
hdr_df.index = hdr_df.index + 1  # 1-based row labels

hdr_df.to_csv('IMDSD_BATERR.csv')
print(hdr_df)
hdr_df

  BATCH.BATCH_NUMBER0  LITERAL0 LITERAL1 LITERAL2 STORE.DIVISION_NUMBER  \
1        020614        "      "    HDR    "    "            014           
2        020617        "      "    HDR    "    "            014           

  LITERAL3 STORE.STORE_NUMBER0       LITERAL4      LITERAL5  
1    00            0405         "                "  "    "   
2    00            0405         "                "  "    "   


Unnamed: 0,BATCH.BATCH_NUMBER0,LITERAL0,LITERAL1,LITERAL2,STORE.DIVISION_NUMBER,LITERAL3,STORE.STORE_NUMBER0,LITERAL4,LITERAL5
1,20614,""" """,HDR,""" """,14,0,405,""" """,""" """
2,20617,""" """,HDR,""" """,14,0,405,""" """,""" """


In [151]:
pd.set_option('display.colheader_justify', 'center')

# Slice every line of BATERRCT.DAT that does NOT contain "HDR" into fixed-width
# fields, using the widths in table_dict (in dict order), one record per line.
table_rows = []
with open("BATERRCT.DAT", "r") as f:  # closed automatically, even on error
    for line in f:
        if "HDR" in line:
            continue
        row = {}
        offset = 0
        for key, field_len in table_dict.items():
            value = line[offset:offset + field_len]
            offset += field_len
            # To keep spaces for field len validation
            # if key == 'PRIMARY_KEY':
            #     value = f'"{value}"'
            # Wrap whitespace-only fields in quotes so their width stays visible.
            if value.isspace():
                value = f'"{value}"'
            row[key] = value
        table_rows.append(row)

# Build the frame once instead of concatenating, cleaning and re-sorting per line.
# Rows are fed in reverse file order so that, with the stable multi-column sort,
# lines sharing the same sort keys list the later line first.
table_df = (
    pd.DataFrame(table_rows[::-1], columns=list(table_dict))
    # A field holding nothing but the line break was quoted above as '"\n"';
    # turn it into NaN and drop every row that contains one.
    .replace('"\n"', float("nan"))
    .dropna()
    .sort_values(by=["BATCH.BATCH_NUMBER1", "BATCH.HOST_SEQ"], ascending=True)
    .reset_index(drop=True)
)
table_df.index = table_df.index + 1  # 1-based row labels

# Appended below the header rows already written to the same CSV file.
table_df.to_csv('IMDSD_BATERR.csv', mode='a', header=True)
print(table_df)
table_df

    BATCH.BATCH_NUMBER1 BATCH.HOST_SEQ LITERAL6 BATCH_LINE.TABLE BATCH_OP  \
1          020614           000178         S           UP            3      
2          020614           000179         S           UP            3      
3          020614           000180         S           UP            3      
4          020614           000182         S           UP            3      
5          020614           000183         S           UP            3      
..                  ...            ...      ...              ...      ...   
110        020617           000231         S           UP            3      
111        020617           000232         S           UP            3      
112        020617           000233         S           UP            3      
113        020617           000234         S           UP            3      
114        020617           000235         S           UP            3      

    LITERAL7 BATCH_LINE.ERR_CODE LITERAL8 STORE.STORE_NUMBER1  \
1       0 

Unnamed: 0,BATCH.BATCH_NUMBER1,BATCH.HOST_SEQ,LITERAL6,BATCH_LINE.TABLE,BATCH_OP,LITERAL7,BATCH_LINE.ERR_CODE,LITERAL8,STORE.STORE_NUMBER1,PRIMARY_KEY
1,020614,000178,S,UP,3,0,00008,00,0405,0429179 \n
2,020614,000179,S,UP,3,0,00008,00,0405,0452008 \n
3,020614,000180,S,UP,3,0,00008,00,0405,0431181 \n
4,020614,000182,S,UP,3,0,00008,00,0405,0429179 \n
5,020614,000183,S,UP,3,0,00008,00,0405,0445013 \n
...,...,...,...,...,...,...,...,...,...,...
110,020617,000231,S,UP,3,0,00008,00,0405,0473095 \n
111,020617,000232,S,UP,3,0,00008,00,0405,0400004 \n
112,020617,000233,S,UP,3,0,00008,00,0405,0400016 \n
113,020617,000234,S,UP,3,0,00008,00,0405,0400011 \n
