In [1]:
import re
import pandas as pd

In [2]:
def replace_row_handle_with_tab(s):
    '''
    Replace every row-separator line in the raw data dump with a single tab character.

    A separator line consists of 27 asterisks, a space, the row number followed
    by a literal dot, the word "row", a space, 27 asterisks and a newline, e.g.
    "*************************** 1. row ***************************".
    The whole line, including its trailing newline, is replaced, so the text
    can afterwards be split into one chunk per row on the tab character.

    Note: tab characters already present in the input are left untouched, so
    a caller that splits on tabs will also split at those positions.

    :param s: raw string from the data file
    :return: string with a tab character in place of each row indicator;
             the input is returned unchanged when it contains no row indicator
    '''
    # Raw string literal: "\*" and "\d" are regex escapes, not Python string
    # escapes, and writing them in a normal string triggers an
    # invalid-escape-sequence warning on current Python versions.
    # "\." matches only the literal dot after the row number (a bare "."
    # would accept any character there).
    return re.sub(r"\*{27} \d+\. row \*{27}\n", "\t", s)


def parse_afm_row(row, columns):
    """
    Parse the text of one row of the raw data dump into a dictionary.

    Each line of ``row`` is stripped and examined:

    * If the text before the first ':' is one of ``columns``, the line starts
      a new attribute. Everything after that first ':' (stripped) becomes its
      value, so colons inside the value (e.g. in timestamps) are kept as-is.
    * Otherwise the line is treated as a continuation of the most recently
      seen attribute and its stripped text is appended to that value without
      any separator.
    * Lines that appear before the first recognised attribute have nothing
      to attach to and are skipped.

    :param row: text of a single row, attributes separated by newlines
    :param columns: iterable of the attribute names to recognise
    :return: dict mapping each attribute name found in ``row`` to its value
    """
    known_columns = set(columns)
    record = {}
    current_key = None
    for raw_line in row.split("\n"):
        line = raw_line.strip()
        # partition() splits on the FIRST colon only; when the line has no
        # colon, `key` is the whole line and `value` is the empty string.
        key, _, value = line.partition(":")
        if key in known_columns:
            record[key] = value.strip()
            current_key = key
        elif current_key is not None:
            record[current_key] += line
    return record


if __name__ == "__main__":
    # The `with` block closes the file handle even if reading fails.
    with open("AFM_raw_data.txt") as raw_file:
        lines_in_file = raw_file.read()

    # Every row-separator line becomes a tab; the chunk before the first
    # separator is discarded by the [1:] slice.
    modified_lines = replace_row_handle_with_tab(lines_in_file)
    rows = modified_lines.split("\t")[1:]

    columns = [
        "alarm_id", "customer_id", "hostname", "fault_severity", "device_contract_level", "status",
        "created_at", "updated_at", "alarm_created", "description", "file_name", "impact",
        "mgmt_system_addr", "mgmt_system_host_name", "mgmt_system_id", "mgmt_system_type", "os",
        "os_version", "raise_sr", "remediation", "sr_status", "sr_update_date", "syslog_msg",
        "tac_number", "tac_severity", "afm_cust_id_fk", "prod_obj_id_fk", "signature_id",
        "tac_sr_num_fk", "managed_ne_id", "product_id", "sw_type", "serial_number", "cli_status",
        "email_status", "alarm_source", "alarm_type", "cli_output", "is_collector", "company",
        "contract_number", "isclioutput_compressed_format", "request_id", "sa_id", "secondary_case",
        "device_id_fk", "solution_id", "usecase_id", "signature_uuid",
    ]

    # One dictionary per row:
    # key - the column name we need,
    # value - the string present after the first ':' character in the data file
    records = [parse_afm_row(row, columns) for row in rows]

    afm_data = pd.DataFrame(records).rename(columns={'description': 'fault_description'})
    afm_data.to_csv("AFM_data.csv", index=False)
    print("Saved csv format")
    print("Columns - {}".format(afm_data.columns))

Saved csv format
Columns - Index(['alarm_id', 'customer_id', 'hostname', 'fault_severity',
       'device_contract_level', 'status', 'created_at', 'updated_at',
       'alarm_created', 'fault_description', 'file_name', 'impact',
       'mgmt_system_addr', 'mgmt_system_host_name', 'mgmt_system_id',
       'mgmt_system_type', 'os', 'os_version', 'raise_sr', 'remediation',
       'sr_status', 'sr_update_date', 'syslog_msg', 'tac_number',
       'tac_severity', 'afm_cust_id_fk', 'prod_obj_id_fk', 'signature_id',
       'tac_sr_num_fk', 'managed_ne_id', 'product_id', 'sw_type',
       'serial_number', 'cli_status', 'email_status', 'alarm_source',
       'alarm_type', 'cli_output', 'is_collector', 'company',
       'contract_number', 'isclioutput_compressed_format', 'request_id',
       'sa_id', 'secondary_case', 'device_id_fk', 'solution_id', 'usecase_id',
       'signature_uuid'],
      dtype='object')


In [3]:
def change_case(str):
    """
    Convert a camelCase / PascalCase name to lower-case with underscores.

    The first character is lower-cased. Every later ASCII uppercase letter
    (A-Z) is replaced by an underscore followed by its lower-case form; all
    other characters are copied unchanged. Consecutive capitals are each
    split, e.g. "ID" becomes "i_d".

    :param str: the name to convert
    :return: the converted name; an empty input yields an empty string
    """
    # Guard the empty string: indexing str[0] below would raise IndexError.
    if not str:
        return ''

    pieces = [str[0].lower()]
    for char in str[1:]:
        # Membership in an explicit A-Z string keeps the check ASCII-only.
        if char in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
            pieces.append('_' + char.lower())
        else:
            pieces.append(char)

    return ''.join(pieces)

# Load the raw RMC export. low_memory=False asks pandas not to parse the file
# in internal chunks, which avoids mixed-type inference within a column.
rmc_data = pd.read_csv("RMC_raw_data.csv", low_memory=False)

# Rename every column by passing its header through change_case.
new_columns = [change_case(column) for column in rmc_data.columns]
rmc_data.columns = new_columns

# Write the renamed table back out without the index column.
rmc_data.to_csv("RMC_data.csv", index=False)
print("Saved csv format")

Saved csv format
