Data File - Excel


In [None]:
# ---------------------------------------------------------
# Script: Convert Auditor & Liquidator Excel Sheets to CSV
# Description:
#   - Reads two sheets ("REG_AUDITOR_202511" and "LIQUIDATOR_202510")
#     from an Excel file
#   - Cleans data types (string, dates)
#   - Exports results to CSV in destination/excel/
# ---------------------------------------------------------

In [None]:
import os
import pandas as pd

# Output location: folder that receives the exported CSV files.
# It is created here (no error if it already exists).
output_folder = "destination/excel"
os.makedirs(name=output_folder, exist_ok=True)

# Source workbook and the sheet to read for each register.
excel_file_path = "auditor_and_liquidator.xlsx"
auditor_sheetname = "REG_AUDITOR_202511"
liquidator_sheetname = "LIQUIDATOR_202510"

Read Auditor data

In [None]:
# Load the auditor sheet. header=2 makes the third spreadsheet row the
# column header (the index is zero-based); the rows above it are skipped.
df = pd.read_excel(
    io=excel_file_path,
    sheet_name=auditor_sheetname,
    header=2,
)

# Preview the first five rows.
print(df.head(n=5))

         REGISTER_NAME  REG_AUD_NUM               REG_AUD_NAME  REG_AUD_ACN  \
0  Registered Auditors       339233  A D DANIELI AUDIT PTY LTD  136616610.0   
1  Registered Auditors       486826    ABBOTT, ALASTAIR GORDON          NaN   
2  Registered Auditors       517142       ABBOTT, BELINDA KATE          NaN   
3  Registered Auditors       155449      ABBOTT, DALE GEOFFREY          NaN   
4  Registered Auditors       479572         ABBOTT, DAVID GREG          NaN   

  REG_AUD_START_DT REG_AUD_STATUS REG_AUD_SUSP_DT REG_AUD_ADD_LOCAL  \
0       24/07/2009           APPR             NaN           SYDNEY    
1       10/05/2016           APPR             NaN       EAST PERTH    
2       17/07/2019           APPR             NaN     MALVERN EAST    
3       17/07/1995           APPR             NaN         NARROGIN    
4       19/10/2015           APPR             NaN         BALLARAT    

  REG_AUD_ADD_STATE  REG_AUD_ADD_PCODE REG_AUD_ADD_COUNTRY  
0               NSW             2000.

In [4]:
# Show the dtype pandas inferred for each column of the auditor frame.
inferred_dtypes = df.dtypes
print(inferred_dtypes)

REGISTER_NAME           object
REG_AUD_NUM              int64
REG_AUD_NAME            object
REG_AUD_ACN            float64
REG_AUD_START_DT        object
REG_AUD_STATUS          object
REG_AUD_SUSP_DT         object
REG_AUD_ADD_LOCAL       object
REG_AUD_ADD_STATE       object
REG_AUD_ADD_PCODE      float64
REG_AUD_ADD_COUNTRY     object
dtype: object


In [None]:
# Convert numeric identifier columns to text.
# REG_AUD_ACN and REG_AUD_ADD_PCODE are read as float64 (REG_AUD_ACN has
# missing values), so each column goes through nullable Int64 first to drop
# the ".0" suffix and is then cast to the nullable "string" dtype.
# Unlike astype(str), "string" keeps missing values as real NA instead of the
# literal text "<NA>", so blanks are written as empty fields in the CSV.
# NOTE(review): leading zeros cannot be recovered from values stored as
# numbers; confirm whether ACNs / postcodes need zero-padding before export.
cols_to_str = ['REG_AUD_NUM', 'REG_AUD_ACN', 'REG_AUD_ADD_PCODE']

for col in cols_to_str:
    if col in df.columns:
        df[col] = df[col].astype('Int64').astype('string')
    else:
        # Report instead of skipping silently, so a misspelt name is noticed.
        print(f"Warning: column {col!r} not found; string conversion skipped")

# Convert date columns to datetime (source values are DD/MM/YYYY text).
# errors='coerce' turns values that cannot be parsed into NaT.
# The previous list entry 'REG_AUD_SUSP_DT ' carried a trailing space and
# therefore never matched the real column, leaving it unconverted.
date_cols = ['REG_AUD_START_DT', 'REG_AUD_SUSP_DT']

for col in date_cols:
    if col in df.columns:
        df[col] = pd.to_datetime(df[col], errors='coerce', dayfirst=True)
    else:
        print(f"Warning: column {col!r} not found; date conversion skipped")

In [None]:
# Export to CSV
output_file = "auditor.csv"
auditor_output_path = os.path.join(output_folder, output_file)

df.to_csv(auditor_output_path, index=False, date_format='%d/%m/%Y')

print(f"CSV exported successfully to: {auditor_output_path}")

CSV exported successfully to: destination/excel\auditor.csv


Read Liquidator data

In [None]:
# Columns to read as text.
# The date columns are deliberately NOT listed here: a 'datetime64[ns]' entry
# in read_excel's dtype mapping casts the column while reading, using pandas'
# default month-first interpretation, so DD/MM/YYYY text such as 10/05/2016
# could be read as 5 October and the dayfirst=True parse below would have
# nothing left to correct. Dates are parsed explicitly after the read instead.
liquidator_schema = {
    "REGISTER_NAME": str,
    "LIQ_NUM": str,
    "OFF_LIQ_NUM": str,
    "LIQ_NAME": str,
    "LIQ_STATUS": str,
    "LIQ_ADD_LOCAL": str,
    "LIQ_ADD_STATE": str,
    "LIQ_ADD_PCODE": str,
    "LIQ_ADD_COUNTRY": str,
    "LIQ_FIRM": str
}

# Read data from the "LIQUIDATOR_202510" sheet; header=2 makes the third
# spreadsheet row the column header.
df_liq = pd.read_excel(excel_file_path,
                       sheet_name=liquidator_sheetname,
                       header=2,
                       dtype=liquidator_schema)

# Convert date columns to datetime, day first (DD/MM/YYYY).
# errors='coerce' turns values that cannot be parsed into NaT. Columns that
# pandas already read as datetimes pass through unchanged.
date_cols = ["LIQ_START_DT", "OFF_LIQ_START_DT", "LIQ_SUSP_DT"]

for col in date_cols:
    if col in df_liq.columns:
        df_liq[col] = pd.to_datetime(df_liq[col], errors='coerce', dayfirst=True)


# Export to CSV without the index; datetime columns are written as DD/MM/YYYY.
output_file = "liquidator.csv"
liquidator_output_path = os.path.join(output_folder, output_file)

df_liq.to_csv(liquidator_output_path, index=False, date_format='%d/%m/%Y')

print(f"Liquidator CSV exported successfully to: {liquidator_output_path}")

Liquidator CSV exported successfully to: destination/excel\liquidator.csv
