In [1]:
"""
Inputs:
    Tonnes data: Base tonnes for primary industries (agriculture, mining, manufacturing).
    Rates data: Production rates for 19 industries.
    Employment data: Blue-collar employment by FA and industry.
Output:
    A DBF file with calculated production tonnes for each FA across 25 industry categories
    
Key Logic:
    The core calculation follows the formula:
        Production Tonnes(FA,IC) = Blue_Emp(FA,IC) * RATE(FA,IC) + TONNES(FA,IC)
        
    Only agriculture, mining, and manufacturing (IC1-IC3) have base tonnes from prod_tonnes;
    for the other industries (IC4-IC19) TONNES is zero, so they rely solely on employment × rate.
    IC20-IC25 are hardcoded to zero.
"""

import warnings

import pandas as pd
from dbfread import DBF

# Open the three input DBF tables for this scenario.
# NOTE: the handle names do not describe their contents; the following cell
# turns them into tonnes_df, rates_df and reci_df respectively.
dbi_data = DBF("./INITFM_PT_Y2056_WFY_Funded_Op2_001.DBF")  # base tonnes -> tonnes_df
lookup_data = DBF("./INITFM_PR_Y2056_WFY_Funded_Op2_001.DBF")  # production rates -> rates_df
reci_data = DBF("./INITFMDI_BlueCollarFA_Y2056_WFY_Funded_Op2_001.DBF")  # blue-collar employment -> reci_df

In [28]:
# Materialise each DBF table as a DataFrame, in the order:
# base tonnes, production rates, blue-collar employment.
tonnes_df, rates_df, reci_df = (
    pd.DataFrame(iter(table))
    for table in (dbi_data, lookup_data, reci_data)
)

In [29]:
# Base production tonnes per freight area (FA), keyed by FA_INDEX.
# Only the three primary industries carry base tonnes; each value is a list in
# IC order: [agriculture (IC1), mining (IC2), manufacturing (IC3)].
TONNES_COLUMNS = ('A_AGRICULTU', 'B_MINING', 'C_MANUFACTU')

prod_tonnes = {}
for row in tonnes_df.itertuples(index=False):
    raw_tonnes = (getattr(row, column) for column in TONNES_COLUMNS)
    # NaN is truthy, so `value or 0` alone lets a missing value held as NaN
    # through and it then propagates into every total built from it.
    # Treat NaN/None explicitly as 0.
    prod_tonnes[row.FA_INDEX] = [
        0.0 if pd.isna(value) else float(value or 0) for value in raw_tonnes
    ]

# Production rates per freight area (FA), keyed by FA_INDEX.
# Each value is a list of 19 rates in IC order (IC1..IC19).
RATE_COLUMNS = (
    'A_AGRICULTU', 'B_MINING', 'C_MANUFACTU', 'D_ELECTRICI', 'E_CONSTRUCT',
    'F_WHOLESALE', 'G_RETAIL_TR', 'H_ACCOMODAT', 'I_TRANSPORT', 'J_INFORMATI',
    'K_FINANCIAL', 'L_RENTAL_HIR', 'M_PROFESSIO', 'N_ADMINISTR', 'O_PUBLIC_AD',
    'P_EDUCATION', 'Q_HEALTH_CA', 'R_ARTS_AND_', 'S_OTHER_SER',
)
# 'L_RENTAL_HIR' is 12 characters while every other field name used here is at
# most 11, which suggests the stored name is 'L_RENTAL_HI'.
# TODO confirm against rates_df.columns. Until then accept either spelling
# instead of silently falling back to a rate of 0.
FIELD_NAME_WIDTH = 11

# Resolve each expected rate column to the name actually present in rates_df.
rate_fields = []
for name in RATE_COLUMNS:
    candidates = (name, name[:FIELD_NAME_WIDTH])
    field = next((c for c in candidates if c in rates_df.columns), None)
    if field is None:
        # Keep the original best-effort behaviour (rate 0) but make it visible.
        warnings.warn(
            f"Rate column {name!r} not found in rates data; using a rate of 0 for it."
        )
    rate_fields.append(field)

prod_rates = {}
for row in rates_df.itertuples(index=False):
    raw_rates = [
        None if field is None else getattr(row, field, None)
        for field in rate_fields
    ]
    # NaN is truthy, so `value or 0` alone would not replace it; map
    # NaN/None to 0 explicitly so one blank rate cannot turn a total into NaN.
    prod_rates[row.FA_INDEX] = [
        0.0 if pd.isna(value) else float(value or 0) for value in raw_rates
    ]

# Build one output record per freight area (FA):
#   FROM_IC{n} = blue-collar employment * rate (+ base tonnes for IC1..IC3)
N_RATED_IC = 19   # IC1..IC19 have an employment column and a production rate
N_BASE_IC = 3     # IC1..IC3 additionally carry base tonnes
N_OUTPUT_IC = 25  # IC20..IC25 are emitted as zero

# Fallbacks for an FA that is absent from the rates / tonnes lookups.
no_rates = [0] * N_RATED_IC
no_tonnes = [0] * N_BASE_IC

output_data = []
for row in reci_df.itertuples():
    fa_ind = getattr(row, 'FA_INDEX')
    fa_rates = prod_rates.get(fa_ind, no_rates)
    fa_tonnes = prod_tonnes.get(fa_ind, no_tonnes)

    from_ic = []
    for ic in range(1, N_OUTPUT_IC + 1):
        if ic > N_RATED_IC:
            from_ic.append(0)
            continue
        bc_raw = getattr(row, f'IC{ic}_BC', 0)
        # NaN is truthy, so `or 0` alone would not replace a missing
        # employment value held as NaN; treat NaN/None explicitly as 0.
        bc_value = 0.0 if pd.isna(bc_raw) else float(bc_raw or 0)
        tonnes = fa_tonnes[ic - 1] if ic <= N_BASE_IC else 0
        from_ic.append(bc_value * fa_rates[ic - 1] + tonnes)

    output_record = {
        'FA_INDEX': fa_ind,
        'FA_LABEL': getattr(row, 'FA_LABEL'),
        **{f'FROM_IC{i}': val for i, val in enumerate(from_ic, start=1)},
    }
    output_data.append(output_record)

# Assemble the per-FA records into a frame and round every FROM_IC* column
# to 6 decimal places.
output_df = pd.DataFrame(output_data)

numeric_columns = [name for name in output_df.columns if name.startswith('FROM_IC')]
output_df = output_df.assign(
    **{name: output_df[name].round(6) for name in numeric_columns}
)

In [20]:
# NOTE(review): `lookup_df` is not assigned in any of the cells shown above
# (the rates table is held in `rates_df`), and this cell's execution count (20)
# is lower than that of the cells above, so it depends on earlier kernel state.
# TODO: confirm which frame is meant and display a bounded view, e.g. `.head()`.
lookup_df

Unnamed: 0,FA_INDEX,FA_LABEL,A_AGRICULTU,B_MINING,C_MANUFACTU,D_ELECTRICI,E_CONSTRUCT,F_WHOLESALE,G_RETAIL_TR,H_ACCOMODAT,...,P_EDUCATION,Q_HEALTH_CA,R_ARTS_AND_,S_OTHER_SER,IMEX_CR,IMEX_BR,IMEX_LH,ICDR_CR,ICDR_BR,ICDR_LH
0,1.0,Banyule (C) - Heidelberg,333.5332,35.1114,15.5990,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.0,Banyule (C) - North,833.8330,87.7781,38.9965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.0,Bayside (C) - Brighton,338.1894,35.6012,50.5188,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,Bayside (C) - South,338.1894,35.6012,50.5188,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5.0,Boroondara (C) - Camberwell N.,138.9716,14.6296,6.9485,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,246.0,Somerton Integrated Warehousing,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
246,247.0,Somerton General Warehousing,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
247,248.0,Spare terminal freight area 1,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
248,249.0,Spare terminal freight area 2,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
