In [2]:
import pandas as pd

In [3]:
# load raw data
raw_data = pd.read_csv('postcodes_2021_expanded.csv')

In [4]:
# fill missing values with the preceding non-missing value
raw_data_filled = raw_data.fillna(method='ffill')

  raw_data_filled = raw_data.fillna(method='ffill')


In [30]:
# set year to 2021
raw_data_filled['year'] = 2021
# remove text from postcode
raw_data_filled['postcode'] = raw_data_filled['postcode'].str.replace(', SA', '')
# convert postcode to string
raw_data_filled['postcode'] = raw_data_filled['postcode'].astype(str)
# set year to integer
raw_data_filled['year'] = raw_data_filled['year'].astype(int)
# change values of cultural_background
raw_data_filled = raw_data_filled.replace({'cultural_background': {"Indigenous": "ATSI", "Non-Indigenous": "Other"}})


In [31]:
# load postcode denominator data
postcode_denominator = pd.read_csv('postcode_denominators_SRA.csv')
# set year to integer
postcode_denominator['year'] = postcode_denominator['year'].astype(int)
# get pre-2019 data
postcode_denominator_new = postcode_denominator[postcode_denominator['year'] < 2019]
# convert postcode to string
postcode_denominator_new['postcode'] = postcode_denominator_new['postcode'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  postcode_denominator_new['postcode'] = postcode_denominator_new['postcode'].astype(str)


In [32]:
# get distinct postcode and region pairs
postcode_region = postcode_denominator_new[['postcode', 'region']].drop_duplicates()
# convert postcode_region to dictionary
postcode_region_dict = postcode_region.set_index('postcode')['region'].to_dict()

In [33]:
# add raw_data_filled to postcode_denoms_new, changing year
for year in range(2018, 2023):
    raw_data_filled['year'] = year
    postcode_denominator_new = pd.concat([postcode_denominator_new, raw_data_filled])

In [34]:
# map region to postcode using apply
postcode_denominator_new['region'] = postcode_denominator_new['postcode'].map(postcode_region_dict)

In [35]:
# compute adjusted population
def computeAdjustedPopulation(row):
    if row['year'] > 2018:
        if row['cultural_background'] == 'ATSI':
            return row['population'] * 1.252342134
        else:
            return row['population'] * 1.014124163
    else:
        return row['adjusted_population']

# apply
postcode_denominator_new['adjusted_population'] = postcode_denominator_new.apply(computeAdjustedPopulation, axis=1)

In [38]:
# load seifa data
seifa_2021 = pd.read_csv('seifa_2021.csv')
seifa_2021['year'] = 2021
# convert postcode to string
seifa_2021['postcode'] = seifa_2021['postcode'].astype(str)

# create dictionary of postcode to each of the 4 seifa scores
# Iterate through the DataFrame rows and build the nested dictionary
seifa_dict = {}
for index, row in seifa_2021.iterrows():
    postcode = row['postcode']
    inner_dict = {key: row[key] for key in seifa_2021.columns if key != 'postcode'}
    seifa_dict[postcode] = inner_dict

In [39]:
# function to apply seifa dict values to SEIFA_disadvantage column
def computeSEIFADisadvantage(row):
    if row['year'] > 2018:
        # get seifa_disadvantage from the dict
        postcode = row['postcode']
        if postcode in seifa_dict:
            return seifa_dict[postcode]['SEIFA_disadvantage']
        else:
            return row['SEIFA_disadvantage']
    else:
        return row['SEIFA_disadvantage']
def computeSEIFAadvantage(row):
    if row['year'] > 2018:
        # get SEIFA_advantage_disadvantage from the dict
        postcode = row['postcode']
        if postcode in seifa_dict:
            return seifa_dict[postcode]['SEIFA_advantage_disadvantage']
        else:
            return row['SEIFA_advantage_disadvantage']
    else:
        return row['SEIFA_advantage_disadvantage']
def computeSEIFAeconomic(row):
    if row['year'] > 2018:
        # get SEIFA_socioeconomic from the dict
        postcode = row['postcode']
        if postcode in seifa_dict:
            return seifa_dict[postcode]['SEIFA_economic']
        else:
            return row['SEIFA_economic']
    else:
        return row['SEIFA_economic']
def computeSEIFAeducation(row):
    if row['year'] > 2018:
        # get SEIFA_education from the dict
        postcode = row['postcode']
        if postcode in seifa_dict:
            return seifa_dict[postcode]['SEIFA_education_occupation']
        else:
            return row['SEIFA_education_occupation']
    else:
        return row['SEIFA_education_occupation']

#  apply functions
postcode_denominator_new['SEIFA_disadvantage'] = postcode_denominator_new.apply(computeSEIFADisadvantage, axis=1)
postcode_denominator_new['SEIFA_advantage_disadvantage'] = postcode_denominator_new.apply(computeSEIFAadvantage, axis=1)
postcode_denominator_new['SEIFA_economic'] = postcode_denominator_new.apply(computeSEIFAeconomic, axis=1)
postcode_denominator_new['SEIFA_education_occupation'] = postcode_denominator_new.apply(computeSEIFAeducation, axis=1)

In [40]:
#drop all fields after column index
postcode_denominator_nonSRA = postcode_denominator_new.iloc[:, :11]

In [41]:
# write to csv
postcode_denominator_nonSRA.to_csv('postcode_denominators_2022.csv', index=False)