# Data Pre-Processing

## Location Data

**Needed Fields:**
- Postcode : int
- Locality : str (suburb)
- State : str
- Long : float
- Lat : float

In [1]:
import pandas as pd

postcode_df = pd.read_excel("Datasets/australian_postcodes.xlsx")

postcode_df = postcode_df[['Postcode', 'Locality', 'State', 'Long', 'Lat']]

postcode_df

Unnamed: 0,Postcode,Locality,State,Long,Lat
0,200,ANU,ACT,149.119000,-35.277700
1,200,Australian National University,ACT,149.118900,-35.277700
2,800,DARWIN,NT,130.836680,-12.458684
3,800,DARWIN CITY,NT,130.836680,-12.458684
4,801,DARWIN,NT,130.836680,-12.458684
...,...,...,...,...,...
18521,9013,BRISBANE,QLD,152.823141,-27.603479
18522,9015,BRISBANE,QLD,152.823141,-27.603479
18523,9464,NORTHGATE MC,QLD,153.074982,-27.397055
18524,9726,GOLD COAST MC,QLD,153.412197,-28.008783


In [2]:

postcode_filter = postcode_df.loc[:, ['Postcode', 'Locality', 'State', 'Long', 'Lat']].copy()
postcode_filter = postcode_filter.drop_duplicates(subset=['Postcode', 'Long', 'Lat'], keep="first")

postcode_filter.loc[:, 'Postcode'] = postcode_filter['Postcode'].astype(str)
postcode_filter.loc[:, 'Locality'] = postcode_filter['Locality'].astype(str)
postcode_filter.loc[:, 'State'] = postcode_filter['State'].astype(str)

postcode_filter['Postcode'] = postcode_filter['Postcode'].str.zfill(4)
postcode_filter['Locality'] = postcode_filter['Locality'].str.upper()
postcode_filter['State'] = postcode_filter['State'].str.upper()

postcode_filter.head(5)

Unnamed: 0,Postcode,Locality,State,Long,Lat
0,200,ANU,ACT,149.119,-35.2777
1,200,AUSTRALIAN NATIONAL UNIVERSITY,ACT,149.1189,-35.2777
2,800,DARWIN,NT,130.83668,-12.458684
4,801,DARWIN,NT,130.83668,-12.458684
5,803,WAGAIT BEACH,NT,130.745908,-12.433991


## State List Master Table

In [3]:
state_list = postcode_filter['State'].drop_duplicates().reset_index()
state_list['state_id'] = ['STATE{:02d}'.format(i+1) for i in range(len(state_list))]

state_list = state_list[['state_id', 'State']]

full_state = {"ACT": "AUSTRALIAN CAPITAL TERRITORY",
              "NT": "NORTHERN TERRITORY",
              "SA": "SOUTH AUSTRALIA",
              "WA": "WESTERN AUSTRALIA",
              "NSW": "NEW SOUTH WALES",
              "QLD": "QUEENSLAND",
              "VIC": "VICTORIA",
              "TAS": "TASMANIA"}

state_list['state_name'] = state_list['State'].map(full_state)
state_list["state_code"] = state_list["State"]

state_list = state_list.drop(["State"], axis='columns')
state_list

Unnamed: 0,state_id,state_name,state_code
0,STATE01,AUSTRALIAN CAPITAL TERRITORY,ACT
1,STATE02,NORTHERN TERRITORY,NT
2,STATE03,SOUTH AUSTRALIA,SA
3,STATE04,WESTERN AUSTRALIA,WA
4,STATE05,NEW SOUTH WALES,NSW
5,STATE06,QUEENSLAND,QLD
6,STATE07,VICTORIA,VIC
7,STATE08,TASMANIA,TAS
