In [63]:
# Imports
import pandas as pd
import numpy as np
from urllib.request import urlopen
import json

In [52]:
# Load the raw listings, keep only the columns used in this notebook, and
# expose the advertiser name under the label 'companyName'.
relevant_columns = [
    'jobTitle',
    'jobClassification',
    'jobSubClassification',
    'advertiserName',
    'advertiserId',
    'listingDate',
    'state',
]
df = (
    pd.read_csv("listings2019_2022.csv")[relevant_columns]
    .rename(columns={'advertiserName': 'companyName'})
)

In [53]:
# (full name, abbreviation) pairs, checked in order by state_initials.
_STATE_ABBREVIATIONS = (
    ('Australian Capital Territory', 'ACT'),
    ('New South Wales', 'NSW'),
    ('South Australia', 'SA'),
    ('Tasmania', 'TAS'),
    ('Western Australia', 'WA'),
    ('Northern Territory', 'NT'),
    ('Northern Territories', 'NT'),
    ('Queensland', 'QLD'),
    ('Victoria', 'VIC'),
)


def state_initials(state):
    """Return the abbreviation for a full Australian state/territory name.

    Parameters
    ----------
    state : object
        Value to translate, e.g. 'New South Wales'.

    Returns
    -------
    object
        The matching abbreviation (e.g. 'NSW'); any value that matches none
        of the known full names is returned unchanged.
    """
    return next(
        (abbreviation
         for full_name, abbreviation in _STATE_ABBREVIATIONS
         if state == full_name),
        state,
    )

In [54]:
# Replace full state names with their abbreviations.
# Series.map applies state_initials to each value of the 'state' column
# directly, instead of building a row object for every record as
# DataFrame.apply(..., axis=1) does just to read one field.
df['state'] = df['state'].map(state_initials)

In [55]:
# Remove listings located outside Australia: for each unwanted value, drop
# every row whose index label belongs to a row carrying that 'state' value.
for unwanted_state in ('Overseas', 'UK & Ireland'):
    rows_to_remove = df[df['state'] == unwanted_state]
    df = df.drop(index=rows_to_remove.index)

In [59]:
# Add a 'state code' column holding 0 for every row.
df = df.assign(**{'state code': 0})

In [60]:
# State abbreviations in code order: position 1 is 'NSW', position 8 is 'ACT'.
_STATE_CODE_ORDER = ('NSW', 'VIC', 'QLD', 'SA', 'WA', 'TAS', 'NT', 'ACT')


def state_code(state):
    """Return the numeric code (1-8) for a state abbreviation.

    Parameters
    ----------
    state : object
        State/territory abbreviation such as 'NSW' or 'ACT'.

    Returns
    -------
    int
        1 for NSW, 2 for VIC, 3 for QLD, 4 for SA, 5 for WA, 6 for TAS,
        7 for NT, 8 for ACT, and 0 for any other value.
    """
    for code, abbreviation in enumerate(_STATE_CODE_ORDER, start=1):
        if state == abbreviation:
            return code
    return 0

In [61]:
# Fill 'state code' from each row's state abbreviation.
# Series.map calls state_code once per value of the 'state' column, avoiding
# the per-row overhead of DataFrame.apply(..., axis=1) for a single-column
# lookup.
df['state code'] = df['state'].map(state_code)

In [85]:
def _count_state_listings(state_abbreviation):
    """Return how many rows of df have the given value in the 'state' column.

    The selection uses the 'state' column: df has no 'states' column, so
    selecting 'states' raises KeyError.
    """
    return df.loc[df['state'] == state_abbreviation, 'state'].count()


# Number of job listings per state/territory.
NSW_count = _count_state_listings('NSW')
VIC_count = _count_state_listings('VIC')
SA_count = _count_state_listings('SA')
TAS_count = _count_state_listings('TAS')
WA_count = _count_state_listings('WA')
NT_count = _count_state_listings('NT')
QLD_count = _count_state_listings('QLD')
ACT_count = _count_state_listings('ACT')


In [86]:
# One row per state/territory: its abbreviation, its number of listings and
# its numeric code (1-8, following the order of the abbreviations below).
state_labels = ['NSW', 'VIC', 'QLD', 'SA', 'WA', 'TAS', 'NT', 'ACT']
listing_totals = [
    NSW_count, VIC_count, QLD_count, SA_count,
    WA_count, TAS_count, NT_count, ACT_count,
]

data = {
    'states': state_labels,
    'job listings': listing_totals,
    'state code': list(range(1, len(state_labels) + 1)),
}

df_2 = pd.DataFrame(data)
df_2

Unnamed: 0,states,job listings,state code
0,NSW,1698,1
1,VIC,1088,2
2,QLD,366,3
3,SA,144,4
4,WA,269,5
5,TAS,10,6
6,NT,7,7
7,ACT,315,8


In [68]:
# Download the states GeoJSON file and parse the response body as JSON.
states_geojson_url = 'https://raw.githubusercontent.com/rowanhogan/australian-states/master/states.geojson'
with urlopen(states_geojson_url) as response:
    raw_geojson = response.read()
aus_states = json.loads(raw_geojson)

In [88]:
# Show the properties stored on the first GeoJSON feature.
first_feature = aus_states["features"][0]
print(first_feature["properties"])

{'STATE_CODE': '1', 'STATE_NAME': 'New South Wales'}


In [69]:
import plotly.express as px

In [107]:
# Choropleth of job listings per state, coloured by listing count. Rows of
# df_2 are joined to GeoJSON features through 'state code' and the feature
# property STATE_CODE.
# NOTE(review): df_2['state code'] holds integers while the GeoJSON
# STATE_CODE values are strings (e.g. '1') - confirm that every region is
# matched as intended.
geo_map_options = {
    'geojson': aus_states,
    'locations': 'state code',
    'featureidkey': 'properties.STATE_CODE',
    'color': 'job listings',
    'color_continuous_scale': "ylorrd",
    'labels': {'states': 'job listings'},
    'hover_name': 'states',
    'hover_data': ['job listings'],
}
fig = px.choropleth(df_2, **geo_map_options)

# Fit the view to the plotted locations, hide the base geo layer and remove
# the figure margins.
fig.update_geos(fitbounds='locations', visible=False)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [111]:
# Tile-map version of the listings choropleth, drawn semi-transparently over
# the 'carto-positron' base map and centred on lat -25 / lon 134.
# NOTE(review): df_2['state code'] holds integers while the GeoJSON
# STATE_CODE values are strings (e.g. '1') - confirm that every region is
# matched as intended.
# NOTE(review): newer Plotly releases reportedly deprecate choropleth_mapbox
# in favour of choropleth_map - confirm against the installed version.
mapbox_options = {
    'geojson': aus_states,
    'locations': 'state code',
    'featureidkey': 'properties.STATE_CODE',
    'color': 'job listings',
    'color_continuous_scale': px.colors.diverging.balance,
    'labels': {'states': 'job listings'},
    'hover_name': 'states',
    'hover_data': ['job listings'],
    'mapbox_style': 'carto-positron',
    'center': {'lat': -25, 'lon': 134},
    'zoom': 2,
    'opacity': 0.5,
}
fig = px.choropleth_mapbox(df_2, **mapbox_options)

fig.show()