In [1]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported modules live.
%load_ext autoreload
%autoreload 2

In [8]:
from __future__ import division
import os,sys,inspect
import re
import pandas as pd
import sqlite3
import operator

# Resolve project paths relative to the notebook's working directory.
# inspect.getfile(inspect.currentframe()) is unreliable inside a notebook cell:
# the cell's "file" is a synthetic name and, depending on the IPython/ipykernel
# version, may even be a path under a temp directory, which would send every
# derived path outside the project.  The working directory is what that trick
# resolved to when it worked, so use it directly.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
data_dir = os.path.join(parentdir, 'data')
# NOTE: despite its name, `job_alpha` holds the path of the `src` directory.
job_alpha = os.path.join(parentdir, 'src')
db_path = os.path.join(job_alpha, 'job_alpha', 'jobs.db')
conn = sqlite3.connect(db_path, timeout=10)

In [3]:
# Pull the whole jobs table into a DataFrame.
query = '''
    SELECT *
    FROM jobs
'''

jobs = pd.read_sql(query, conn)

# For every ticker, print how many postings fall under each
# (category, subcategory) pair.
for tick in jobs['ticker'].unique():
    print('-------', tick, '--------')
    ticker_jobs = jobs.loc[jobs['ticker'] == tick]
    pair_counts = ticker_jobs.groupby(['category', 'subcategory']).size()
    for (category, subcategory), n_jobs in pair_counts.items():
        print(category, '-', subcategory, n_jobs)
    

------- bhp --------
Construction - Contracts Management 2
Education & Training - Workplace Training & Assessment 1
Engineering - Management 1
Engineering - Mechanical Engineering 1
Engineering - Other 2
Engineering - Project Management 1
Information & Communication Technology - Consultants 1
Marketing & Communications - Internal Communications 1
Mining, Resources & Energy - Health, Safety & Environment 2
Mining, Resources & Energy - Management 2
Mining, Resources & Energy - Mining - Drill & Blast 1
Mining, Resources & Energy - Mining - Engineering & Maintenance 13
Mining, Resources & Energy - Mining - Exploration & Geoscience 2
Mining, Resources & Energy - Mining - Operations 6
Mining, Resources & Energy - Other 1
Sales - Account & Relationship Management 1
Trades & Services - Automotive Trades 1
Trades & Services - Electricians 1
Trades & Services - Fitters, Turners & Machinists 1
Trades & Services - Other 2
------- rio tinto --------
Accounting - Management Accounting & Budgeting 1


In [4]:
# For every ticker, print each location with its distinct areas and the
# number of postings there.
for tick in jobs['ticker'].unique():
    print('-------', tick, '--------')
    ticker_jobs = jobs[jobs['ticker'] == tick]
    # Group by the bare column name rather than a one-element list: with a
    # length-1 list, newer pandas yields 1-tuples as group keys, so the
    # location would print as ('Perth',) instead of Perth.
    for location, group in ticker_jobs.groupby('location'):
        # dropna() skips missing areas whether they arrive as None or NaN.
        areas = list(group['area'].dropna().unique())
        print(location, areas, len(group))

------- bhp --------
Adelaide [] 1
Brisbane ['CBD & Inner Suburbs'] 6
Coober Pedy & Outback SA [] 2
Geraldton, Gascoyne & Midwest [] 1
Mackay & Coalfields [] 6
Newcastle, Maitland & Hunter [] 6
Perth ['CBD, Inner & Western Suburbs', 'Northern Suburbs & Joondalup'] 6
Port Hedland, Karratha & Pilbara [] 15
------- rio tinto --------
Brisbane [] 3
Broome & Kimberley [] 1
Bundaberg & Wide Bay Burnett [] 1
Gladstone & Central QLD [] 1
Perth ['CBD, Inner & Western Suburbs'] 10
Port Hedland, Karratha & Pilbara [] 5
Sydney [] 1
------- xero --------
ACT [] 1
Adelaide [] 1
Brisbane ['CBD & Inner Suburbs', 'Northern Suburbs'] 3
Gold Coast [] 1
Melbourne ['CBD & Inner Suburbs', 'Northern Suburbs', 'Eastern Suburbs', 'Bayside & South Eastern Suburbs'] 10
Perth ['CBD, Inner & Western Suburbs', 'Fremantle & Southern Suburbs'] 2
Sydney ['Parramatta & Western Suburbs', 'North West & Hills District', 'South West & M5 Corridor', 'CBD, Inner West & Eastern Suburbs', 'North Shore & Northern Beaches'] 8
--

In [5]:
# Collect (title, header, shortdescription) triples for the bhp postings
# and show the first one.
bhp = jobs.loc[jobs['ticker'] == 'bhp']
title, head, sub = (
    bhp[column].values for column in ('title', 'header', 'shortdescription')
)
job_titles = [triple for triple in zip(title, head, sub)]
job_titles[0]

('Superintendent Mining Production | South Flank',
 'Flexible work arrangements available. Leadership role on a $4.5 billion project. Help build and shape the culture of a new team',
 'Join our South Flank leadership team to help drive results in accordance to the mine plan whilst fostering a safe working environment')

In [6]:
import re

def get_title(job_title):
    ''' reduce a raw posting title to its leading role name

    '&' is spelled out as 'and', everything from the first '|' or ' - '
    separator onwards is discarded, and the remainder is stripped and
    lower-cased.  None yields an empty string.
    '''
    if job_title is None:
        return ''
    spelled_out = job_title.replace('&', 'and')
    # keep only the text in front of the first '|' or ' - ' separator
    before_pipe = spelled_out.partition('|')[0]
    role = before_pipe.partition(' - ')[0]
    return role.strip().lower()

# Normalise the titles of the rio tinto postings.
# NOTE: the frame keeps the name `bhp` even though it holds rio tinto rows here.
bhp = jobs.loc[jobs['ticker'] == 'rio tinto']
titles = list(map(get_title, bhp['title'].values))
titles

['specialist',
 'senior advisor, cyber risk and advisory',
 'manager shutdown and engineering',
 'senior engineer mining',
 'principal advisor ba',
 'shift coordinator',
 'blast hole drill fitter (fifo)',
 'full time crane operators karratha fifo/local',
 'drill fitter',
 'drill fitter',
 'rail business analyst',
 'training coordinator',
 'training coordinator',
 'electrical training coordinator',
 'maintenance technician',
 'principal advisor',
 'riggers',
 'shutdown planner',
 'boilermaker',
 'carpenter',
 'superintendent supply',
 'specialist']

In [9]:

def calc_weight(tokens, weight):
    ''' combine the weights of the matched tiers into one score

    tokens: one list of matched keywords per tier that matched
    weight: the tier weight for each entry of `tokens`, in the same order

    No matched tier gives the default of 30.  A single matched tier gives
    its weight, plus 5 per keyword when more than one keyword matched.
    Several matched tiers give the mean of their weights.
    '''
    fallback = 30
    per_keyword_bonus = 5

    tier_count = len(tokens)
    if tier_count == 0:
        return fallback
    if tier_count > 1:
        # several tiers matched: average their weights
        return sum(weight) / len(weight)

    # exactly one tier matched
    keyword_count = len(tokens[0])
    if keyword_count > 1:
        return weight[0] + keyword_count * per_keyword_bonus
    return weight[0]
        
def get_job_rank(position_titles=None):
    ''' score job titles by seniority and return them ranked, highest first

    Each title is searched for the keywords of five seniority tiers
    (singular or plural, whole words only, case-sensitive against the
    lower-case keywords).  The weights of the tiers that matched are
    combined by calc_weight and the result is divided by 100.

    position_titles: iterable of title strings.  When omitted, the
        notebook-level `titles` list is used, which is what the original
        zero-argument call relied on.

    returns: list of (title, score) tuples sorted by descending score;
        titles with equal scores keep their input order.
    '''
    if position_titles is None:
        # backward compatible: fall back to the notebook-level `titles`
        position_titles = titles

    # (weight, keywords) for each tier, most senior first
    tiers = [
        (90, ['executive', 'chief', 'advisor']),
        (70, ['senior', 'superintendent', 'engineer', 'principal', 'manager']),
        (50, ['supervisor', 'analyst', 'intermediate']),
        (30, ['specialist', 'technician', 'coordinator']),
        (10, ['junior', 'administrator', 'clerk', 'assistant']),
    ]

    # build every tier pattern once instead of once per title
    tier_patterns = [
        (tier_weight,
         re.compile(r'\b(?:{})\b'.format('|'.join(word + 's?' for word in words))))
        for tier_weight, words in tiers
    ]

    ranks = []
    for title in position_titles:
        tokens = []
        weight = []
        for tier_weight, pattern in tier_patterns:
            found = pattern.findall(title)
            if found:
                tokens.append(found)
                weight.append(tier_weight)
        ranks.append((title, calc_weight(tokens, weight) / 100))

    # list.sort is stable, so equal scores stay in input order
    ranks.sort(key=operator.itemgetter(1), reverse=True)
    return ranks



[('senior advisor, cyber risk and advisory', 0.8),
 ('senior engineer mining', 0.8),
 ('principal advisor ba', 0.8),
 ('principal advisor', 0.8),
 ('manager shutdown and engineering', 0.7),
 ('superintendent supply', 0.7),
 ('rail business analyst', 0.5),
 ('specialist', 0.3),
 ('shift coordinator', 0.3),
 ('blast hole drill fitter (fifo)', 0.3),
 ('full time crane operators karratha fifo/local', 0.3),
 ('drill fitter', 0.3),
 ('drill fitter', 0.3),
 ('training coordinator', 0.3),
 ('training coordinator', 0.3),
 ('electrical training coordinator', 0.3),
 ('maintenance technician', 0.3),
 ('riggers', 0.3),
 ('shutdown planner', 0.3),
 ('boilermaker', 0.3),
 ('carpenter', 0.3),
 ('specialist', 0.3)]