# Additional features for modelling feasibility ratings

# 0. Import dependencies and inputs

In [1]:
# Run the shared preamble. The cells below use `data`, `trans_utils`, `useful_paths` and `pd`
# without importing them in the visible cells, so they are expected to come from this script.
%run ../notebook_preamble_Transitions.ipy

In [2]:
# Occupational profiles for all 'top level' ESCO occupations analysed in the report.
# .info() prints each column with its non-null count and dtype.
data.occ_report.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1627 entries, 0 to 1626
Data columns (total 31 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        1627 non-null   int64  
 1   concept_type              1627 non-null   object 
 2   concept_uri               1627 non-null   object 
 3   preferred_label           1627 non-null   object 
 4   isco_level_1              1627 non-null   int64  
 5   isco_level_2              1627 non-null   int64  
 6   isco_level_3              1627 non-null   int64  
 7   isco_level_4              1627 non-null   int64  
 8   is_top_level              1627 non-null   bool   
 9   job_zone                  1627 non-null   float64
 10  education_level           1627 non-null   float64
 11  related_work_experience   1627 non-null   float64
 12  on_the_job_training       1627 non-null   float64
 13  annual_earnings           1627 non-null   float64
 14  total_pa

# 1. Various additional features

## 1.1 Education and experience

In [3]:
# Education, related work experience and on-the-job training values for each occupation
education_experience_cols = [
    'id',
    'preferred_label',
    'education_level',
    'related_work_experience',
    'on_the_job_training',
]
data.occ_report.loc[:, education_experience_cols]

Unnamed: 0,id,preferred_label,education_level,related_work_experience,on_the_job_training
0,1,metal drawing machine operator,2.1163,2.4383,3.6486
1,3,air traffic safety technician,4.3848,5.4808,4.0782
2,4,hospitality revenue manager,6.8261,6.3481,3.8265
3,5,medical laboratory assistant,5.7431,4.5363,3.9548
4,7,primary school teaching assistant,3.6277,3.9765,2.7602
...,...,...,...,...,...
1622,2935,cosmetics production machine operator,2.0478,3.9646,3.1842
1623,2937,demographer,8.3000,5.5000,4.6000
1624,2938,sorter labourer,3.1532,3.3559,2.8314
1625,2940,civil service administrative officer,3.9636,6.9711,3.4248


In [4]:
# O*NET category descriptions, needed to interpret the education and experience values above.
# Only three scales are kept: RL (required level of education, per the table below);
# RW and OJ are presumably related work experience and on-the-job training — TODO confirm.
onet_categories_path = useful_paths.data_dir + 'raw/onet/db_24_2_excel/Education, Training, and Experience Categories.xlsx'
df = pd.read_excel(onet_categories_path).loc[
    lambda categories: categories['Scale ID'].isin(['RL', 'RW', 'OJ'])
]
df

Unnamed: 0,Element ID,Element Name,Scale ID,Scale Name,Category,Category Description
0,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),1,Less than a High School Diploma
1,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),2,High School Diploma - or the equivalent (for e...
2,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),3,Post-Secondary Certificate - awarded for train...
3,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),4,Some College Courses
4,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),5,Associate's Degree (or other 2-year degree)
5,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),6,Bachelor's Degree
6,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),7,Post-Baccalaureate Certificate - awarded for c...
7,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),8,Master's Degree
8,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),9,Post-Master's Certificate - awarded for comple...
9,2.D.1,Required Level of Education,RL,Required Level Of Education (Categories 1-12),10,First Professional Degree - awarded for comple...


## 1.2 Annual earnings

In [5]:
# Earnings and paid-hours columns for each occupation
earnings_cols = ['id', 'preferred_label', 'annual_earnings', 'total_paid_hours']
data.occ_report.loc[:, earnings_cols]

Unnamed: 0,id,preferred_label,annual_earnings,total_paid_hours
0,1,metal drawing machine operator,22031.963636,39.904110
1,3,air traffic safety technician,32754.000000,42.036716
2,4,hospitality revenue manager,38839.816377,36.354676
3,5,medical laboratory assistant,21517.582278,35.246809
4,7,primary school teaching assistant,15945.319226,27.634427
...,...,...,...,...
1622,2935,cosmetics production machine operator,22211.820144,39.965625
1623,2937,demographer,36135.290249,35.494487
1624,2938,sorter labourer,21357.054545,39.443972
1625,2940,civil service administrative officer,33292.986333,35.240566


## 1.3 Sectors
### Skills-based sectors

In [6]:
# Skills-based sectors and sub-sectors (inferred by clustering the occupations):
# numeric codes first, followed by the corresponding sector names
sector_cols = [
    'id',
    'preferred_label',
    'skills_based_sector_code',
    'sub_sector_code',
    'skills_based_sector',
    'sub_sector',
]
data.occ_report.loc[:, sector_cols]

Unnamed: 0,id,preferred_label,skills_based_sector_code,sub_sector_code,skills_based_sector,sub_sector
0,1,metal drawing machine operator,4,4.1,manufacturing workers,metal workers & machine operators
1,3,air traffic safety technician,1,1.4,"technicians, construction, transport & securit...",transport & security workers
2,4,hospitality revenue manager,2,2.4,business & administration workers,business managers
3,5,medical laboratory assistant,5,5.1,engineers & researchers,researchers & science technicians
4,7,primary school teaching assistant,7,7.2,education workers,teachers & childcare workers
...,...,...,...,...,...,...
1622,2935,cosmetics production machine operator,9,9.2,food & tobacco production workers,food & tobacco production operators
1623,2937,demographer,5,5.1,engineers & researchers,researchers & science technicians
1624,2938,sorter labourer,1,1.5,"technicians, construction, transport & securit...",environmental & agriculture workers
1625,2940,civil service administrative officer,2,2.3,business & administration workers,"assistants, clerks & legal workers"


### ISCO occupational groups

In [7]:
# The official ISCO occupational groups are an alternative to the skills-based sectors
# (column name = 'isco_level_{x}' with x in {1, 2, 3, 4})
isco_cols = [
    'id',
    'preferred_label',
    'isco_level_1',
    'isco_level_2',
    'isco_level_3',
    'isco_level_4',
]
data.occ_report.loc[:, isco_cols]


Unnamed: 0,id,preferred_label,isco_level_1,isco_level_2,isco_level_3,isco_level_4
0,1,metal drawing machine operator,8,81,812,8121
1,3,air traffic safety technician,3,31,315,3155
2,4,hospitality revenue manager,2,24,243,2431
3,5,medical laboratory assistant,3,32,321,3212
4,7,primary school teaching assistant,5,53,531,5312
...,...,...,...,...,...,...
1622,2935,cosmetics production machine operator,8,81,813,8131
1623,2937,demographer,2,21,212,2120
1624,2938,sorter labourer,9,96,961,9612
1625,2940,civil service administrative officer,2,24,242,2422


In [8]:
# Lookup table for the names of the ISCO codes: `isco` holds the code,
# `isco_title` its name and `level` the ISCO level of that code
data.isco_titles

Unnamed: 0,isco,isco_title,level
0,1,Managers,1
1,2,Professionals,1
2,3,Technicians and associate professionals,1
3,4,Clerical support workers,1
4,5,Service and sales workers,1
...,...,...,...
604,9621,"Messengers, package deliverers and luggage por...",4
605,9622,Odd job persons,4
606,9623,Meter readers and vending-machine collectors,4
607,9624,Water and firewood collectors,4


# 2. Skills similarity distributions

In [9]:
# Skills overlap for a single transition (origin job_i -> destination job_j).
# Each job can be given either as its integer ID or as the exact preferred-label string.
job_i, job_j = 732, 1885
skills_matching = trans_utils.show_skills_overlap(
    job_i,
    job_j,
    skills_match='optional',
    verbose=True,
)
skills_matching

from hotel porter (id 732) to shop manager (id 1885)
--------
18/27 destination skills matched
NLP-adjusted overlap = 0.05 (total combined similarity: 0.19)


Unnamed: 0,origin_skill_id,origin_skill,destination_skill_id,destination_skill,similarity,score
0,6288,maintain customer service,12494,monitor customer service,0.908888,0.875
1,9777,implement sales strategies,11989,supervise merchandise displays,0.854552,0.316
2,1020,implement marketing strategies,1737,order supplies,0.825074,0.096
3,4351,handle chemical cleaning agents,11611,apply health and safety standards,0.809382,0.046
4,2861,provide tourism related information,1809,maintain relationship with customers,0.791304,0.019
5,2187,handle customer complaints,7321,measure customer feedback,0.765628,0.005
6,9640,assist clients with special needs,9007,adhere to organisational guidelines,0.757962,0.004
7,11855,handle delivered packages,5560,oversee promotional sales prices,0.734061,0.001
8,6969,run errands on behalf of customers,3659,sales activities,0.717332,0.0
9,3467,provide door security,12591,manage theft prevention,0.710784,0.0


In [10]:
# Build a collection of transitions: every transition from hotel porter (732) and
# hotel concierge (329) to the occupations in the report, keeping only the rows
# flagged as safe and desirable
transitions_df = (
    trans_utils.get_transitions(origin_ids=[732, 329], destination_ids='report')
    .loc[lambda transitions: transitions.is_safe_desirable]
    .reset_index(drop=True)
)


Finding all transitions... Done!
This took 0.23 seconds.


In [11]:
# Collect, for every transition in transitions_df, the per-skill-pair score and
# similarity values (unrounded) as one list per transition
all_scores, all_similarities = [], []

for row_idx, row in transitions_df.iterrows():
    skills_matching = trans_utils.show_skills_overlap(
        job_i=row.origin_id,
        job_j=row.destination_id,
        skills_match='optional',
        verbose=False,
        rounding=False,
    )
    all_scores.append(skills_matching['score'].to_list())
    all_similarities.append(skills_matching['similarity'].to_list())

### Sanity check

In [12]:
# Print the similarities and scores collected for the transition at index 6, one list per line
print(all_similarities[6], all_scores[6], sep='\n')

[1.0, 1.0, 0.7958164827325414, 0.7608475337042723, 0.7239788976295636, 0.6341723794274423, 0.5655392469211581, 0.45691865107714547]
[0.998498817743263, 0.998498817743263, 0.023911917327422847, 0.004245575368164735, 0.0006743712940804034, 7.569461734580237e-06, 2.4474771327372927e-07, 1.0716487921772212e-09]


In [13]:
# Look up the transition at index 6 (the one whose lists were printed above);
# the list selector [[6]] returns a one-row DataFrame rather than a Series
transitions_df.loc[[6]]

Unnamed: 0,origin_id,origin_label,destination_id,destination_label,similarity,is_jobzone_ok,is_earnings_ok,is_not_high_risk,is_safer,is_strictly_safe,...,W_work,W_essential_skills,W_optional_skills,W_activities,W_work_context,sim_category,is_viable,is_desirable,is_safe_desirable,is_strictly_safe_desirable
6,329,hotel concierge,2526,funeral attendant,0.394271,True,True,True,True,True,...,0.536609,0.250636,0.25323,0.484054,0.589165,min_viable,True,True,True,True


In [14]:
# Recompute the overlap for that transition (hotel concierge -> funeral attendant) with default
# arguments; the rounded table should agree with the unrounded lists printed for index 6.
# NOTE(review): skills_match is left at its default here while the loop passes 'optional' — confirm they agree.
trans_utils.show_skills_overlap(329, 2526)

from hotel concierge (id 329) to funeral attendant (id 2526)
--------
8/8 destination skills matched
NLP-adjusted overlap = 0.25 (total combined similarity: 0.39)


Unnamed: 0,origin_skill_id,origin_skill,destination_skill_id,destination_skill,similarity,score
0,2052,greet guests,2052,greet guests,1.0,0.998
1,6288,maintain customer service,6288,maintain customer service,1.0,0.998
2,3704,assess cleanliness of areas,736,maintain personal hygiene standards,0.795816,0.024
3,2861,provide tourism related information,7330,provide directions to guests,0.760848,0.004
4,9640,assist clients with special needs,5347,promote human rights,0.723979,0.001
5,2187,handle customer complaints,12424,show diplomacy,0.634172,0.0
6,11720,deliver correspondence,12803,manage funeral equipment,0.565539,0.0
7,11685,take room service orders,4285,transfer coffins,0.456919,0.0
