# Specific examples of transitions data

# 0. Import dependencies and inputs

In [1]:
%run ../../notebook_preamble_Transitions.ipy

In [2]:
import os

# Location to store transitions data. Kept as a string with a trailing slash,
# because later cells build file paths by plain string concatenation.
outputs_folder = f'{useful_paths.data_dir}processed/transitions/specific_examples/'

# Make sure the folder exists: DataFrame.to_csv does not create missing directories
os.makedirs(outputs_folder, exist_ok=True)

# File name prefix shared by all CSV files exported for the specific examples
file_name = 'Data_example'

# 1. Prepare transitions data

## 1.0 Specify transitions

### Example 1: Generate transitions

In [16]:
# Example 1: Generate a collection of transitions (e.g. all transitions from
# hotel porter and hotel concierge) and keep only the safe and desirable ones
transitions_df = (
    trans_utils.get_transitions(origin_ids=[732, 329], destination_ids='report')
    .loc[lambda candidates: candidates.is_safe_desirable]
    .reset_index(drop=True)
)
transitions_df.info()

Finding all transitions... Done!
This took 0.04 seconds.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   origin_id                   21 non-null     int64  
 1   origin_label                21 non-null     object 
 2   destination_id              21 non-null     int64  
 3   destination_label           21 non-null     object 
 4   similarity                  21 non-null     float64
 5   is_jobzone_ok               21 non-null     bool   
 6   is_earnings_ok              21 non-null     bool   
 7   is_not_high_risk            21 non-null     bool   
 8   is_safer                    21 non-null     bool   
 9   is_strictly_safe            21 non-null     bool   
 10  job_zone_dif                21 non-null     float64
 11  earnings_ratio              21 non-null     float64
 12  risk_dif                    21 non-nu

### Example 2: Use the validation dataset

In [31]:
# Example 2: Add transition data using a pre-specified list of occupation pairs
validation_data = pd.read_csv(f'{useful_paths.data_dir}processed/validation/Transitions_to_validate_BATCH_1.csv')

# Build the (origin_id, destination_id) tuples column-wise: this avoids creating
# one Series per row as DataFrame.iterrows does, and keeps each column's own dtype
transition_pairs = list(zip(validation_data.origin_id, validation_data.destination_id))
validation_data.head(5)

Unnamed: 0,origin_id,origin_label,origin_description,destination_id,destination_label,destination_description,perfectly_matched_skills,partially_matched_skills,unmatched_skills
0,29,legal guardian,Legal guardians legally assist and support min...,2867,recreational therapist,Recreational therapists offer treatment to per...,listen actively,,maintain healthcare user data confidentiality;...
1,29,legal guardian,Legal guardians legally assist and support min...,1036,social counsellor,Social counsellors provide support and guidanc...,listen actively; maintain the trust of service...,encourage counselled clients to examine themse...,reflexion; perform therapy sessions; demonstra...
2,29,legal guardian,Legal guardians legally assist and support min...,2877,social pedagogue,"Social pedagogues provide care, support, and e...",apply person-centred care; support children's ...,support the positiveness of youths; adolescent...,undertake continuous professional development ...
3,29,legal guardian,Legal guardians legally assist and support min...,2654,employment and vocational integration consultant,Employment and vocational integration consulta...,give advice on personal matters; listen actively,facilitate job market access; develop course o...,observe confidentiality; communicate by teleph...
4,29,legal guardian,Legal guardians legally assist and support min...,1803,special educational needs teacher,Special educational needs teachers work with a...,disability care; assess the development of you...,assist children with special needs in educatio...,special needs learning equipment; observe stud...


In [42]:
# Peek at the first five (origin_id, destination_id) pairs
transition_pairs[:5]

[(29, 2867), (29, 1036), (29, 2877), (29, 2654), (29, 1803)]

In [39]:
# Look up the transition data for each (origin_id, destination_id) pair.
# NOTE: the name `transitions_table` is reassigned in section 1.1 below, so when
# the notebook is run top to bottom the export and skills-matching cells use
# that later table, not this one.
transitions_table = trans_utils.get_transition_data(transition_pairs)

Finding data for all transitions... Done!
This took 31.78 seconds.


In [41]:
# Show the first five rows (DataFrame.head defaults to n=5)
transitions_table.head()

Unnamed: 0,origin_id,origin_label,destination_id,destination_label,similarity,is_jobzone_ok,is_earnings_ok,is_not_high_risk,is_safer,is_strictly_safe,...,W_work,W_essential_skills,W_optional_skills,W_activities,W_work_context,sim_category,is_viable,is_desirable,is_safe_desirable,is_strictly_safe_desirable
0,29,legal guardian,2867,recreational therapist,0.37022,True,True,True,True,True,...,0.536452,0.203989,0.203989,0.340975,0.731929,min_viable,True,True,True,True
1,29,legal guardian,1036,social counsellor,0.364106,True,True,True,True,True,...,0.654659,0.073114,0.073994,0.51357,0.795748,min_viable,True,True,True,True
2,29,legal guardian,2877,social pedagogue,0.352374,True,True,True,False,False,...,0.579094,0.08609,0.165216,0.374458,0.78373,min_viable,True,True,True,False
3,29,legal guardian,2654,employment and vocational integration consultant,0.333975,True,True,True,True,True,...,0.549595,0.11744,0.11927,0.374365,0.724825,min_viable,True,True,True,True
4,29,legal guardian,1803,special educational needs teacher,0.31981,True,True,True,True,True,...,0.506951,0.019986,0.245352,0.305689,0.708213,min_viable,True,True,True,True


## 1.1 Table of transitions

In [4]:
# Select only the desired columns
selected_columns = [
    'origin_id', 'origin_label', 'destination_id', 'destination_label',
    'similarity', 'W_essential_skills', 'W_optional_skills', 'W_activities', 'W_work_context',
    'sim_category', 'is_viable', 'is_desirable', 'is_safe_desirable',
]
transitions_table = transitions_df[selected_columns]

# Preview up to five random rows. Capping n avoids a ValueError when fewer than
# five transitions are available; the fixed seed keeps the preview reproducible.
transitions_table.sample(n=min(5, len(transitions_table)), random_state=0)

Unnamed: 0,origin_id,origin_label,destination_id,destination_label,similarity,W_essential_skills,W_optional_skills,W_activities,W_work_context,sim_category,is_viable,is_desirable,is_safe_desirable
1,732,hotel porter,2526,funeral attendant,0.415513,0.250105,0.250636,0.505965,0.655345,highly_viable,True,True,True
6,329,hotel concierge,2526,funeral attendant,0.394271,0.250636,0.25323,0.484054,0.589165,min_viable,True,True,True
3,329,hotel concierge,732,hotel porter,0.571877,0.665666,0.665705,0.405988,0.55015,highly_viable,True,True,True
4,329,hotel concierge,2465,club host/club hostess,0.532646,0.411235,0.411446,0.738374,0.569529,highly_viable,True,True,True
8,329,hotel concierge,1869,customer experience manager,0.347938,0.23448,0.23838,0.180345,0.738547,min_viable,True,True,True


Notes: The column `similarity` stores the combined similarity measure, i.e. the average of the four different similarity measures described in the Mapping Career Causeways report. These individual measures are also provided in the `W_{x}` columns.

Column `sim_category` indicates whether the combined similarity is above 0.4 (`highly_viable`) or only between 0.3 and 0.4 (`min_viable`, i.e. minimally viable).

The columns `is_viable`, `is_desirable` and `is_safe_desirable` are boolean flags (see the report for their precise definitions).

### Export transitions

In [5]:
# Write the transitions table next to the other specific-example outputs
transitions_csv_path = f'{outputs_folder}{file_name}_Transitions.csv'
transitions_table.to_csv(transitions_csv_path, index=False)


## 1.2 Table of occupational profiles

In [6]:
# Collect every occupation ID that appears as an origin or as a destination
all_ids = {*transitions_table.origin_id, *transitions_table.destination_id}

In [7]:
# Occupational profiles of all occupations involved in the transitions
occ_profiles = data.occ_report[data.occ_report.id.isin(all_ids)]

# Preview up to five random rows. Capping n avoids a ValueError when fewer than
# five profiles are selected; the fixed seed keeps the preview reproducible.
occ_profiles.sample(n=min(5, len(occ_profiles)), random_state=0)

Unnamed: 0,id,concept_type,concept_uri,preferred_label,isco_level_1,isco_level_2,isco_level_3,isco_level_4,is_top_level,job_zone,...,onet_occupation,level_1,level_2,skills_based_sector_code,sub_sector_code,skills_based_sector,sub_sector,remote_labor_index,physical_proximity,exposure_score
977,1746,Occupation,http://data.europa.eu/esco/occupation/9358775a...,travel agency manager,1,14,143,1439,True,4.0,...,sales managers,2,13,3,3.3,sales & services workers,shop & services managers,0.75,0.4775,0.345507
142,246,Occupation,http://data.europa.eu/esco/occupation/13d1b2b4...,customer service representative,4,42,422,4225,True,2.0,...,customer service representatives,2,12,3,3.2,sales & services workers,customer representatives,0.695652,0.575,0.41833
1376,2458,Occupation,http://data.europa.eu/esco/occupation/d18a8976...,lottery operator,4,42,421,4212,True,2.0,...,gaming supervisors,2,11,3,3.1,sales & services workers,"food, cleaning & services workers",,0.8425,
1133,2019,Occupation,http://data.europa.eu/esco/occupation/aaeec9a7...,ICT help desk agent,3,35,351,3512,True,3.0,...,computer user support specialists,2,12,3,3.2,sales & services workers,customer representatives,0.647059,0.6375,0.474342
467,841,Occupation,http://data.europa.eu/esco/occupation/4795d4e3...,advertising media buyer,2,24,243,2431,True,4.0,...,"purchasing agents, except wholesale, retail, a...",5,24,6,6.2,arts & media workers,"journalists, publishers & composers",,0.37,


Please select the columns that you find most relevant. For more information about the column data, please see [here](https://github.com/nestauk/mapping-career-causeways/tree/main/supplementary_online_data/transitions#number-of-transition-options-for-esco-occupations).

In [8]:
# Summarise the columns (non-null counts and dtypes) of the selected profiles
occ_profiles.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20 entries, 25 to 1564
Data columns (total 31 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        20 non-null     int64  
 1   concept_type              20 non-null     object 
 2   concept_uri               20 non-null     object 
 3   preferred_label           20 non-null     object 
 4   isco_level_1              20 non-null     int64  
 5   isco_level_2              20 non-null     int64  
 6   isco_level_3              20 non-null     int64  
 7   isco_level_4              20 non-null     int64  
 8   is_top_level              20 non-null     bool   
 9   job_zone                  20 non-null     float64
 10  education_level           20 non-null     float64
 11  related_work_experience   20 non-null     float64
 12  on_the_job_training       20 non-null     float64
 13  annual_earnings           20 non-null     float64
 14  total_pai

### Export profiles

In [9]:
# Write the occupational profiles next to the other specific-example outputs
profiles_csv_path = f'{outputs_folder}{file_name}_Occupation_profiles.csv'
occ_profiles.to_csv(profiles_csv_path, index=False)


## 1.3 Skills matching data for specific transitions

In [15]:
# Choose a row of the transition table (by position, counting from 0)
row = 0

# Look the row up once and by position, so the choice does not depend on the
# table's index labels
selected_transition = transitions_table.iloc[row]

# Get skills matches
skills_matching = trans_utils.show_skills_overlap(selected_transition.origin_id,
                                                  selected_transition.destination_id,
                                                  skills_match='optional',
                                                  verbose=True)

# Export the skills matching table
skills_matching.to_csv(f'{outputs_folder}{file_name}_Skills_match_{row}.csv', index=False)

skills_matching


from hotel porter (id 732) to doorman/doorwoman (id 807)
--------
6/6 destination skills matched
NLP-adjusted overlap = 1.00 (total combined similarity: 0.77)


Unnamed: 0,origin_skill_id,origin_skill,destination_skill_id,destination_skill,similarity,score
0,832,comply with food safety and hygiene,832,comply with food safety and hygiene,1.0,0.998
1,5243,park guest's vehicle,5243,park guest's vehicle,1.0,0.998
2,6288,maintain customer service,6288,maintain customer service,1.0,0.998
3,9640,assist clients with special needs,9640,assist clients with special needs,1.0,0.998
4,11901,detect drug abuse,11901,detect drug abuse,1.0,0.998
5,2052,greet guests,2052,greet guests,1.0,0.998


In [14]:
# Extra: You may also want to get some reference data on the skills (links to the ESCO database)
all_skills_ids = set(skills_matching.origin_skill_id).union(skills_matching.destination_skill_id)
is_matched_skill = data.skills.id.isin(all_skills_ids)
data.skills[is_matched_skill]

Unnamed: 0,concept_type,concept_uri,skill_type,reuse_level,preferred_label,alt_labels,description,id,code,skill_category,level_1,level_2,level_3,level_title,level_href,level_uri
832,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/0ff99f51-40d9...,skill/competence,cross-sector,comply with food safety and hygiene,comply with hygiene and food safety\ncomply wi...,Respect optimal food safety and hygiene during...,832,S3.3.1,S,S3,S3.3,S3.3.1,complying with health and safety procedures,https://ec.europa.eu/esco/api/resource/concept...,http://data.europa.eu/esco/skill/S3.3.1
2052,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/26a6e6d2-1c9a...,skill/competence,cross-sector,greet guests,offer guests a warm welcome\nwelcome customers...,Welcome guests in a friendly manner in a certa...,2052,S3.4.4,S,S3,S3.4,S3.4.4,providing general assistance to people,https://ec.europa.eu/esco/api/resource/concept...,http://data.europa.eu/esco/skill/S3.4.4
5243,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/624fc483-e30e...,skill/competence,cross-sector,park guest's vehicle,drive customers' cars and manoeuvre into a par...,Line up guests' vehicles safely and efficientl...,5243,S8.2.2,S,S8,S8.2,S8.2.2,driving light vehicles,https://ec.europa.eu/esco/api/resource/concept...,http://data.europa.eu/esco/skill/S8.2.2
6288,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/75dfe1ee-5935...,skill/competence,cross-sector,maintain customer service,ensure professional service\nmaintain serving ...,Keep the highest possible customer service and...,6288,A1.12.3,A,A1,A1.12,A1.12.3,provide high quality client service,https://ec.europa.eu/esco/api/resource/concept...,http://data.europa.eu/esco/skill/A1.12.3
9640,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/b6b2e665-1085...,skill/competence,cross-sector,assist clients with special needs,handle participants with special needs\nassist...,Aid clients with special needs following relev...,9640,S3.4.4,S,S3,S3.4,S3.4.4,providing general assistance to people,https://ec.europa.eu/esco/api/resource/concept...,http://data.europa.eu/esco/skill/S3.4.4
11901,KnowledgeSkillCompetence,http://data.europa.eu/esco/skill/e2c81781-19c8...,skill/competence,cross-sector,detect drug abuse,identify substance dependence\ndetect alcohol ...,Identify people under excessive use of alcohol...,11901,S3.3.0,S,S3,S3.3,S3.3.0,protecting and enforcing,https://ec.europa.eu/esco/api/resource/concept...,http://data.europa.eu/esco/skill/S3.3.0
