# Skill Change Bootstrap - Changing Order of Career Moves

In [1]:
import os
import pandas as pd
import copy
from collections import ChainMap
import numpy as np
import re
import os
import glob
import multiprocessing as mp
import time
from tqdm import tqdm
from multiprocessing import Pool
from plotnine import *


## Changing Working Directory
print(os.getcwd())
basepath = "path"
os.chdir(basepath)
print(os.listdir)

## 1. Loading Data

### 1.1. Job Sequence Data

In [2]:
#### In the following, read in the cleaned job sequence file from Burning Glass
jobs = pd.read_csv(basepath + 'filepath/filename.csv')
jobs = jobs.sort_values(['BGTResId','From_year','To_year', 'From_month', 'To_month'],
              ascending = [True, True, True, True, True])
jobs["JobPosition"] = jobs.groupby('BGTResId').cumcount()+1
jobs = jobs[['BGTResId', 'JobPosition', 'ONETCode']]


print(len(jobs))
print(jobs.head())


26851093
   BGTResId  JobPosition    ONETCode
0         4            1  11-3061.00
1         4            2  41-4012.00
2         4            3  41-4012.00
3         4            4  11-2022.00
4        13            1  41-4012.00


### 1.2. Skill Occupation Information

In [3]:
#### In the following, read in the skills, knowledge, and abilities from O*NET
skill_occ = pd.read_excel(basepath+"Skills.xlsx")

ability_occ = pd.read_excel(basepath+"Abilities.xlsx")

know_occ = pd.read_excel(basepath+"Knowledge.xlsx")

skill_occ[skill_occ['Element Name'] == "Mathematics"]['Element Name'] = "Mathematics Skills"
know_occ[skill_occ['Element Name'] == "Mathematics"]['Element Name'] = "Mathematics Knowledge"


## Combining Skills
### Adding Type
skill_occ['type'] = "skill"
ability_occ['type'] = "ability"
know_occ['type'] = "knowledge"

# ### Renaming Columns
skill_occ = skill_occ.rename(columns = {'O*NET-SOC Code':'occ_8_dig', 'Element ID':'element_ID', 'Element Name':'element_title'})
ability_occ = ability_occ.rename(columns = {'O*NET-SOC Code':'occ_8_dig', 'Element ID':'element_ID', 'Element Name':'element_title'})
know_occ = know_occ.rename(columns = {'O*NET-SOC Code':'occ_8_dig', 'Element ID':'element_ID', 'Element Name':'element_title'})


### Keeping Needed Columns and Pivoting Values
skill_occ = skill_occ[['occ_8_dig', 'element_ID', 'Scale ID', 'Data Value', 'type']] \
    .pivot_table(index = ['occ_8_dig', 'element_ID'], columns = 'Scale ID', values = 'Data Value').reset_index(drop = False)
ability_occ = ability_occ[['occ_8_dig', 'element_ID', 'Scale ID', 'Data Value', 'type']] \
    .pivot_table(index = ['occ_8_dig', 'element_ID'], columns = 'Scale ID', values = 'Data Value').reset_index(drop = False)
know_occ = know_occ[['occ_8_dig', 'element_ID', 'Scale ID', 'Data Value', 'type']] \
    .pivot_table(index = ['occ_8_dig', 'element_ID'], columns = 'Scale ID', values = 'Data Value').reset_index(drop = False)

### Combining Skills
skills_occ_raw = skill_occ.rename(columns = {'IM':'Importance', 'LV':'Level'}) \
        .append(ability_occ.rename(columns = {'IM':'Importance', 'LV':'Level'}), ignore_index = True) \
        .append(know_occ.rename(columns = {'IM':'Importance', 'LV':'Level'}), ignore_index = True) \
        [["occ_8_dig", "element_ID", "Importance", "Level"]]


print(skills_occ_raw.head())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Scale ID   occ_8_dig element_ID  Importance  Level
0         11-1011.00    2.A.1.a        4.12   4.75
1         11-1011.00    2.A.1.b        4.12   4.88
2         11-1011.00    2.A.1.c        4.00   4.38
3         11-1011.00    2.A.1.d        4.38   4.88
4         11-1011.00    2.A.1.e        3.25   3.62


### 1.3. Skill Clusters and Categories

In [4]:
#### In the following, read in the Skill Clusters as defined by the manuscript Fig. 1
skill_clusters = pd.read_csv(basepath+"Skill Clustering and subtypes.csv")
skill_clusters = skill_clusters[['element_ID','skill_Cluster']]
print(skill_clusters.head())


  element_ID skill_Cluster
0  1.A.1.a.1       General
1  1.A.1.a.2       General
2  1.A.1.a.3       General
3  1.A.1.a.4       General
4  1.A.1.b.3       General


### 1.4. Aggregating Occupation Skills into Skill Categories

In [5]:
#### In the following, read in the Skill Categories as defined by the manuscript Fig. 2
occ_skill_subtypes = pd.read_csv(basepath+"preferred Skill Clustering and subtypes.csv")
occ_skill_subtypes = occ_skill_subtypes[['element_ID','gen_related']]
occ_skill_subtypes = skills_occ_raw[['element_ID', 'occ_8_dig', 'Level']]. \
    merge(occ_skill_subtypes[['element_ID', 'gen_related']]). \
    groupby(['occ_8_dig', 'gen_related'])['Level'].mean().reset_index(drop = False)

print(occ_skill_subtypes.head())

    occ_8_dig             gen_related     Level
0  11-1011.00                 General  4.711613
1  11-1011.00     Nested Intermediate  4.087600
2  11-1011.00         Nested Specific  1.686667
3  11-1011.00  Un-nested Intermediate  1.786111
4  11-1011.00      Un-nested Specific  0.230714


## 2. Bootstrapping: Changing Sequence of Career Moves

In [6]:
#### The following function takes in a Data.Frame of job sequences and permutates job sequences

def Permutate_JobPosition(x):
    x['rand_JobPosition'] = x.groupby("BGTResId")["JobPosition"].transform(lambda z: np.random.permutation(z))
    return x

### 2.1. Single Bootstrap— keeping the entire data

In [None]:
#### Permutation (bootrstrapping) process and measuring of skill changes

jobs["rand_JobPosition"] = jobs.groupby("BGTResId")["JobPosition"].transform(lambda x: np.random.permutation(x))
jobs.explode("rand_JobPosition")
temp_jobs = jobs[["BGTResId", "ONETCode", "rand_JobPosition"]]
        
temp_jobs = temp_jobs.sort_values(['rand_JobPosition'], ascending = [True])
temp_jobs['Dest_occ'] = temp_jobs.groupby(['BGTResId'])['ONETCode'].shift(-1)
temp_jobs = temp_jobs.rename(columns = {'ONETCode' : 'Source_occ'})
        
#### Measuring of Skill Change in the Randomized Job Sequence
skill_level_change = temp_jobs. \
    merge(occ_skill_subtypes.rename(columns = {"occ_8_dig":"Source_occ", "Level": "Source_Level"}), on = "Source_occ"). \
    merge(occ_skill_subtypes.rename(columns = {"occ_8_dig":"Dest_occ", "Level": "Dest_Level"}), on = ["Dest_occ", "gen_related"])
skill_level_change['delta_level'] = skill_level_change.Dest_Level - skill_level_change.Source_Level
skill_level_change = skill_level_change[["BGTResId", "Source_occ", "Dest_occ",'gen_related','delta_level']]
skill_level_change = skill_level_change.drop_duplicates()
    

### Saving Output Files
skill_level_change.to_csv(basepath+'skill_level_change_bootstrap.csv', index=False)
    
