In [1]:
import pandas as pd
import random as rnd 
import mysql.connector
from mysql.connector import Error
import plotly
import json
import csv
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pymysql
import plotly.express as px
import plotly.graph_objects as go
from sqlalchemy import create_engine
import networkx as nx
from networkx_viewer import Viewer
from functools import reduce
from scipy.spatial.distance import pdist, squareform
import jgraph
from cyjupyter import Cytoscape
from scipy.stats import kendalltau
from scipy.stats import weightedtau

# Getting data from SQL database

In [2]:
connection = mysql.connector.connect(host='127.0.0.1',
                                         database='employment',
                                         user='root',
                                         password='thanhn123')

In [3]:
def get_data(connection, descriptor_domain,filt = False,val = None):
    '''
    Return df in the SOCs-decriptor format of a specific domain 
    descriptor_domeain:
        work context
        knowledge
        skills
        gwas
        job_zones, filter
        Abilities, filter
    
    '''
    df= pd.DataFrame(data=None)
    if descriptor_domain == 'work_context':
        df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.work_context
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code)
            WHERE scale_id LIKE 'CX';''', con = connection)
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    elif descriptor_domain == 'job_zones':
        if filt:
            df= pd.read_sql('''
                SELECT title,job_zone FROM employment.job_zones
                LEFT JOIN employment.occupation_data USING (onetsoc_code); ''', con = connection)
            df = df[df['job_zone'].isin(val)]
        else:
            df= pd.read_sql('''
                SELECT title,job_zone FROM employment.job_zones
                LEFT JOIN employment.occupation_data USING (onetsoc_code);''', con = connection)
    elif descriptor_domain == 'abilities':
        if filt:
#             df= pd.read_sql('''
#             SELECT title, element_name, element_id,data_value FROM employment.''' + descriptor_domain + '''
#             LEFT JOIN employment.content_model_reference USING (element_id) 
#             LEFT JOIN employment.occupation_data USING (onetsoc_code); ''', con = connection)
#             df = df[df['element_id'].contains(val)]
            df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.''' + descriptor_domain + '''
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code)
            WHERE element_id LIKE\''''+ val +'''%\'; ''', con = connection)
            
        else:
            df= pd.read_sql('''
            SELECT title, element_name,element_id, data_value FROM employment.''' + descriptor_domain + '''
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code);''', con = connection)
        df = df.groupby(by=['title','element_name'])['data_value'].mean()
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    elif descriptor_domain == 'work_values':
        df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.work_values
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code)
            WHERE scale_id LIKE 'EX';''', con = connection)
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    elif descriptor_domain == 'interests':
        df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.interests
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code)
            WHERE scale_id LIKE 'OI';''', con = connection)
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    elif descriptor_domain == 'work_styles':
        df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.work_styles
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code)
            WHERE scale_id LIKE 'IM';''', con = connection)
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    elif descriptor_domain == 'name':
        df= pd.read_sql('''
            SELECT * FROM employment.occupation_data;''', con = connection)
    else:
        df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.''' + descriptor_domain + '''
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code);''', con = connection)
        df = df.groupby(by=['title','element_name'])['data_value'].mean()
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    return df

# Finding distance measures from ONET 

In [4]:
def distance_table(df, jz=False):
    
#     squareform(pdist(df.iloc[:, 1:])), columns=df.title.unique(), index=df.title.unique()
    dist = []
    name_dict = {}
    if jz:
        name_dict = df['title'].sort_values().reset_index()['title'].to_dict()
        dist = pdist(df[df.columns[1:]],'euclidean')
    else:
        for i in range(len(df.index)):
            name_dict[i] = df.index[i]
        dist = pdist(df,'euclidean') #Euclidean distance for 1XN
    df_dist = pd.DataFrame(squareform(dist)) #Euclidean distance for NxN
#     return df_dist
    columns = list(df_dist.index) 
    for i in range(len(df.index)): #convert 0 values to NaN
        df_dist.loc[i,i] = np.NaN
    mean = df_dist.mean().mean()
    std = df_dist.stack().std() 

    for col in columns: #Z-score calculated 
        df_dist[col] = (df_dist[col] - mean)/std
#     return df_dist
    return df_dist.rename(columns = name_dict, index = name_dict)

In [5]:
# Get CPS data

In [6]:
df_cps = pd.read_csv('../CPS Job Changes/JobChanges_2011to19.csv')

In [7]:
df_cps.sort_values(by = 'pct_tot', ascending = False)

Unnamed: 0,ONET18_SOC_LY,ONET18_SOC,ONET18_Title,ONET18_Title_LY,count,pct_tot,count_socly,pct_socly,in_grp,in_minor_grp,in_broad_occ,employment_projection_NewJob,wage_change_BetweenJobs
75823,41-1011,41-1012,First-Line Supervisors of Non-Retail Sales Wor...,First-Line Supervisors of Retail Sales Workers,734264.510000,4.395486e-01,2207170.940,33.267224,1,1,1,-8.3,85.257732
76141,41-1012,41-1011,First-Line Supervisors of Retail Sales Workers,First-Line Supervisors of Non-Retail Sales Wor...,603472.900000,3.612536e-01,999832.010,60.357429,1,1,1,-5.5,-46.021146
77141,41-2012,41-2031,Retail Salespersons,Gambling Change Persons and Booth Cashiers,509346.270000,3.049071e-01,2630467.290,19.363338,1,1,0,-0.6,-1.700405
76588,41-2011,41-2031,Retail Salespersons,Cashiers,509346.270000,3.049071e-01,2630467.290,19.363338,1,1,0,-0.6,6.772208
177,11-1021,11-1011,Chief Executives,General and Operations Managers,352402.430000,2.109567e-01,751730.590,46.878820,1,1,0,-10.0,83.034056
...,...,...,...,...,...,...,...,...,...,...,...,...,...
88371,45-2092,25-1112,"Law Teachers, Postsecondary","Farmworkers and Laborers, Crop, Nursery, and G...",1.135947,6.800058e-07,184416.524,0.000616,0,0,0,6.9,346.293792
88369,45-2092,25-1082,"Library Science Teachers, Postsecondary","Farmworkers and Laborers, Crop, Nursery, and G...",1.135947,6.800058e-07,184416.524,0.000616,0,0,0,3.0,180.717341
88368,45-2092,25-1081,"Education Teachers, Postsecondary","Farmworkers and Laborers, Crop, Nursery, and G...",1.135947,6.800058e-07,184416.524,0.000616,0,0,0,4.8,157.524058
88367,45-2092,25-1072,"Nursing Instructors and Teachers, Postsecondary","Farmworkers and Laborers, Crop, Nursery, and G...",1.135947,6.800058e-07,184416.524,0.000616,0,0,0,17.6,193.257438


## Skills 

In [8]:
df_skills = get_data(connection,'skills')
df_skills

skills_dist = distance_table(df_skills)
skills_dist

Unnamed: 0,Accountants,Actors,Actuaries,Acupuncturists,Acute Care Nurses,Adapted Physical Education Specialists,Adhesive Bonding Machine Operators and Tenders,"Administrative Law Judges, Adjudicators, and Hearing Officers",Administrative Services Managers,Adult Basic and Secondary Education and Literacy Teachers and Instructors,...,"Welding, Soldering, and Brazing Machine Setters, Operators, and Tenders",Wellhead Pumpers,"Wholesale and Retail Buyers, Except Farm Products",Wind Energy Engineers,Wind Energy Operations Managers,Wind Energy Project Managers,Wind Turbine Service Technicians,"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",Word Processors and Typists,Zoologists and Wildlife Biologists
Accountants,,-0.230680,-1.227414,-1.171535,-0.569831,-1.003623,1.132803,-1.065039,-1.137232,-1.059764,...,0.725944,1.565390,-1.521513,-0.814871,-0.314631,-1.141396,1.107543,1.433590,-0.109065,-0.931834
Actors,-0.230680,,1.135383,-0.864040,0.663371,-0.357262,1.197330,-0.544330,0.050725,-0.943865,...,0.331293,1.241636,0.200386,1.010929,1.070415,0.546771,1.569600,1.321033,-0.680258,0.499286
Actuaries,-1.227414,1.135383,,-0.087157,-0.286412,-0.282347,1.963571,-0.399273,-0.465844,0.038218,...,1.804970,2.486738,-0.972011,-0.930407,-0.086949,-0.831523,1.659458,2.279853,1.216561,-0.704367
Acupuncturists,-1.171535,-0.864040,-0.087157,,-1.001092,-1.412509,0.446884,-0.959553,-0.798526,-1.204994,...,-0.041906,0.854747,-0.723608,-0.542806,-0.172813,-0.692439,0.568397,0.827215,-0.526829,-1.083851
Acute Care Nurses,-0.569831,0.663371,-0.286412,-1.001092,,-1.195340,0.830991,-0.255460,-0.836154,-0.280156,...,0.676692,1.417261,-0.556259,-1.302537,-0.920175,-1.047353,0.542074,1.357017,0.913517,-1.547737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wind Energy Project Managers,-1.141396,0.546771,-0.831523,-0.692439,-1.047353,-0.945562,0.908294,-0.497354,-1.831780,-0.280800,...,0.801334,1.456835,-1.831528,-0.911183,-1.592966,,0.644552,1.388393,0.472996,-1.141655
Wind Turbine Service Technicians,1.107543,1.569600,1.659458,0.568397,0.542074,0.979859,-1.277587,1.721333,0.830265,1.343563,...,-0.786879,-0.852132,1.205901,0.564226,-0.197641,0.644552,,-0.962793,0.923107,0.745142
"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",1.433590,1.321033,2.279853,0.827215,1.357017,1.592393,-1.603523,2.089086,1.384270,1.651199,...,-1.709891,-1.733867,1.659570,1.220691,0.909859,1.388393,-0.962793,,0.158726,1.480451
Word Processors and Typists,-0.109065,-0.680258,1.216561,-0.526829,0.913517,0.420100,0.150641,0.492062,0.232212,-0.009116,...,-0.506267,0.315154,0.345638,0.763999,0.925228,0.472996,0.923107,0.158726,,0.631908


In [9]:
skills_dist.loc['Retail Salespersons'].sort_values()[0:10]

Demonstrators and Product Promoters                                                            -1.965512
Bartenders                                                                                     -1.937110
Psychiatric Aides                                                                              -1.923179
Sales Representatives, Wholesale and Manufacturing, Except Technical and Scientific Products   -1.905583
Home Health Aides                                                                              -1.884423
Reservation and Transportation Ticket Agents and Travel Clerks                                 -1.880728
Travel Agents                                                                                  -1.851155
Order Clerks                                                                                   -1.816983
Counter and Rental Clerks                                                                      -1.805677
Hotel, Motel, and Resort Desk Clerks                   

### Percentages

In [10]:
cps_retail_pct = df_cps[df_cps['ONET18_Title_LY'] == 'Registered Nurses'].sort_values(by = 'pct_tot', ascending = False)
# cps_retail_pct[cps_retail_pct['ONET18_Title'] == 'Cashiers']['pct_tot']
# cps_retail_pct[0:10]['pct_tot'].sum()
cps_retail_pct

Unnamed: 0,ONET18_SOC_LY,ONET18_SOC,ONET18_Title,ONET18_Title_LY,count,pct_tot,count_socly,pct_socly,in_grp,in_minor_grp,in_broad_occ,employment_projection_NewJob,wage_change_BetweenJobs
50936,29-1141,29-1123,Physical Therapists,Registered Nurses,208251.380000,0.124664,1137881.06,18.301683,1,1,0,18.2,22.020431
50939,29-1141,29-1126,Respiratory Therapists,Registered Nurses,133490.700000,0.079911,1137881.06,11.731516,1,1,0,19.4,-16.345062
50942,29-1141,29-1129,"Therapists, All Other",Registered Nurses,84033.560000,0.050305,1137881.06,7.385092,1,1,0,14.6,-28.178207
50941,29-1141,29-1128,Exercise Physiologists,Registered Nurses,84033.560000,0.050305,1137881.06,7.385092,1,1,0,11.3,-32.917140
50940,29-1141,29-1127,Speech-Language Pathologists,Registered Nurses,78260.440000,0.046849,1137881.06,6.877735,1,1,0,24.9,7.945516
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50913,29-1141,25-4012,Curators,Registered Nurses,104.406667,0.000063,1137881.06,0.009176,0,0,0,12.6,-25.539160
51049,29-1141,53-3051,"Bus Drivers, School",Registered Nurses,47.747500,0.000029,1137881.06,0.004196,0,0,0,,
51050,29-1141,53-3052,"Bus Drivers, Transit and Intercity",Registered Nurses,47.747500,0.000029,1137881.06,0.004196,0,0,0,9.3,-41.288309
51051,29-1141,53-3053,Shuttle Drivers and Chauffeurs,Registered Nurses,47.747500,0.000029,1137881.06,0.004196,0,0,0,,


In [11]:
def percentage_default(occ,df_cps,a,b):
    df_occ = df_cps[df_cps['ONET18_Title_LY'] == occ].sort_values(by = 'pct_socly', ascending = False)
    return df_occ[a:b]['pct_socly'].sum()

In [12]:
retail_default = percentage_default('Retail Salespersons',df_cps,0,10)

In [13]:
nurse_default = percentage_default('Registered Nurses',df_cps,0,10)

In [14]:
janitors_default = percentage_default('Janitors and Cleaners, Except Maids and Housekeeping Cleaners',df_cps,0,10)

In [15]:
sales_default = percentage_default('Sales Managers',df_cps,0,10)

In [16]:
def percentage(occ,df_onet,df_cps,a,b):
    '''
    Get total percentage that ONET transition takes ups
    @param occ occupation of interests
    @param df_onet dataFrame for ONET
    @param df_cps dataFrame for CPS data
    @param a index of first position for onet
    @param index of last position
    
    return the percentage composition for the ath to bth place of ONET
    '''
    
    total = 0
    onet_list = df_onet.loc[occ].sort_values()[a:b].index.to_list()
    cps_occ = df_cps[df_cps['ONET18_Title_LY'] == occ]
    for occ in onet_list:
#         print(occ)
#         print(cps_occ[cps_occ['ONET18_Title'] == occ]['pct_tot'].values[0])
        val = cps_occ[cps_occ['ONET18_Title'] == occ]['pct_socly'].values
        if len(val) == 0:
            continue
        else:
            total += val[0]
    
    return total


In [17]:
retail_skills = percentage('Retail Salespersons',skills_dist,df_cps,0,10)
print(retail_default)
print(retail_skills)
print(retail_skills/retail_default*100)

41.541673571317794
6.2937464884452385
15.150440382812885


In [18]:
nurse_skills = percentage('Registered Nurses',skills_dist,df_cps,0,10)
print(nurse_default)
print(nurse_skills)
print(nurse_skills/nurse_default*100)

69.29806178512185
9.807177002520186
14.152166380829092


In [19]:
janitors_skills = percentage('Janitors and Cleaners, Except Maids and Housekeeping Cleaners',skills_dist,df_cps,0,10)
print(janitors_default)
print(janitors_skills)
print(janitors_skills/janitors_default*100)

59.08144679332252
7.397082505916141
12.520144491031948


In [20]:
sales_skills = percentage('Sales Managers',skills_dist,df_cps,0,10)
print(janitors_default)
print(sales_skills)
print(sales_skills/sales_default*100)

59.08144679332252
3.536867901091421
7.171481406185616


## ONET Regular

In [21]:
df_ab = get_data(connection,'abilities')
df_in = get_data(connection,'interests')
df_ws = get_data(connection,'work_styles')
df_wv = get_data(connection, 'work_values')
df_wc = get_data(connection,'work_context')
df_knowledge = get_data(connection,'knowledge')
df_gwas = get_data(connection,'work_activities')
df_jz = get_data(connection,'job_zones')
df_jz = df_jz[df_jz['title'] != 'Legislators']


In [22]:
wc_dist = distance_table(df_wc)
knowledge_dist = distance_table(df_knowledge)
skills_dist = distance_table(df_skills)
gwas_dist = distance_table(df_gwas)
jz_dist = distance_table(df_jz,True)
wv_dist = distance_table(df_wv)
in_dist = distance_table(df_in)
ab_dist = distance_table(df_ab)
ws_dist = distance_table(df_ws)
df_concat = pd.concat([wc_dist,knowledge_dist,skills_dist,gwas_dist,jz_dist.multiply(1.3)]).groupby(level =0).mean()
# df_concat

In [23]:
retail_onet = percentage('Retail Salespersons',df_concat,df_cps,0,10)
print(retail_default)
print(retail_onet)
print(retail_onet/retail_default*100)

41.541673571317794
11.674018971853148
28.101946715775565


In [24]:
nurse_onet = percentage('Registered Nurses',df_concat,df_cps,0,10)
print(nurse_default)
print(nurse_onet)
print(nurse_onet/nurse_default*100)

69.29806178512185
46.10060826568283
66.52510485593486


In [25]:
janitors_onet = percentage('Janitors and Cleaners, Except Maids and Housekeeping Cleaners',df_concat,df_cps,0,10)
print(janitors_default)
print(janitors_onet)
print(janitors_onet/janitors_default*100)

59.08144679332252
27.73909571228287
46.95060330753103


In [26]:
sales_onet = percentage('Sales Managers',df_concat,df_cps,0,10)
print(sales_default)
print(sales_onet)
print(sales_onet/sales_default*100)

49.31851176579454
1.6504121501604823
3.346435427731511


In [27]:
## ONET Custom

In [28]:
def filt(df,d):
    '''
    @params: df: Dataframe to filter, d: set of all relevant names
    @returns: filtered Dataframe
    '''
    return df.loc[df.index.isin(d)]

In [29]:
# df_rel = pd.concat([knowledge_dist.multiply(2),skills_dist.multiply(3),gwas_dist]).groupby(level =0).mean()
# df_jz_filt = get_data(connection,'job_zones',True,[2])
df_jz_filt = get_data(connection,'job_zones')
# df_ab_filt = get_data(connection,'abilities',True,'1.A.1.')
df_ab_filt = get_data(connection,'abilities')
# jz_dist_filt = distance_table(df_jz_filt,True)
jz_set = set(df_jz_filt['title'].unique()) #to filter out same job_zones jobs

In [30]:
abilities_filtered = filt(df_ab_filt,jz_set)
skills_filtered = filt(df_skills,jz_set)
knowledge_filtered = filt(df_knowledge,jz_set)
in_filtered = filt(df_in,jz_set)
gwas_filtered = filt(df_gwas,jz_set)

In [31]:
ab_dist_filt = distance_table(abilities_filtered) #NaN?
skills_dist_filt = distance_table(skills_filtered)
knowledge_dist_filt = distance_table(knowledge_filtered)
in_dist_filt = distance_table(in_filtered)
gwas_dist_filt = distance_table(gwas_filtered)
df_concat_filt = pd.concat([ab_dist_filt.multiply(4.0),skills_dist_filt.multiply(3.0),knowledge_dist_filt.multiply(2.0),in_dist_filt,gwas_dist_filt]).groupby(level =0).mean()

In [32]:
retail_alg1 = percentage('Retail Salespersons',df_concat_filt,df_cps,0,10)
print(retail_default)
print(retail_alg1)
print(retail_alg1/retail_default*100)

41.541673571317794
10.232032521498693
24.630766268798908


In [33]:
nurse_alg1 = percentage('Registered Nurses',df_concat_filt,df_cps,0,10)
print(nurse_default)
print(nurse_alg1)
print(nurse_alg1/nurse_default*100)

69.29806178512185
41.8177848702394
60.34481166285317


In [34]:
janitor_alg1 = percentage('Janitors and Cleaners, Except Maids and Housekeeping Cleaners',df_concat_filt,df_cps,0,10)
print(janitors_default)
print(janitor_alg1)
print(janitor_alg1/janitors_default*100)

59.08144679332252
28.03281191742145
47.447741108110044


In [35]:
sales_alg1 = percentage('Sales Managers',df_concat_filt,df_cps,0,10)
print(sales_default)
print(sales_alg1)
print(sales_alg1/sales_default*100)

49.31851176579454
3.536867901091421
7.171481406185616


## Comparison using Kendall Tau

In [36]:
# kendalltau([1,2,3,4,5], [1,2,3,4,5])
kendalltau([1,2,3,4,5], [2,1,3,4,5])

KendalltauResult(correlation=0.7999999999999999, pvalue=0.08333333333333333)

In [37]:
kendalltau([1,2,3,4,5], [1,2,3,5,4])

KendalltauResult(correlation=0.7999999999999999, pvalue=0.08333333333333333)

In [38]:
weightedtau([1,2,3,4,5], [2,1,3,4,5])

WeightedTauResult(correlation=0.9014598540145985, pvalue=nan)

In [39]:
weightedtau([1,2,3,4,5], [1,2,3,5,4])

WeightedTauResult(correlation=0.6715328467153284, pvalue=nan)

In [40]:
list_cps = df_cps[df_cps['ONET18_Title_LY'] == 'Retail Salespersons'].sort_values(by = 'pct_tot', ascending = False)['ONET18_Title']
list_cps= list_cps.to_list()[0:50]
list_onet = skills_dist.loc['Retail Salespersons'].sort_values().index.to_list()[0:50]
# cps_array = np.array(list_cps).astype(float)


In [41]:
# weightedtau(list_cps, list_onet)

In [42]:
def kendall_tau_rank(df_cps,df_onet,a,b,occ):
    list_cps = df_cps[df_cps['ONET18_Title_LY'] == occ].sort_values(by = 'pct_tot', ascending = False)['ONET18_Title']
    list_cps= list_cps.to_list()[a:b]
    list_onet = df_onet.loc[occ].sort_values().index.to_list()[a:b]
    
    return kendalltau(list_cps, list_onet)

In [43]:
kendall_tau_rank(df_cps,skills_dist,0,50,'Sales Managers')

KendalltauResult(correlation=0.0016333198585000842, pvalue=0.9866517739479581)

In [44]:
kendall_tau_rank(df_cps,skills_dist,0,50,'Retail Salespersons')

KendalltauResult(correlation=0.14938775510204083, pvalue=0.12582650074030705)

In [45]:
#find all occupations that are in both cps and onet
cps_occ = set(df_cps['ONET18_Title_LY'].unique())
onet_occ = set(df_skills.index.unique())

relevant_occ = cps_occ & onet_occ
relevant_occ

{'Actors',
 'Actuaries',
 'Acupuncturists',
 'Adhesive Bonding Machine Operators and Tenders',
 'Administrative Law Judges, Adjudicators, and Hearing Officers',
 'Administrative Services Managers',
 'Advertising Sales Agents',
 'Advertising and Promotions Managers',
 'Aerospace Engineers',
 'Agents and Business Managers of Artists, Performers, and Athletes',
 'Agricultural Engineers',
 'Agricultural Equipment Operators',
 'Agricultural Inspectors',
 'Agricultural Sciences Teachers, Postsecondary',
 'Agricultural Technicians',
 'Air Traffic Controllers',
 'Aircraft Cargo Handling Supervisors',
 'Aircraft Mechanics and Service Technicians',
 'Aircraft Structure, Surfaces, Rigging, and Systems Assemblers',
 'Airfield Operations Specialists',
 'Airline Pilots, Copilots, and Flight Engineers',
 'Ambulance Drivers and Attendants, Except Emergency Medical Technicians',
 'Amusement and Recreation Attendants',
 'Anesthesiologists',
 'Animal Breeders',
 'Animal Control Workers',
 'Animal Scienti

In [46]:
skills_dist
df_concat
df_concat_filt = pd.concat([skills_dist_filt.multiply(3.0),knowledge_dist_filt.multiply(2.0),in_dist_filt,gwas_dist_filt]).groupby(level =0).mean()
df_new = pd.concat([skills_dist_filt.multiply(1.3),knowledge_dist_filt.multiply(1.2),in_dist_filt,gwas_dist_filt]).groupby(level =0).mean()

In [74]:
avg_occ = 0
count = 0
occ_max = ''
max_r = 0
occ_min = ''
min_r = 1

for occ in relevant_occ:
    cps_len = len(df_cps[df_cps['ONET18_Title_LY'] == occ].sort_values(by = 'pct_tot', ascending = False)['ONET18_Title'])
    if cps_len > 50:
        corr, p = kendall_tau_rank(df_cps,df_new,0,50,occ)
        count += 1 
        avg_occ += corr
        if corr >= max_r: 
            occ_max = occ
            max_r = corr
        if corr <= min_r: 
            occ_min = occ
            min_r = corr
print(avg_occ/count)
print(count)
print(occ_max, max_r)
print(occ_min, min_r)

-0.008828516271579928
434
Psychiatric Aides 0.24897959183673468
Paper Goods Machine Setters, Operators, and Tenders -0.3910204081632653


In [48]:
# df_cps[df_cps['ONET18_Title_LY'] == 'Etchers and Engravers'].sort_values(by = 'pct_tot', ascending = False)['ONET18_Title']
# df_cps

In [49]:
skills_dist
df_concat
df_concat_filt = pd.concat([skills_dist_filt.multiply(3.0),knowledge_dist_filt.multiply(2.0),in_dist_filt,gwas_dist_filt]).groupby(level =0).mean()
df_new = pd.concat([skills_dist_filt.multiply(1.3),knowledge_dist_filt.multiply(1.2),in_dist_filt,gwas_dist_filt]).groupby(level =0).mean()
df_test = pd.concat([skills_dist_filt.multiply(1.3),knowledge_dist_filt.multiply(1.2),in_dist_filt,gwas_dist_filt]).groupby(level =0).mean()


In [50]:
# avg_occ = 0
# count = 0
# occ_max = ''
# max_r = 0
# occ_min = ''
# min_r = 1

# cov_max = ''
# max_c = 0
# cov_min = ''
# min_c = 1

# # occ_dictionary = {}
# occ_dictionary_skills = {}
# for occ in relevant_occ:
#     cps_len = len(df_cps[df_cps['ONET18_Title_LY'] == occ].sort_values(by = 'pct_tot', ascending = False)['ONET18_Title'])
#     if cps_len > 50:
#         n = percentage(occ,skills_dist,df_cps,0,50)
#         d = percentage_default(occ,df_cps,0,50)
#         perc = n/d
#         count += 1 
#         avg_occ += perc
#         occ_dictionary_skills[occ] = perc
#         if n >= max_c:
#             cov_max =occ
#             max_c = n 
#         if n <= max_c:
#             cov_min =occ
#             min_c = n  
#         if perc >= max_r: 
#             occ_max = occ
#             max_r = perc
#         if perc <= min_r: 
#             occ_min = occ
#             min_r = perc
# print(100*avg_occ/count)
# print(count)
# print(occ_max, 100*max_r)
# print(occ_min, min_r)
# print(cov_max, max_c)
# print(cov_min, min_c)

In [124]:
def percentage_coverage(df_onet,df_cps):
    avg_occ = 0
    count = 0
    occ_max = ''
    max_r = 0
    occ_min = ''
    min_r = 1

    cov_max = ''
    max_c = 0
    cov_min = ''
    min_c = 1
    
    cps_occ = set(df_cps['ONET18_Title_LY'].unique())
    onet_occ = set(df_onet.index.unique())

    relevant_occ = cps_occ & onet_occ
    occ_dictionary = {}
    for occ in relevant_occ:
        cps_len = len(df_cps[df_cps['ONET18_Title_LY'] == occ]['ONET18_Title'])
        if cps_len > 10:
            n = percentage(occ,df_onet,df_cps,0,10)
            d = percentage_default(occ,df_cps,0,10)
            perc = n/d
            count += 1 
            avg_occ += perc
            occ_dictionary[occ] = perc
            if n >= max_c:
                cov_max =occ
                max_c = n 
            if n <= max_c:
                cov_min =occ
                min_c = n  
            if perc >= max_r: 
                occ_max = occ
                max_r = perc
            if perc <= min_r: 
                occ_min = occ
                min_r = perc
    print(100*avg_occ/count)
    print(count)
    print(occ_max, 100*max_r)
    print(occ_min, min_r)
    print(cov_max, max_c)
    print(cov_min, min_c)
    
    return occ_dictionary

In [125]:
# kendalltau([1,2,3,4,5], [5,4,3,2,1])

In [126]:
# kendalltau([1,2,3,4,5], [6,8,8,9,10])

In [127]:
wc_dist 
knowledge_dist
skills_dist 
gwas_dist 
jz_dist 
wv_dist 
in_dist 
ab_dist 
ws_dist

Unnamed: 0,Accountants,Actors,Actuaries,Acupuncturists,Acute Care Nurses,Adapted Physical Education Specialists,Adhesive Bonding Machine Operators and Tenders,"Administrative Law Judges, Adjudicators, and Hearing Officers",Administrative Services Managers,Adult Basic and Secondary Education and Literacy Teachers and Instructors,...,"Welding, Soldering, and Brazing Machine Setters, Operators, and Tenders",Wellhead Pumpers,"Wholesale and Retail Buyers, Except Farm Products",Wind Energy Engineers,Wind Energy Operations Managers,Wind Energy Project Managers,Wind Turbine Service Technicians,"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",Word Processors and Typists,Zoologists and Wildlife Biologists
Accountants,,1.815713,-0.636321,-0.750213,1.140320,1.166324,-0.500916,0.098276,0.003265,0.072864,...,0.508586,-0.666325,-0.428479,-0.128328,-0.127577,-0.405769,-0.640472,-0.280050,-0.747072,-0.653184
Actors,1.815713,,2.428169,1.485427,-0.276126,-0.594035,1.641984,1.072325,0.047584,0.311707,...,2.537853,0.827633,0.149506,0.723129,0.808325,0.618452,1.793988,2.731205,1.323735,1.482199
Actuaries,-0.636321,2.428169,,0.608825,1.860021,1.668426,0.540284,0.700767,0.703254,1.183940,...,1.186268,0.102468,0.355814,-0.352983,-0.175670,-0.219551,0.046855,0.471518,0.867909,-0.324964
Acupuncturists,-0.750213,1.485427,0.608825,,0.542782,0.351052,-0.379520,-0.321057,-0.718668,-1.120101,...,0.366580,-1.116015,-0.638361,0.010508,-0.343355,-0.405271,-0.561293,0.025278,-0.591462,-0.591192
Acute Care Nurses,1.140320,-0.276126,1.860021,0.542782,,-0.895365,1.647176,-0.472309,-0.961883,-0.491329,...,2.568084,0.286751,-0.166002,0.160765,0.014966,0.411943,1.405633,2.535326,0.963312,1.073619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wind Energy Project Managers,-0.405769,0.618452,-0.219551,-0.405271,0.411943,-0.096516,-0.562295,0.013700,-0.756399,-0.539451,...,0.196728,-1.039674,-1.510818,-1.039589,-1.330304,,-0.908228,0.191979,0.017126,-1.348571
Wind Turbine Service Technicians,-0.640472,1.793988,0.046855,-0.561293,1.405633,1.008127,-1.068082,0.619727,0.182406,-0.133923,...,-0.757719,-0.775149,-0.804423,0.057153,-0.258353,-0.908228,,-1.022806,-0.145290,-1.380746
"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",-0.280050,2.731205,0.471518,0.025278,2.535326,2.062305,-0.966473,1.494597,1.171389,0.791171,...,-1.002790,-0.083308,0.296089,0.936479,0.721830,0.191979,-1.022806,,-0.015192,-0.442479
Word Processors and Typists,-0.747072,1.323735,0.867909,-0.591462,0.963312,0.811860,-0.375645,0.396779,-0.111033,-0.360391,...,0.423021,-0.577165,-0.541798,0.362863,0.504966,0.017126,-0.145290,-0.015192,,-0.206817


In [128]:
print("ONET default")
occ_default = percentage_coverage(df_concat,df_cps)
print("Algo 1")
occ_alg1 = percentage_coverage(df_concat_filt,df_cps)
print("algo 2")
occ_alg2 = percentage_coverage(df_new,df_cps)

print("skills")
occ_skills = percentage_coverage(skills_dist,df_cps)
print("work context")
occ_wc = percentage_coverage(wc_dist,df_cps)
print("knowledge")
occ_knowledge = percentage_coverage(knowledge_dist,df_cps)
print("genral work activities")
occ_gwa = percentage_coverage(gwas_dist,df_cps)
print("work value")
occ_wv = percentage_coverage(wv_dist,df_cps)
print("interest")
occ_in = percentage_coverage(in_dist,df_cps)
print("abilities")
occ_ab = percentage_coverage(ab_dist,df_cps)
print("work setting")
occ_ws = percentage_coverage(ws_dist,df_cps)

ONET default
12.30327998606195
610
Occupational Therapists 83.19753814184264
Audiologists 0.0
Occupational Therapists 77.4223233536707
Psychiatrists 9.39391078293398
Algo 1
14.940931127851517
611
First-Line Supervisors of Non-Retail Sales Workers 93.65570632675418
Biological Science Teachers, Postsecondary 0.0
Occupational Therapists 79.21193429924357
Psychiatrists 9.39391078293398
algo 2
15.133164638610522
611
First-Line Supervisors of Non-Retail Sales Workers 93.58518644453213
Biological Science Teachers, Postsecondary 0.0
Occupational Therapists 76.9736307458273
Psychiatrists 9.39391078293398
skills
10.761938070708014
610
First-Line Supervisors of Non-Retail Sales Workers 93.45955638335593
Psychiatrists 0.0
Title Examiners, Abstractors, and Searchers 68.44221971384758
Psychiatrists 0
work context
10.657859811575516
610
Occupational Therapists 80.77457658839083
Psychiatrists 0.0
Occupational Therapists 75.16755335620954
Psychiatrists 0
knowledge
12.671715501339706
610
First-Line Supe

## Top 10

In [123]:
# ONET default
# 12.30327998606195
# 610
# Occupational Therapists 83.19753814184264
# Audiologists 0.0
# Occupational Therapists 77.4223233536707
# Psychiatrists 9.39391078293398
# Algo 1
# 14.957654963925167
# 610
# First-Line Supervisors of Non-Retail Sales Workers 93.65570632675418
# Biological Science Teachers, Postsecondary 0.0
# Occupational Therapists 79.21193429924357
# Psychiatrists 9.39391078293398
# algo 2
# 15.150203611587056
# 610
# First-Line Supervisors of Non-Retail Sales Workers 93.58518644453213
# Biological Science Teachers, Postsecondary 0.0
# Occupational Therapists 76.9736307458273
# Psychiatrists 9.39391078293398
# skills
# 10.761938070708014
# 610
# First-Line Supervisors of Non-Retail Sales Workers 93.45955638335593
# Psychiatrists 0.0
# Title Examiners, Abstractors, and Searchers 68.44221971384758
# Psychiatrists 0
# work context
# 10.657859811575516
# 610
# Occupational Therapists 80.77457658839083
# Psychiatrists 0.0
# Occupational Therapists 75.16755335620954
# Psychiatrists 0
# knowledge
# 12.671715501339706
# 610
# First-Line Supervisors of Non-Retail Sales Workers 93.59691732177119
# Audiologists 0.0
# Occupational Therapists 71.56777726748102
# Psychiatrists 9.39391078293398
# genral work activities
# 11.832327429486321
# 610
# Optometrists 87.4116643432956
# Audiologists 0.0
# Title Examiners, Abstractors, and Searchers 69.44929984347104
# Psychiatrists 9.39391078293398
# work value
# 6.0683701226956055
# 610
# Radiation Therapists 70.63941879537158
# Nurse Anesthetists 0.0
# Radiation Therapists 61.58833798592869
# Psychiatrists 19.53329345958491
# interest
# 8.414766411210467
# 610
# First-Line Supervisors of Non-Retail Sales Workers 93.82561635343902
# Curators 0.0
# First-Line Supervisors of Non-Retail Sales Workers 62.215262666975434
# Psychiatrists 0.3604869380908128
# abilities
# 10.457553558969988
# 610
# Title Examiners, Abstractors, and Searchers 82.30568161448473
# Audiologists 0.0
# Title Examiners, Abstractors, and Searchers 67.99255631465411
# Psychiatrists 0.317934589004297
# work setting
# 6.070237629881494
# 610
# Title Examiners, Abstractors, and Searchers 82.30568161448473
# Audiologists 0.0
# Title Examiners, Abstractors, and Searchers 67.99255631465411
# Psychiatrists 2.0379341981291943

## Top 20

In [81]:
# ONET default
# 17.00152350132241
# 569
# First-Line Supervisors of Non-Retail Sales Workers 87.19720582811054
# Petroleum Engineers 0.0
# Occupational Therapists 79.71861719209504
# Psychiatrists 9.436463132020497
# Algo 1
# 20.157148088139543
# 569
# First-Line Supervisors of Non-Retail Sales Workers 89.3305850797751
# Private Detectives and Investigators 0.0
# Occupational Therapists 83.76743245968721
# Psychiatrists 9.436463132020497
# algo 2
# 19.845296270656902
# 569
# First-Line Supervisors of Non-Retail Sales Workers 89.23131888818325
# Private Detectives and Investigators 0.0
# Occupational Therapists 83.71387649923727
# Psychiatrists 9.754397721024795
# skills
# 14.50071320223328
# 569
# First-Line Supervisors of Non-Retail Sales Workers 87.99728791972292
# Audiologists 0.0
# Title Examiners, Abstractors, and Searchers 68.51075986436102
# Psychiatrists 9.570318319104194
# work context
# 14.698606243888175
# 569
# Occupational Therapists 80.08669903163094
# Petroleum Engineers 0.0
# Occupational Therapists 79.26549025969348
# Psychiatrists 9.244104885791575
# knowledge
# 18.27738898232911
# 569
# First-Line Supervisors of Non-Retail Sales Workers 87.20541559832638
# Audiologists 0.0
# Occupational Therapists 75.67014849552311
# Psychiatrists 9.733121546481536
# genral work activities
# 15.745385724125168
# 569
# Occupational Therapists 80.08669903163094
# Audiologists 0.0
# Occupational Therapists 79.26549025969348
# Psychiatrists 15.990771674096809
# work value
# 8.305812619552738
# 569
# Nurse Practitioners 65.48348694740163
# Nurse Anesthetists 0.0
# Radiation Therapists 61.58833798592869
# Psychiatrists 24.637312362381486
# interest
# 11.946956823721644
# 569
# First-Line Supervisors of Non-Retail Sales Workers 87.70273233909465
# Curators 0.0
# Postal Service Clerks 84.043529949622
# Psychiatrists 0.5094201598936182
# abilities
# 13.640188056376925
# 569
# Title Examiners, Abstractors, and Searchers 75.51224315682042
# Audiologists 0.0
# Title Examiners, Abstractors, and Searchers 68.44221971384758
# Psychiatrists 2.6472439946287474
# work setting
# 8.710758743472496
# 569
# Title Examiners, Abstractors, and Searchers 75.01612990858587
# Audiologists 0.0
# Respiratory Therapists 70.77391178357794
# Psychiatrists 11.431844981063175

## Top 50

In [65]:
# ONET default
# 26.620927695763267
# 434
# Occupational Therapists 84.63236189185216
# Museum Technicians and Conservators 0.0
# Occupational Therapists 84.54229292527187
# Psychiatrists 19.202197876361616
# Algo 1
# 30.065967663680865
# 434
# Occupational Therapists 84.02195401297996
# Museum Technicians and Conservators 0.0
# Occupational Therapists 83.93253466559514
# Psychiatrists 22.848961919671954
# algo 2
# 30.445321068270225
# 434
# Occupational Therapists 84.69041395806892
# Museum Technicians and Conservators 0.0
# Occupational Therapists 84.60028321027994
# Psychiatrists 16.506389973814574
# skills
# 22.128976873277793
# 434
# First-Line Supervisors of Non-Retail Sales Workers 78.79857653826994
# Audiologists 0.0
# Title Examiners, Abstractors, and Searchers 71.41240257266489
# Psychiatrists 9.804356239080036
# work context
# 23.713811141031805
# 434
# Occupational Therapists 80.74047037766475
# Museum Technicians and Conservators 0.0
# Occupational Therapists 80.65454330951303
# Psychiatrists 14.433168301049843
# knowledge
# 27.98233953212392
# 434
# Occupational Therapists 83.96390194676319
# Materials Scientists 0.002744464047177644
# Occupational Therapists 83.87454438058705
# Psychiatrists 28.11578404984587
# genral work activities
# 23.093421339453133
# 434
# Occupational Therapists 81.41336937151577
# Museum Technicians and Conservators 0.0
# Occupational Therapists 81.32672617875599
# Psychiatrists 16.614730670584397
# work value
# 14.735038211529552
# 434
# Occupational Therapists 83.73795579085653
# Mechanical Drafters 0.0
# Occupational Therapists 83.64883868514151
# Psychiatrists 35.93617801837723
# interest
# 21.332168245358215
# 434
# Occupational Therapists 78.47689809054984
# Curators 0.0
# Occupational Therapists 78.39338000180177
# Psychiatrists 17.43556422986619
# abilities
# 22.075793929028823
# 434
# Occupational Therapists 80.92923823839733
# Community Health Workers 0.0
# Occupational Therapists 80.84311027633505
# Psychiatrists 14.214946244347924
# work setting
# 14.117143131454771
# 434
# Title Examiners, Abstractors, and Searchers 68.24014209372007
# Audiologists 0.0
# Title Examiners, Abstractors, and Searchers 67.99255631465411
# Psychiatrists 18.313271225235116


In [56]:
# df_score = pd.DataFrame.from_dict(occ_dictionary)
# list(occ_dictionary.keys())

# df_score = pd.DataFrame.from_dict(occ_dictionary, orient = 'index',  columns= ['score'])
# df_score = df_score.sort_values(by = 'score', ascending = False)

In [57]:
# Use textposition='auto' for direct text
# fig = go.Figure(data=[go.Bar(
#             x=df_score.index[:30], y=df_score['score'][:30],
#             textposition='auto',
#         )])
# fig.update_layout(barmode='stack', xaxis={'categoryorder':'total descending'})

# fig.show()

In [58]:
def distribution(df):
    df_output = pd.DataFrame.from_dict(df, orient = 'index',  columns= ['score'])
    df_output = df_output.sort_values(by = 'score', ascending = False)
    fig = go.Figure(data=[go.Bar(
                x=df_output.index[:30], y=df_output['score'][:10],
                textposition='auto',
            )])
    fig.update_layout(barmode='stack', xaxis={'categoryorder':'total descending'})

    fig.show()

In [105]:
distribution(occ_skills)

In [106]:
distribution(occ_default)

In [107]:
distribution(occ_alg1)

In [108]:
distribution(occ_alg2)

### Occupational Therapists

In [84]:
df_concat.loc['Occupational Therapists'].sort_values()[0:10]

Recreational Therapists                             -2.047346
Nurse Midwives                                      -1.997816
Nurse Practitioners                                 -1.871316
Registered Nurses                                   -1.833238
Licensed Practical and Licensed Vocational Nurses   -1.807966
Physical Therapists                                 -1.800402
Music Therapists                                    -1.753355
Midwives                                            -1.664768
Critical Care Nurses                                -1.628603
Speech-Language Pathologists                        -1.619891
Name: Occupational Therapists, dtype: float64

In [83]:
df_cps[df_cps['ONET18_Title_LY'] == 'Occupational Therapists'].sort_values(by = 'pct_tot', ascending = False)[:10]

Unnamed: 0,ONET18_SOC_LY,ONET18_SOC,ONET18_Title,ONET18_Title_LY,count,pct_tot,count_socly,pct_socly,in_grp,in_minor_grp,in_broad_occ,employment_projection_NewJob,wage_change_BetweenJobs
50445,29-1122,29-1141,Registered Nurses,Occupational Therapists,76567.07,0.045835,115545.99,66.26545,1,1,0,7.2,-13.712047
50441,29-1122,29-1127,Speech-Language Pathologists,Occupational Therapists,5196.69,0.003111,115545.99,4.497508,1,1,1,24.9,-6.856024
50442,29-1122,29-1128,Exercise Physiologists,Occupational Therapists,4673.12,0.002797,115545.99,4.044381,1,1,1,11.3,-42.115573
50443,29-1122,29-1129,"Therapists, All Other",Occupational Therapists,4673.12,0.002797,115545.99,4.044381,1,1,1,14.6,-38.026445
50440,29-1122,29-1123,Physical Therapists,Occupational Therapists,4434.86,0.002655,115545.99,3.838177,1,1,1,18.2,5.288932
50393,29-1122,11-9111,Medical and Health Services Managers,Occupational Therapists,2998.1,0.001795,115545.99,2.594724,0,0,0,31.5,18.87855
50392,29-1122,11-9013,"Farmers, Ranchers, and Other Agricultural Mana...",Occupational Therapists,2727.86,0.001633,115545.99,2.360844,0,0,0,-6.5,-16.234084
50439,29-1122,29-1071,Physician Assistants,Occupational Therapists,2586.27,0.001548,115545.99,2.238304,1,0,0,31.3,32.149853
50446,29-1122,29-1151,Nurse Anesthetists,Occupational Therapists,2038.31,0.00122,115545.99,1.764068,1,1,0,13.7,105.754163
50447,29-1122,29-1161,Nurse Midwives,Occupational Therapists,1629.885,0.000976,115545.99,1.410594,1,1,0,11.6,23.653281


## First-Line Supervisors of Non-Retail Sales Workers

In [86]:
df_concat.loc['First-Line Supervisors of Non-Retail Sales Workers'].sort_values()[0:20]

General and Operations Managers                                                              -1.831166
Gaming Managers                                                                              -1.815987
Administrative Services Managers                                                             -1.779102
First-Line Supervisors of Office and Administrative Support Workers                          -1.677219
Compliance Managers                                                                          -1.633127
First-Line Supervisors of Transportation and Material-Moving Machine and Vehicle Operators   -1.614790
Purchasing Managers                                                                          -1.605301
Education Administrators, Postsecondary                                                      -1.586060
Lodging Managers                                                                             -1.571804
Auditors                                                                 

In [101]:
df_new.loc['First-Line Supervisors of Non-Retail Sales Workers'].sort_values()[0:10]

First-Line Supervisors of Office and Administrative Support Workers   -2.526221
Gaming Managers                                                       -2.309424
First-Line Supervisors of Retail Sales Workers                        -2.285236
Purchasing Managers                                                   -2.252855
Sales Managers                                                        -2.247404
Financial Managers, Branch or Department                              -2.236861
Postmasters and Mail Superintendents                                  -2.156335
Transportation Managers                                               -2.132636
Spa Managers                                                          -2.093977
Logisticians                                                          -2.073113
Name: First-Line Supervisors of Non-Retail Sales Workers, dtype: float64

In [104]:
df_cps[df_cps['ONET18_Title_LY'] == 'First-Line Supervisors of Non-Retail Sales Workers'].sort_values(by = 'pct_socly', ascending = False)[:10][['ONET18_Title','employment_projection_NewJob','wage_change_BetweenJobs','pct_socly']]

Unnamed: 0,ONET18_Title,employment_projection_NewJob,wage_change_BetweenJobs,pct_socly
76141,First-Line Supervisors of Retail Sales Workers,-5.5,-46.021146,60.357429
75986,General and Operations Managers,5.8,34.808013,1.044561
75988,Marketing Managers,6.7,83.055092,0.773958
75989,Sales Managers,3.5,69.421258,0.773958
76145,Retail Salespersons,-0.6,-66.22148,0.625479
76211,Stationary Engineers and Boiler Operators,2.2,-16.861436,0.591123
76024,Accountants and Auditors,4.3,-4.284919,0.561387
76138,Childcare Workers,1.7,-67.584864,0.546774
76198,First-Line Supervisors of Production and Opera...,-0.3,-17.974402,0.530215
76181,Construction Laborers,5.4,-50.695604,0.504579


## Retail Salespersons

In [114]:
df_new.loc['Retail Salespersons'].sort_values()[0:10]

Demonstrators and Product Promoters                                        -2.258904
Counter and Rental Clerks                                                  -2.186155
Door-To-Door Sales Workers, News and Street Vendors, and Related Workers   -2.077470
Bartenders                                                                 -2.060698
Hotel, Motel, and Resort Desk Clerks                                       -2.059661
Gaming Supervisors                                                         -1.996016
Hosts and Hostesses, Restaurant, Lounge, and Coffee Shop                   -1.917422
Cashiers                                                                   -1.902887
Waiters and Waitresses                                                     -1.876960
Insurance Sales Agents                                                     -1.867913
Name: Retail Salespersons, dtype: float64

In [115]:
df_cps[df_cps['ONET18_Title_LY'] == 'Retail Salespersons'].sort_values(by = 'pct_socly', ascending = False)[:10][['ONET18_Title','employment_projection_NewJob','wage_change_BetweenJobs','pct_socly']]

Unnamed: 0,ONET18_Title,employment_projection_NewJob,wage_change_BetweenJobs,pct_socly
77839,Cashiers,-7.4,-6.342669,7.110967
77840,Gambling Change Persons and Booth Cashiers,2.5,1.729819,7.110967
77844,Insurance Sales Agents,5.5,101.729819,4.564365
77847,"Sales Representatives of Services, Except Adve...",6.0,122.3229,3.581331
77857,"Sales and Related Workers, All Other",1.9,26.029654,3.381114
77848,"Sales Representatives, Wholesale and Manufactu...",4.1,220.840198,3.33604
77849,"Sales Representatives, Wholesale and Manufactu...",0.9,137.314662,3.33604
77853,Real Estate Sales Agents,2.3,93.822076,3.156278
77852,Real Estate Brokers,2.6,136.490939,3.156278
77876,Receptionists and Information Clerks,3.6,19.028007,2.808294


## Tutors

In [116]:
df_new.loc['Tutors'].sort_values()[0:10]

Adult Basic and Secondary Education and Literacy Teachers and Instructors   -2.168012
Elementary School Teachers, Except Special Education                        -2.100491
Special Education Teachers, Kindergarten and Elementary School              -2.091943
Mathematical Science Teachers, Postsecondary                                -2.072074
Home Economics Teachers, Postsecondary                                      -1.970068
Special Education Teachers, Middle School                                   -1.940251
Graduate Teaching Assistants                                                -1.906366
Special Education Teachers, Preschool                                       -1.838197
Kindergarten Teachers, Except Special Education                             -1.830166
Secondary School Teachers, Except Special and Career/Technical Education    -1.810165
Name: Tutors, dtype: float64

In [117]:
df_cps[df_cps['ONET18_Title_LY'] == 'Tutors'].sort_values(by = 'pct_socly', ascending = False)[:10][['ONET18_Title','employment_projection_NewJob','wage_change_BetweenJobs','pct_socly']]

Unnamed: 0,ONET18_Title,employment_projection_NewJob,wage_change_BetweenJobs,pct_socly
42172,"Elementary School Teachers, Except Special Edu...",3.5,,7.64658
42173,"Middle School Teachers, Except Special and Car...",3.6,,7.64658
42174,"Career/Technical Education Teachers, Middle Sc...",3.1,,7.64658
42175,"Secondary School Teachers, Except Special and ...",3.8,,4.402986
42176,"Career/Technical Education Teachers, Secondary...",2.3,,4.402986
42170,"Preschool Teachers, Except Special Education",2.5,,3.80634
42171,"Kindergarten Teachers, Except Special Education",3.7,,3.80634
42318,Retail Salespersons,-0.6,,1.821566
42329,Customer Service Representatives,-2.0,,1.580006
42310,Childcare Workers,1.7,,1.47891


## Private Detectives and Investigators 

In [120]:
df_new.loc['Private Detectives and Investigators'].sort_values()[0:10]

Licensing Examiners and Inspectors                  -2.188887
Retail Loss Prevention Specialists                  -2.071166
Assessors                                           -2.013458
Real Estate Brokers                                 -1.959689
Insurance Adjusters, Examiners, and Investigators   -1.941846
Claims Examiners, Property and Casualty Insurance   -1.929849
Tax Examiners and Collectors, and Revenue Agents    -1.915755
Compliance Managers                                 -1.911921
Immigration and Customs Inspectors                  -1.882182
Insurance Sales Agents                              -1.840380
Name: Private Detectives and Investigators, dtype: float64

In [121]:
df_cps[df_cps['ONET18_Title_LY'] == 'Private Detectives and Investigators'].sort_values(by = 'pct_socly', ascending = False)[:10][['ONET18_Title','employment_projection_NewJob','wage_change_BetweenJobs','pct_socly']]

Unnamed: 0,ONET18_Title,employment_projection_NewJob,wage_change_BetweenJobs,pct_socly
62946,Gambling Surveillance Officers and Gambling In...,5.6,-32.289951,39.33045
62947,Security Guards,2.8,-41.227348,39.33045
62952,Customer Service Representatives,-2.0,-31.260297,3.173654
62924,Social and Community Service Managers,17.0,32.948929,1.695984
62948,Dishwashers,0.5,-52.512356,1.452062
62957,"Motor Vehicle Operators, All Other",3.1,-34.843493,1.449332
62958,"Laborers and Freight, Stock, and Material Move...",4.2,-41.556837,1.180194
62951,"Bookkeeping, Accounting, and Auditing Clerks",-5.7,-18.369028,0.943149
62943,Detectives and Criminal Investigators,1.1,64.70346,0.929118
62944,Police and Sheriff's Patrol Officers,5.7,25.041186,0.929118
