In [38]:
import pandas as pd
import random as rnd 
import mysql.connector
from mysql.connector import Error
import plotly
import json
import csv
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pymysql
import plotly.express as px
import plotly.graph_objects as go
from sqlalchemy import create_engine
import networkx as nx
from networkx_viewer import Viewer
from functools import reduce
from scipy.spatial.distance import pdist, squareform

# Scales and Abbreviations Used for the Starter and Change Analyses 

In [39]:
info_dict = {'descriptor_domain':['abilites','work_context','interests',
                               'knowledge','skills','work_styles','work_values','gwas','job_zones'],
            'starter':['IM,LV',None,'OI',None,None,'IM','EX',None,None],
            'change':[None,'CX',None,'IM, LV','IM, LV',None,None,'IM, LV','JZ']}
df_info = pd.DataFrame(data = info_dict)
df_info

Unnamed: 0,descriptor_domain,starter,change
0,abilites,"IM,LV",
1,work_context,,CX
2,interests,OI,
3,knowledge,,"IM, LV"
4,skills,,"IM, LV"
5,work_styles,IM,
6,work_values,EX,
7,gwas,,"IM, LV"
8,job_zones,,JZ


In [40]:
connection = mysql.connector.connect(host='localhost',
                                         database='employment',
                                         user='root',
                                         password='1234')

In [41]:
def get_data(connection, descriptor_domain):
    '''
    Return df in the SOCs-decriptor format of a specific domain 
    descriptor_domeain:
        work context
        knowledge
        skills
        gwas
        job_zones
    
    '''
    df= pd.DataFrame(data=None)
    if descriptor_domain == 'work_context':
        df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.work_context
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code)
            WHERE scale_id LIKE 'CX';''', con = connection)
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    elif descriptor_domain == 'job_zones':
        df= pd.read_sql('''
            SELECT title,job_zone FROM employment.job_zones
            LEFT JOIN employment.occupation_data USING (onetsoc_code);''', con = connection)
        df
    else:
        df= pd.read_sql('''
            SELECT title, element_name, data_value FROM employment.''' + descriptor_domain + '''
            LEFT JOIN employment.content_model_reference USING (element_id) 
            LEFT JOIN employment.occupation_data USING (onetsoc_code);''', con = connection)
        df = df.groupby(by=['title','element_name'])['data_value'].mean()
        df = df.reset_index().pivot(index = 'title', columns = 'element_name', values = 'data_value')
    return df

### Work Context (CX)

In [42]:
df_wc = get_data(connection,'work_context')
# df_wc.pivot(index = 'title', columns = 'element_name', values = 'data_value')
df_wc

element_name,Consequence of Error,Contact With Others,Coordinate or Lead Others,"Cramped Work Space, Awkward Positions",Deal With External Customers,Deal With Physically Aggressive People,Deal With Unpleasant or Angry People,Degree of Automation,Electronic Mail,Exposed to Contaminants,...,Spend Time Standing,"Spend Time Using Your Hands to Handle, Control, or Feel Objects, Tools, or Controls",Spend Time Walking and Running,Structured versus Unstructured Work,Telephone,Time Pressure,Very Hot or Cold Temperatures,"Wear Common Protective or Safety Equipment such as Safety Shoes, Glasses, Gloves, Hearing Protection, Hard Hats, or Life Jackets","Wear Specialized Protective or Safety Equipment such as Breathing Apparatus, Safety Harness, Full Protection Suits, or Radiation Protection",Work With Work Group or Team
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Accountants,2.88,4.59,3.05,1.12,3.92,1.03,2.25,3.20,4.97,1.23,...,1.89,1.77,1.70,4.71,4.89,4.61,1.19,1.15,1.00,3.66
Actors,1.97,4.71,3.16,1.89,3.65,1.48,2.67,1.35,4.68,2.29,...,3.83,2.07,2.56,2.88,3.67,4.59,1.50,1.32,1.15,5.00
Actuaries,3.28,3.62,3.12,1.06,2.66,1.03,1.91,2.50,5.00,1.00,...,1.69,1.83,1.26,3.91,4.62,3.62,1.03,1.00,1.00,3.97
Acupuncturists,2.90,4.80,3.18,1.52,3.70,1.36,2.51,1.44,4.29,1.78,...,3.34,3.69,1.90,4.46,4.41,3.03,1.23,2.23,1.00,3.56
Acute Care Nurses,4.77,4.81,4.00,2.89,4.52,3.00,4.07,2.46,4.48,3.56,...,3.78,3.04,3.56,4.30,5.00,4.56,1.19,4.24,2.92,4.77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wind Energy Project Managers,2.35,4.00,3.77,1.16,3.79,1.16,2.47,1.55,4.85,1.43,...,2.18,1.91,1.73,4.69,4.85,3.50,1.92,2.26,1.15,4.35
Wind Turbine Service Technicians,3.72,4.60,3.76,4.17,2.86,1.33,2.10,1.96,4.34,3.76,...,3.68,3.69,2.68,3.72,3.99,3.61,4.12,4.96,4.74,4.56
"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",2.80,3.64,3.18,2.99,2.28,1.23,2.76,2.13,1.57,4.99,...,4.74,4.81,3.30,3.70,2.61,3.96,3.51,4.32,1.99,4.19
Word Processors and Typists,2.16,4.69,3.71,1.41,4.46,1.94,3.47,2.33,4.86,2.10,...,1.83,2.76,1.97,4.26,5.00,3.96,1.54,1.00,1.00,4.05


### Knowledge

In [43]:
df_knowledge = get_data(connection,'knowledge')
df_knowledge

element_name,Administration and Management,Biology,Building and Construction,Chemistry,Clerical,Communications and Media,Computers and Electronics,Customer and Personal Service,Design,Economics and Accounting,...,Philosophy and Theology,Physics,Production and Processing,Psychology,Public Safety and Security,Sales and Marketing,Sociology and Anthropology,Telecommunications,Therapy and Counseling,Transportation
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Accountants,3.255,0.500,0.710,0.530,4.400,2.460,3.325,3.605,0.625,4.880,...,0.595,0.535,1.140,1.290,1.085,0.960,0.640,1.215,0.615,1.120
Actors,2.395,0.560,1.355,0.520,2.150,4.375,2.160,3.030,2.205,1.425,...,2.780,0.805,1.085,3.570,1.890,3.020,3.880,1.390,2.250,1.365
Actuaries,4.015,0.815,0.815,0.610,2.420,2.195,4.140,2.840,1.655,4.720,...,0.720,0.770,1.550,2.235,1.405,2.845,1.955,0.970,1.045,1.110
Acupuncturists,2.620,3.750,1.225,2.435,2.725,2.535,2.595,4.635,1.290,2.355,...,3.405,1.465,1.540,4.695,2.250,3.210,3.210,1.220,4.305,1.190
Acute Care Nurses,2.925,3.410,0.785,2.800,2.895,2.215,2.805,4.985,1.000,1.395,...,2.990,1.895,1.480,4.910,3.000,1.700,3.340,1.735,4.250,1.250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wind Energy Project Managers,4.420,1.540,4.185,1.815,3.340,2.945,3.370,3.795,3.860,3.620,...,1.395,2.730,2.760,2.005,3.070,2.790,1.765,2.385,0.990,2.900
Wind Turbine Service Technicians,2.995,1.575,3.490,2.510,3.080,2.055,4.385,2.490,3.250,1.745,...,0.660,3.570,2.275,2.325,3.275,1.130,1.260,2.985,1.345,2.935
"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",1.735,0.800,2.280,1.780,0.985,0.990,1.685,1.685,2.160,0.950,...,0.685,1.420,2.925,1.020,2.355,1.260,0.575,0.650,0.845,1.680
Word Processors and Typists,2.350,0.505,0.500,0.505,5.125,1.710,3.810,4.285,0.515,1.270,...,0.500,0.500,0.820,2.030,1.320,0.890,0.755,1.265,0.730,0.545


### Skills

In [44]:
df_skills = get_data(connection,'skills')
df_skills

element_name,Active Learning,Active Listening,Complex Problem Solving,Coordination,Critical Thinking,Equipment Maintenance,Equipment Selection,Installation,Instructing,Judgment and Decision Making,...,Science,Service Orientation,Social Perceptiveness,Speaking,Systems Analysis,Systems Evaluation,Technology Design,Time Management,Troubleshooting,Writing
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Accountants,3.185,3.940,3.185,3.000,3.815,0.50,0.500,0.500,2.815,3.500,...,1.750,3.000,3.000,3.750,3.435,2.940,1.185,3.060,0.815,3.435
Actors,2.620,3.685,2.750,2.940,3.000,0.50,0.500,0.500,2.685,2.940,...,1.000,2.185,3.750,3.940,1.810,1.810,0.815,3.000,0.500,3.130
Actuaries,3.750,4.060,4.310,3.000,4.500,0.50,0.500,0.500,3.250,4.500,...,2.060,3.000,2.880,3.940,4.190,4.250,1.375,3.310,0.500,3.690
Acupuncturists,3.310,3.685,3.125,2.940,3.815,0.50,0.875,0.500,2.685,3.500,...,2.370,3.500,3.685,3.560,2.685,2.935,1.310,2.685,1.125,3.375
Acute Care Nurses,3.815,3.940,3.750,3.815,4.000,0.50,1.440,0.500,3.690,3.875,...,3.060,3.940,4.310,4.000,3.060,3.125,1.560,3.185,1.810,3.560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wind Energy Project Managers,3.690,3.750,3.315,3.750,4.125,0.75,0.815,0.500,3.065,3.560,...,1.435,3.000,3.560,3.815,3.060,3.190,1.375,3.500,1.690,3.880
Wind Turbine Service Technicians,2.940,3.065,3.190,3.250,3.310,3.56,2.750,1.500,3.000,3.060,...,2.060,2.315,2.625,3.000,2.815,2.815,1.940,2.815,3.750,2.620
"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",2.310,2.815,2.625,2.060,2.940,3.00,2.690,1.565,1.815,2.500,...,1.000,1.630,2.125,2.750,1.315,1.810,1.630,2.435,3.000,1.815
Word Processors and Typists,2.125,3.185,2.000,2.370,2.625,1.12,0.620,0.500,1.500,2.190,...,0.500,2.690,2.560,2.940,1.750,1.380,1.315,2.940,1.060,3.250


### GWAs

In [45]:
df_gwas = get_data(connection,'work_activities')
df_gwas

element_name,Analyzing Data or Information,Assisting and Caring for Others,Coaching and Developing Others,Communicating with Persons Outside Organization,"Communicating with Supervisors, Peers, or Subordinates",Controlling Machines and Processes,Coordinating the Work and Activities of Others,Developing Objectives and Strategies,Developing and Building Teams,Documenting/Recording Information,...,Provide Consultation and Advice to Others,Repairing and Maintaining Electronic Equipment,Repairing and Maintaining Mechanical Equipment,Resolving Conflicts and Negotiating with Others,Scheduling Work and Activities,Selling or Influencing Others,Staffing Organizational Units,Thinking Creatively,Training and Teaching Others,Updating and Using Relevant Knowledge
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Accountants,4.430,2.465,3.040,3.635,4.590,1.405,3.325,3.475,3.305,4.095,...,2.655,0.810,0.830,3.285,3.270,2.335,2.210,3.360,3.560,3.795
Actors,1.580,2.140,2.385,3.625,4.410,1.115,2.275,1.655,2.235,2.215,...,1.720,0.850,0.865,2.130,2.275,1.720,1.005,4.525,2.490,2.050
Actuaries,5.690,1.425,3.875,4.090,4.700,0.645,3.630,3.905,3.655,3.835,...,4.590,0.590,0.500,3.060,3.390,2.890,3.080,4.330,3.475,4.940
Acupuncturists,3.300,4.635,1.595,3.630,2.900,1.265,2.210,3.310,2.160,3.860,...,3.115,1.225,1.040,2.725,2.390,2.690,1.180,4.095,1.975,3.795
Acute Care Nurses,3.925,5.630,4.265,3.055,4.770,3.480,4.090,3.350,4.350,4.910,...,3.285,1.535,1.225,4.425,3.800,2.445,3.325,3.500,4.075,4.630
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wind Energy Project Managers,4.300,2.100,2.935,5.160,4.770,1.470,4.765,3.670,4.255,3.595,...,3.830,0.965,1.640,4.320,4.325,3.770,2.410,3.915,2.910,4.495
Wind Turbine Service Technicians,3.175,2.955,3.185,2.620,3.980,4.645,3.355,2.895,2.950,3.495,...,2.820,4.805,5.055,2.365,3.280,1.795,1.640,3.485,3.465,4.290
"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",2.215,1.750,1.830,1.070,3.075,4.405,2.465,1.715,2.010,2.325,...,1.655,1.900,2.620,1.535,1.685,1.045,0.860,2.555,2.145,2.700
Word Processors and Typists,2.925,3.115,2.190,3.405,4.175,2.190,2.060,2.045,1.925,3.670,...,2.020,1.250,1.165,2.930,2.645,1.250,1.355,2.845,2.165,3.215


### Job Zone (JZ)

In [46]:
df_jz = get_data(connection,'job_zones')
df_jz = df_jz[df_jz['title'] != 'Legislators']
df_jz

Unnamed: 0,title,job_zone
0,Chief Executives,5.0
1,Chief Sustainability Officers,5.0
2,General and Operations Managers,4.0
4,Advertising and Promotions Managers,4.0
5,Marketing Managers,4.0
...,...,...
964,"Pump Operators, Except Wellhead Pumpers",2.0
965,Wellhead Pumpers,2.0
966,Refuse and Recyclable Material Collectors,2.0
967,Mine Shuttle Car Operators,2.0


# Combined

In [47]:
df = reduce(lambda x,y: pd.merge(x,y, on='title', how='outer'), [df_wc,df_knowledge,df_skills,df_gwas,df_jz])
df[df.columns[1:]]
name_dict = df['title'].to_dict()
cols = df_wc.index
cols
df

Unnamed: 0,title,Consequence of Error,Contact With Others,Coordinate or Lead Others,"Cramped Work Space, Awkward Positions",Deal With External Customers,Deal With Physically Aggressive People,Deal With Unpleasant or Angry People,Degree of Automation,Electronic Mail,...,Repairing and Maintaining Electronic Equipment,Repairing and Maintaining Mechanical Equipment,Resolving Conflicts and Negotiating with Others,Scheduling Work and Activities,Selling or Influencing Others,Staffing Organizational Units,Thinking Creatively,Training and Teaching Others,Updating and Using Relevant Knowledge,job_zone
0,Accountants,2.88,4.59,3.05,1.12,3.92,1.03,2.25,3.20,4.97,...,0.810,0.830,3.285,3.270,2.335,2.210,3.360,3.560,3.795,4.0
1,Actors,1.97,4.71,3.16,1.89,3.65,1.48,2.67,1.35,4.68,...,0.850,0.865,2.130,2.275,1.720,1.005,4.525,2.490,2.050,2.0
2,Actuaries,3.28,3.62,3.12,1.06,2.66,1.03,1.91,2.50,5.00,...,0.590,0.500,3.060,3.390,2.890,3.080,4.330,3.475,4.940,4.0
3,Acupuncturists,2.90,4.80,3.18,1.52,3.70,1.36,2.51,1.44,4.29,...,1.225,1.040,2.725,2.390,2.690,1.180,4.095,1.975,3.795,5.0
4,Acute Care Nurses,4.77,4.81,4.00,2.89,4.52,3.00,4.07,2.46,4.48,...,1.535,1.225,4.425,3.800,2.445,3.325,3.500,4.075,4.630,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
963,Wind Energy Project Managers,2.35,4.00,3.77,1.16,3.79,1.16,2.47,1.55,4.85,...,0.965,1.640,4.320,4.325,3.770,2.410,3.915,2.910,4.495,4.0
964,Wind Turbine Service Technicians,3.72,4.60,3.76,4.17,2.86,1.33,2.10,1.96,4.34,...,4.805,5.055,2.365,3.280,1.795,1.640,3.485,3.465,4.290,3.0
965,"Woodworking Machine Setters, Operators, and Te...",2.80,3.64,3.18,2.99,2.28,1.23,2.76,2.13,1.57,...,1.900,2.620,1.535,1.685,1.045,0.860,2.555,2.145,2.700,2.0
966,Word Processors and Typists,2.16,4.69,3.71,1.41,4.46,1.94,3.47,2.33,4.86,...,1.250,1.165,2.930,2.645,1.250,1.355,2.845,2.165,3.215,2.0


In [48]:
def distance_table(df,name_dict):
    
    dist = pdist(df[df.columns[1:]], 'euclidean')
    df_dist = pd.DataFrame(squareform(dist))
    columns = list(df_dist.index)
    df_dist = df_dist.replace(0, np.NaN) 
    for col in columns:
        df_dist[col] = (df_dist[col] - df_dist[col].mean())/df_dist[col].std(ddof=0)
#     return rows
    return df_dist.rename(columns = name_dict, index = name_dict)

In [57]:
# [df_wc,df_knowledge,df_skills,df_gwas,df_jz]
wc_dist = distance_table(df_wc,name_dict)
knowledge_dist = distance_table(df_knowledge,name_dict)
skills_dist = distance_table(df_skills,name_dict)
gwas_dist = distance_table(df_gwas,name_dict)
jz_dist = distance_table(df_jz,name_dict).multiply(1.3)
df_concat = pd.concat([wc_dist,knowledge_dist,skills_dist,gwas_dist,jw_dist]).groupby(level =0).mean()
df_concat

Unnamed: 0,Accountants,Actors,Actuaries,Acupuncturists,Acute Care Nurses,Adapted Physical Education Specialists,Adhesive Bonding Machine Operators and Tenders,"Administrative Law Judges, Adjudicators, and Hearing Officers",Administrative Services Managers,Adult Basic and Secondary Education and Literacy Teachers and Instructors,...,"Welding, Soldering, and Brazing Machine Setters, Operators, and Tenders",Wellhead Pumpers,"Wholesale and Retail Buyers, Except Farm Products",Wind Energy Engineers,Wind Energy Operations Managers,Wind Energy Project Managers,Wind Turbine Service Technicians,"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",Word Processors and Typists,Zoologists and Wildlife Biologists
Accountants,,-0.061667,-1.590857,-0.462153,-0.079994,-0.094898,0.514835,-0.450990,-0.744171,-0.836471,...,1.024863,1.038490,-0.733139,0.051139,0.665554,0.069123,1.227153,0.963824,-0.518217,0.320250
Actors,0.527672,,0.321181,-0.420259,0.401987,-0.223646,0.384856,0.648591,0.584510,-0.655524,...,0.832548,1.290079,0.841198,1.369248,1.655358,1.146145,1.324122,0.731540,0.185273,1.281749
Actuaries,-1.485650,0.052004,,0.345877,0.513614,0.509848,1.367742,-0.837788,-0.089549,-0.544223,...,1.076231,1.173811,-0.418895,-0.389477,0.585182,-0.265121,1.194718,1.006379,0.201947,0.021579
Acupuncturists,-0.496391,-1.025095,-0.039738,,-1.122547,-1.150964,0.586693,-0.696180,0.134612,-1.039517,...,0.500725,0.722005,-0.124649,0.251811,0.684628,0.126220,0.735346,0.461334,-0.317275,0.023918
Acute Care Nurses,-0.073878,-0.016927,0.249645,-0.881890,,-1.245207,0.556089,-0.401249,-0.155086,-0.307497,...,0.750952,0.658446,0.422753,0.233191,0.083553,0.259498,0.516300,0.740174,0.600500,-0.153902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wind Energy Project Managers,-0.224455,0.326009,-0.417917,0.096570,0.122054,-0.112881,0.841300,-0.574345,-0.751794,0.208595,...,0.670324,0.352993,-1.017602,-1.491844,-1.310365,,0.081608,0.764595,0.266394,-1.312157
Wind Turbine Service Technicians,1.368755,1.258337,1.278524,1.468316,0.955145,1.098771,-0.279593,1.052828,0.885362,1.536411,...,-0.524536,-1.369360,1.315256,0.379245,-0.936072,0.598718,,-0.473363,1.188926,0.635415
"Woodworking Machine Setters, Operators, and Tenders, Except Sawing",1.278016,0.717002,1.288033,1.306858,1.471850,1.580803,-1.090240,1.122485,1.587950,1.277705,...,-1.841880,-0.882206,1.312603,1.458760,1.286681,1.650009,-0.417749,,0.386819,1.877697
Word Processors and Typists,-0.545704,-0.297010,0.124995,-0.119583,0.719849,0.642928,0.576293,-0.557867,0.317450,-0.324981,...,0.281413,0.482277,-0.578362,0.563710,1.193103,0.604867,0.915829,0.114730,,0.723832
