In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os



# Relative path to the processed O*NET "measure" tables.
# The trailing slash is required: later cells build file paths with plain string
# concatenation (DATA_DIR + 'FILE.CSV') as well as os.path.join.
DATA_DIR = '../../../data/onet_data/processed/measure/'
# Relative path to the processed O*NET "reference" tables (same trailing-slash rule).
DATA_DIR_REFERENCE = '../../../data/onet_data/processed/reference/'


In [547]:
# All three reference tables get the same header normalisation: spaces become
# underscores and the text is upper-cased (e.g. "Element ID" -> "ELEMENT_ID").

# Content Model Reference (element IDs with their names and descriptions).
content_model_refrence = pd.read_csv(
    "https://www.onetcenter.org/dl_files/database/db_29_1_text/Content%20Model%20Reference.txt",
    sep='\t',
).rename(columns=lambda col: col.replace(' ', '_').upper())

# Lookup used later to turn an element ID into its display name.
model_element_name_dict = dict(
    zip(content_model_refrence["ELEMENT_ID"], content_model_refrence["ELEMENT_NAME"])
)

# Scales Reference (scale IDs with their names and minimum/maximum values).
scale_reference = pd.read_csv(
    "https://www.onetcenter.org/dl_files/database/db_29_1_text/Scales%20Reference.txt",
    sep='\t',
).rename(columns=lambda col: col.replace(' ', '_').upper())

# Occupation data from the local processed reference directory.
occ_data = pd.read_csv(
    DATA_DIR_REFERENCE + 'OCCUPATION_DATA.CSV'
).rename(columns=lambda col: col.replace(' ', '_').upper())

In [111]:
# Load the O*NET Work Activities ratings from the processed measure directory.
work_activities_path = os.path.join(DATA_DIR, 'WORK_ACTIVITIES.csv')
work_activities = pd.read_csv(work_activities_path)

# Distinct work-activity element IDs present in the ratings table.
element_list = work_activities["ELEMENT_ID"].unique()
# ======================================================================================
# Dictionary of Work Activities Categorized by WFH Feasibility
#
# Structure: {category_name: {element_id: {"description": ..., "comment": ...}}}
#   * category_name - "Physical_OnSite", "Digital_RemoteFriendly" or
#     "Neutral_ContextDependent"; these keys are matched by the category weights
#     used when the per-occupation score is computed.
#   * element_id    - O*NET Work Activities element ID ("4.A.*"); this is the key
#     that is matched against the ELEMENT_ID column of the ratings table.
#   * "description" - short human-readable label. Several labels are abbreviated
#     relative to the ELEMENT_NAME values in the data (e.g. "Repairing Mechanical
#     Equipment"), so do not join on this field.
#   * "comment"     - rationale for placing the activity in its category.
# The assignments are hard-coded here; they are not derived from the data.
# ======================================================================================

wfh_categories = {
    # ----------------------------------------------------------------------------------
    # **A. Physical/On-Site Work Requirements (NEGATIVE for WFH)**
    # Activities requiring physical presence, manual labor, or specialized on-site tools.
    # ----------------------------------------------------------------------------------
    "Physical_OnSite": {
        "4.A.1.b.2": {
            "description": "Inspecting Equipment, Structures, or Materials",
            "comment": "Requires physical inspection of equipment/materials (on-site)."
        },
        "4.A.3.a.1": {
            "description": "Performing General Physical Activities",
            "comment": "Climbing, lifting, balancing — needs physical presence."
        },
        "4.A.3.a.2": {
            "description": "Handling and Moving Objects",
            "comment": "Manual labor (e.g., moving materials) incompatible with remote work."
        },
        "4.A.3.a.3": {
            "description": "Controlling Machines and Processes",
            "comment": "Direct operation of on-site machinery required."
        },
        "4.A.3.a.4": {
            "description": "Operating Vehicles/Equipment",
            "comment": "Driving vehicles or operating mechanized equipment (on-site)."
        },
        "4.A.3.b.4": {
            "description": "Repairing Mechanical Equipment",
            "comment": "Requires physical tools and on-site presence."
        },
        "4.A.3.b.5": {
            "description": "Repairing Electronic Equipment",
            "comment": "Specialized tools and hands-on work (on-site)."
        },
        "4.A.4.a.5": {
            "description": "Assisting and Caring for Others",
            "comment": "Physical care (e.g., medical) requires in-person presence."
        },
        "4.A.4.a.8": {
            "description": "Performing for/Working Directly with the Public",
            "comment": "Face-to-face interaction (e.g., retail, hospitality)."
        }
    },

    # ----------------------------------------------------------------------------------
    # **B. Digital/Remote-Friendly Activities (POSITIVE for WFH)**
    # Activities that can be performed digitally or with minimal physical constraints.
    # ----------------------------------------------------------------------------------
    "Digital_RemoteFriendly": {
        "4.A.2.a.2": {
            "description": "Processing Information",
            "comment": "Data entry/analysis — core remote-friendly task."
        },
        "4.A.2.a.4": {
            "description": "Analyzing Data or Information",
            "comment": "Remote data analysis is standard."
        },
        "4.A.2.b.1": {
            "description": "Making Decisions and Solving Problems",
            "comment": "Cognitive tasks require no physical presence."
        },
        "4.A.2.b.2": {
            "description": "Thinking Creatively",
            "comment": "Creative work benefits from flexible environments."
        },
        "4.A.2.b.3": {
            "description": "Updating and Using Relevant Knowledge",
            "comment": "Remote learning/adaptation is feasible."
        },
        "4.A.2.b.4": {
            "description": "Developing Objectives and Strategies",
            "comment": "Strategic planning can be done remotely using digital tools."
        },
        "4.A.2.b.5": {
            "description": "Scheduling Work and Activities",
            "comment": "Digital calendars/tools enable remote scheduling."
        },
        "4.A.2.b.6": {
            "description": "Organizing, Planning, and Prioritizing Work",
            "comment": "Digital planning tools enable remote work."
        },
        "4.A.3.b.1": {
            "description": "Working with Computers",
            "comment": "Digital work is inherently remote-friendly."
        },
        "4.A.3.b.2": {
            "description": "Drafting Technical Devices/Parts/Equipment",
            "comment": "Digital documentation (e.g., CAD software) is remote-compatible."
        },
        "4.A.3.b.6": {
            "description": "Documenting/Recording Information",
            "comment": "Digital documentation is remote-compatible."
        },
        "4.A.4.a.1": {
            "description": "Interpreting the Meaning of Information",
            "comment": "Explaining data can be done remotely (e.g., reports, Zoom)."
        },
        "4.A.4.b.1": {
            "description": "Coordinating Work and Activities of Others",
            "comment": "Remote coordination via tools like Slack or Trello."
        },
        "4.A.4.b.4": {
            "description": "Guiding, Directing, and Motivating Subordinates",
            "comment": "Remote management (e.g., virtual check-ins) is feasible."
        },
        "4.A.4.b.5": {
            "description": "Coaching and Developing Others",
            "comment": "Remote mentoring (e.g., Zoom, online courses)."
        },
        "4.A.4.b.6": {
            "description": "Providing Consultation and Advice",
            "comment": "Remote advising (e.g., Zoom) is feasible."
        },
        "4.A.4.c.1": {
            "description": "Performing Administrative Activities",
            "comment": "Digital paperwork (e.g., spreadsheets, emails)."
        },
        "4.A.4.c.2": {
            "description": "Staffing Organizational Units",
            "comment": "Remote hiring (e.g., virtual interviews, LinkedIn)."
        },
        "4.A.4.c.3": {
            "description": "Monitoring and Controlling Resources",
            "comment": "Digital tracking tools (e.g., Excel, ERP systems)."
        }
    },

    # ----------------------------------------------------------------------------------
    # **C. Neutral/Context-Dependent Activities**
    # Activities that can be done remotely or on-site depending on tools/context.
    # ----------------------------------------------------------------------------------
    "Neutral_ContextDependent": {
        "4.A.1.a.1": {
            "description": "Getting Information",
            "comment": "Depends on source (digital vs. physical observation)."
        },
        "4.A.1.a.2": {
            "description": "Monitoring Processes/Materials",
            "comment": "Could use sensors (remote) or physical checks (on-site)."
        },
        "4.A.1.b.1": {
            "description": "Identifying Objects, Actions, Events",
            "comment": "Digital identification vs. physical inspection."
        },
        "4.A.1.b.3": {
            "description": "Estimating Quantifiable Characteristics",
            "comment": "Can use digital tools or physical measurement."
        },
        "4.A.2.a.1": {
            "description": "Judging Qualities of Objects/Services/People",
            "comment": "Remote assessment possible (e.g., digital portfolios)."
        },
        "4.A.2.a.3": {
            "description": "Evaluating Compliance with Standards",
            "comment": "Depends on whether audits are physical or digital."
        },
        "4.A.4.a.2": {
            "description": "Communicating with Supervisors/Peers",
            "comment": "Digital communication (email/Slack) vs. in-person."
        },
        "4.A.4.a.3": {
            "description": "Communicating with External Parties",
            "comment": "Email/phone (remote) vs. in-person meetings."
        },
        "4.A.4.a.4": {
            "description": "Establishing and Maintaining Interpersonal Relationships",
            "comment": "Can be done remotely (e.g., virtual coffee chats) or in-person."
        },
        "4.A.4.a.6": {
            "description": "Selling or Influencing Others",
            "comment": "Online sales vs. in-person negotiations."
        },
        "4.A.4.a.7": {
            "description": "Resolving Conflicts and Negotiating",
            "comment": "Possible remotely (e.g., Zoom mediation) but often easier in-person."
        },
        "4.A.4.b.2": {
            "description": "Developing and Building Teams",
            "comment": "Virtual team-building vs. in-person events."
        },
        "4.A.4.b.3": {
            "description": "Training and Teaching Others",
            "comment": "Remote training (e.g., webinars) vs. in-person workshops."
        }
    }
}

# for scale in work_activities.SCALE_ID.unique():
#     scale_data = scale_reference[scale_reference["Scale ID"] == scale]
#     scale_name = scale_data.SCALE_NAME.values[0]   
#     scale_min = scale_data.MINIMUM.values[0]
#     scale_max = scale_data.MAXIMUM.values[0]
#     print(f"Scale: {scale}, Name: {scale_name}, Min: {scale_min}, Max: {scale_max}")

# Reshape the ratings so that every (occupation, activity) row carries both
# scales as columns: IM (importance) and LV (level).
# NOTE: despite the "_long" suffix the result is wide-format; the name is kept
# because the statements that follow refer to it.
work_activities_long = (
    work_activities
    .pivot(
        index=['ONET_SOC_CODE', 'ELEMENT_ID', 'RECOMMEND_SUPPRESS'],
        columns='SCALE_ID',
        values='DATA_VALUE',
    )
    .reset_index()
    # Attach the NOT_RELEVANT flag, taken from the LV rows of the raw table.
    .merge(
        work_activities.loc[
            work_activities.SCALE_ID == "LV",
            ["ONET_SOC_CODE", "ELEMENT_ID", "NOT_RELEVANT"],
        ],
        on=['ONET_SOC_CODE', 'ELEMENT_ID'],
    )
)

# Keep only ratings that are neither flagged for suppression nor marked not relevant.
keep_rows = (
    (work_activities_long.RECOMMEND_SUPPRESS == "N")
    & (work_activities_long.NOT_RELEVANT == "N")
)
# .copy() gives an independent frame, so the column assignments below write to
# this object itself rather than to a slice of the unfiltered frame (which is
# what triggers pandas' SettingWithCopyWarning).
work_activities_long = work_activities_long.loc[keep_rows].copy()

# Normalize IM (1–5 → 0–1)
work_activities_long["IM_norm"] = (work_activities_long["IM"] - 1) / 4

# Normalize LV (0–7 → 0–1)
work_activities_long["LV_norm"] = work_activities_long["LV"] / 7

# Weighted contribution of an activity: normalized importance times normalized level.
work_activities_long["CONTRIBUTION"] = (
    work_activities_long["IM_norm"] * work_activities_long["LV_norm"]
)

# Helper that maps a work-activity element ID to its WFH feasibility category.
def get_wfh_category(element_id):
    """Return the name of the WFH category that lists ``element_id``.

    The categories of the module-level ``wfh_categories`` dictionary are
    checked in their definition order and the first one containing the ID wins.

    Parameters
    ----------
    element_id : hashable
        Work-activity element ID, e.g. ``"4.A.3.b.1"``.

    Returns
    -------
    str or None
        The category name, or ``None`` when no category lists the ID.
    """
    matching_categories = (
        category_name
        for category_name, members in wfh_categories.items()
        if element_id in members
    )
    return next(matching_categories, None)

# Label every remaining (occupation, activity) row with its WFH category.
# .assign() returns a new frame, so nothing is written into a filtered slice.
work_activities_long = work_activities_long.assign(
    WFH_CATEGORY=work_activities_long.ELEMENT_ID.apply(get_wfh_category)
)

# Weight applied to each category's summed contribution.
weights = {
    "Digital_RemoteFriendly": 0.5,
    "Physical_OnSite": -1.0,
    "Neutral_ContextDependent": 0.1
}

# Sum the contributions per occupation and category, one column per category.
# Rows whose WFH_CATEGORY is None are dropped by groupby.
# An occupation with no relevant activity in a category must contribute 0 for
# that category. Leaving the gap as NaN would make its SCORE NaN and push the
# occupation to the bottom of the ranking, so the gaps are filled with 0 and
# all weighted categories are guaranteed to exist as columns.
occ_work_activity_category = (
    work_activities_long
    .groupby(["ONET_SOC_CODE", "WFH_CATEGORY"]).CONTRIBUTION.sum()
    .unstack("WFH_CATEGORY", fill_value=0.0)
    .reindex(columns=sorted(weights), fill_value=0.0)
    .reset_index()
)

# WFH feasibility score: weighted sum of the category totals.
occ_work_activity_category["SCORE"] = sum(
    occ_work_activity_category[category] * weight
    for category, weight in weights.items()
)

# Bring in the occupation titles.
occ_work_activity_category = occ_work_activity_category.merge(occ_data, on='ONET_SOC_CODE')

# Sort occupations by WFH feasibility score
occ_work_activity_category = occ_work_activity_category.sort_values("SCORE", ascending=False)

# Print the top occupations with the highest WFH feasibility scores
print("Top 10 Occupations with the Highest WFH Feasibility Scores:")
for occ in occ_work_activity_category.head(10).TITLE.values:
    print(f"\t-{occ}")

Top 10 Occupations with the Highest WFH Feasibility Scores:
	-Chief Executives
	-Computer and Information Systems Managers
	-Bioinformatics Scientists
	-Petroleum Engineers
	-Education Administrators, Postsecondary
	-Electrical Engineers
	-Library Science Teachers, Postsecondary
	-Biological Science Teachers, Postsecondary
	-Sociology Teachers, Postsecondary
	-Anthropology and Archeology Teachers, Postsecondary


In [122]:
# Load the O*NET Work Context ratings. The columns keep their raw names here
# (e.g. "Element ID"); a later cell renames them to UPPER_SNAKE_CASE.
work_context = pd.read_csv('https://www.onetcenter.org/dl_files/database/db_29_1_text/Work%20Context.txt', sep='\t')

element_list = work_context["Element ID"].unique()

# content_model_refrence had its columns renamed to UPPER_SNAKE_CASE right after
# it was loaded, so it must be addressed with ELEMENT_ID / ELEMENT_NAME /
# DESCRIPTION; the raw "Element ID" style names raise KeyError on a fresh run.
for el in element_list:
    el_reference = content_model_refrence[content_model_refrence['ELEMENT_ID'] == el]
    print(el)
    print(el_reference['ELEMENT_NAME'].values[0])
    print(el_reference['DESCRIPTION'].values[0])
    print('=====================================' * 4)

4.C.1.a.2.c
Public Speaking
How often do you have to perform public speaking in this job?
4.C.1.a.2.f
Telephone
How often do you have telephone conversations in this job?
4.C.1.a.2.h
Electronic Mail
How often do you use electronic mail in this job?
4.C.1.a.2.j
Letters and Memos
How often does the job require written letters and memos?
4.C.1.a.2.l
Face-to-Face Discussions
How often do you have to have face-to-face discussions with individuals or teams in this job?
4.C.1.a.4
Contact With Others
How much does this job require the worker to be in contact with others (face-to-face, by telephone, or otherwise) in order to perform it?
4.C.1.b.1.e
Work With Work Group or Team
How important is it to work with others in a group or team in this job?
4.C.1.b.1.f
Deal With External Customers
How important is it to work with external customers or the public in this job?
4.C.1.b.1.g
Coordinate or Lead Others
How important is it to coordinate or lead others in accomplishing work activities in this job

In [120]:
# Display the Work Context table as the cell's output.
work_context

Unnamed: 0,O*NET-SOC Code,Element ID,Element Name,Scale ID,Category,Data Value,N,Standard Error,Lower CI Bound,Upper CI Bound,Recommend Suppress,Not Relevant,Date,Domain Source
0,11-1011.00,4.C.1.a.2.c,Public Speaking,CX,,3.07,37.0,0.2851,2.4923,3.6486,N,,08/2023,Incumbent
1,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,1.0,0.13,37.0,0.1370,0.0160,1.0770,N,,08/2023,Incumbent
2,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,2.0,39.49,37.0,11.0101,20.4073,62.4299,N,,08/2023,Incumbent
3,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,3.0,33.07,37.0,7.1359,20.4456,48.7245,N,,08/2023,Incumbent
4,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,4.0,7.79,37.0,4.3613,2.4093,22.4457,N,,08/2023,Incumbent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291196,53-7121.00,4.C.3.d.4,Work Schedules,CTP,3.0,0.89,32.0,0.9750,0.0947,7.8674,N,,08/2019,Incumbent
291197,53-7121.00,4.C.3.d.8,Duration of Typical Work Week,CT,,2.82,32.0,0.0983,2.6238,3.0000,N,,08/2019,Incumbent
291198,53-7121.00,4.C.3.d.8,Duration of Typical Work Week,CTP,1.0,0.00,32.0,0.0000,,,N,,08/2019,Incumbent
291199,53-7121.00,4.C.3.d.8,Duration of Typical Work Week,CTP,2.0,17.56,32.0,9.8350,5.0617,45.9881,N,,08/2019,Incumbent


# Work Activities and Work Context

In [475]:
# Load the Work Activities ratings.
work_activities = pd.read_csv(os.path.join(DATA_DIR, 'WORK_ACTIVITIES.csv'))

# Scale metadata keyed by SCALE_ID, built once so the loops below do not have
# to re-filter scale_reference for every lookup. drop_duplicates keeps the
# first row per scale, matching a "first match" lookup.
scale_lookup = {
    scale_id: (name, minimum, maximum)
    for scale_id, name, minimum, maximum in zip(
        *(
            scale_reference.drop_duplicates("SCALE_ID")[column]
            for column in ("SCALE_ID", "SCALE_NAME", "MINIMUM", "MAXIMUM")
        )
    )
}

print("Work Activities Data:")

print("==" * 50)

scale_ids = work_activities.SCALE_ID.unique()

# One line per rating scale used by the Work Activities table.
for scale in scale_ids:
    scale_name, scale_min, scale_max = scale_lookup[scale]
    print(f"Scale: {scale}, Name: {scale_name}, Min: {scale_min}, Max: {scale_max}")
    print("==" * 50, end="\n\n")

# work_context is not downloaded again here; the frame created by the earlier
# Work Context cell is reused and only its headers are normalised
# (spaces -> underscores, upper-case). The rename is idempotent, so re-running
# this cell is safe.
work_context_categories = pd.read_csv(
    "https://www.onetcenter.org/dl_files/database/db_29_1_text/Work%20Context%20Categories.txt",
    sep='\t',
).rename(columns=lambda x: x.replace(' ', '_').upper())
work_context = work_context.rename(columns=lambda x: x.replace(' ', '_').upper())

print("\n\n" + "##" * 50 + "\n\n")

print("Work Context Data:")

print("==" * 50)

wc_elements = work_context.ELEMENT_ID.unique()

# For every Work Context element: its name, the scales it is rated on and,
# where the categories table defines them, the meaning of each category.
for el in wc_elements:
    el_data = work_context[work_context['ELEMENT_ID'] == el]
    el_name = el_data['ELEMENT_NAME'].values[0]
    print(f"Element: {el}, Name: {el_name}")
    el_scales = el_data.SCALE_ID.unique()
    for el_s in el_scales:
        el_scale_name, el_scale_min, el_scale_max = scale_lookup[el_s]
        print(f"\tScale: {el_s}: {el_scale_name}, Min: {el_scale_min}, Max: {el_scale_max}")
        el_data_scale = work_context_categories[
            (work_context_categories['ELEMENT_ID'] == el)
            & (work_context_categories['SCALE_ID'] == el_s)
        ]
        if len(el_data_scale) > 0:
            el_data_scale_cat = " || ".join(
                f"{category}: {description}"
                for category, description in zip(
                    el_data_scale['CATEGORY'], el_data_scale['CATEGORY_DESCRIPTION']
                )
            )
            print(f"\t\t{el_data_scale_cat}")
    print("==" * 50, end="\n\n")

Work Activities Data:
Scale: IM, Name: Importance, Min: 1, Max: 5

Scale: LV, Name: Level, Min: 0, Max: 7



####################################################################################################


Work Context Data:
Element: 4.C.1.a.2.c, Name: Public Speaking
	Scale: CX: Context, Min: 1, Max: 5
	Scale: CXP: Context (Categories 1-5), Min: 0, Max: 100
		1: Never || 2: Once a year or more but not every month || 3: Once a month or more but not every week || 4: Once a week or more but not every day || 5: Every day

Element: 4.C.1.a.2.f, Name: Telephone
	Scale: CX: Context, Min: 1, Max: 5
	Scale: CXP: Context (Categories 1-5), Min: 0, Max: 100
		1: Never || 2: Once a year or more but not every month || 3: Once a month or more but not every week || 4: Once a week or more but not every day || 5: Every day

Element: 4.C.1.a.2.h, Name: Electronic Mail
	Scale: CX: Context, Min: 1, Max: 5
	Scale: CXP: Context (Categories 1-5), Min: 0, Max: 100
		1: Never || 2: Once a year or more bu

In [545]:
# Print the reference name and description of every Work Activities element.
for el in work_activities["ELEMENT_ID"].unique():
    el_reference = content_model_refrence[content_model_refrence["ELEMENT_ID"] == el]
    el_name = el_reference["ELEMENT_NAME"].iloc[0]
    el_desc = el_reference["DESCRIPTION"].iloc[0]
    print(f"Element: {el}, Name: {el_name}")
    print(f"\tDescription: {el_desc}")
    print("==" * 50, end="\n\n")

Element: 4.A.1.a.1, Name: Getting Information
	Description: Observing, receiving, and otherwise obtaining information from all relevant sources.

Element: 4.A.1.a.2, Name: Monitoring Processes, Materials, or Surroundings
	Description: Monitoring and reviewing information from materials, events, or the environment, to detect or assess problems.

Element: 4.A.1.b.1, Name: Identifying Objects, Actions, and Events
	Description: Identifying information by categorizing, estimating, recognizing differences or similarities, and detecting changes in circumstances or events.

Element: 4.A.1.b.2, Name: Inspecting Equipment, Structures, or Materials
	Description: Inspecting equipment, structures, or materials to identify the cause of errors or other problems or defects.

Element: 4.A.1.b.3, Name: Estimating the Quantifiable Characteristics of Products, Events, or Information
	Description: Estimating sizes, distances, and quantities; or determining time, costs, resources, or materials needed to per

In [546]:
# Print the reference name and description of every Work Context element.
for el in work_context["ELEMENT_ID"].unique():
    is_current_element = content_model_refrence["ELEMENT_ID"] == el
    el_name = content_model_refrence.loc[is_current_element, "ELEMENT_NAME"].iloc[0]
    el_desc = content_model_refrence.loc[is_current_element, "DESCRIPTION"].iloc[0]
    print(f"Element: {el}, Name: {el_name}")
    print(f"\tDescription: {el_desc}")
    print("==" * 50, end="\n\n")

Element: 4.C.1.a.2.c, Name: Public Speaking
	Description: How often do you have to perform public speaking in this job?

Element: 4.C.1.a.2.f, Name: Telephone
	Description: How often do you have telephone conversations in this job?

Element: 4.C.1.a.2.h, Name: Electronic Mail
	Description: How often do you use electronic mail in this job?

Element: 4.C.1.a.2.j, Name: Letters and Memos
	Description: How often does the job require written letters and memos?

Element: 4.C.1.a.2.l, Name: Face-to-Face Discussions
	Description: How often do you have to have face-to-face discussions with individuals or teams in this job?

Element: 4.C.1.a.4, Name: Contact With Others
	Description: How much does this job require the worker to be in contact with others (face-to-face, by telephone, or otherwise) in order to perform it?

Element: 4.C.1.b.1.e, Name: Work With Work Group or Team
	Description: How important is it to work with others in a group or team in this job?

Element: 4.C.1.b.1.f, Name: Deal W

## Abilities and Skills to Work Activities and Work Context
 

In [402]:
def _load_reference_with_preview(filename, heading):
    """Read ``filename`` from DATA_DIR_REFERENCE, print ``heading`` and show the first rows.

    Returns the loaded DataFrame.
    """
    frame = pd.read_csv(DATA_DIR_REFERENCE + filename)
    print(heading)
    display(frame.head())
    return frame


# Crosswalk tables linking abilities/skills to work context and work activities.
ability_2_wc = _load_reference_with_preview(
    'ABILITIES_TO_WORK_CONTEXT.CSV', "Abilities to Work Context Data:"
)
ability_2_wa = _load_reference_with_preview(
    'ABILITIES_TO_WORK_ACTIVITIES.CSV', "Abilities to Work Activities Data:"
)
skill_2_wc = _load_reference_with_preview(
    'SKILLS_TO_WORK_CONTEXT.CSV', "Skills to Work Context Data:"
)
skill_2_wa = _load_reference_with_preview(
    'SKILLS_TO_WORK_ACTIVITIES.CSV', "Skills to Work Activities Data:"
)


Abilities to Work Context Data:


Unnamed: 0,ABILITIES_ELEMENT_ID,ABILITIES_ELEMENT_NAME,WORK_CONTEXT_ELEMENT_ID,WORK_CONTEXT_ELEMENT_NAME
0,1.A.1.a.1,Oral Comprehension,4.C.1.a.2.c,Public Speaking
1,1.A.1.a.1,Oral Comprehension,4.C.1.a.2.f,Telephone
2,1.A.1.a.1,Oral Comprehension,4.C.1.a.2.l,Face-to-Face Discussions
3,1.A.1.a.1,Oral Comprehension,4.C.1.a.4,Contact With Others
4,1.A.1.a.1,Oral Comprehension,4.C.1.b.1.e,Work With Work Group or Team


Abilities to Work Activities Data:


Unnamed: 0,ABILITIES_ELEMENT_ID,ABILITIES_ELEMENT_NAME,WORK_ACTIVITIES_ELEMENT_ID,WORK_ACTIVITIES_ELEMENT_NAME
0,1.A.1.a.1,Oral Comprehension,4.A.1.a.1,Getting Information
1,1.A.1.a.1,Oral Comprehension,4.A.1.a.2,"Monitoring Processes, Materials, or Surroundings"
2,1.A.1.a.1,Oral Comprehension,4.A.1.b.1,"Identifying Objects, Actions, and Events"
3,1.A.1.a.1,Oral Comprehension,4.A.2.a.1,"Judging the Qualities of Objects, Services, or..."
4,1.A.1.a.1,Oral Comprehension,4.A.2.a.2,Processing Information


Skills to Work Context Data:


Unnamed: 0,SKILLS_ELEMENT_ID,SKILLS_ELEMENT_NAME,WORK_CONTEXT_ELEMENT_ID,WORK_CONTEXT_ELEMENT_NAME
0,2.A.1.a,Reading Comprehension,4.C.1.a.2.h,Electronic Mail
1,2.A.1.b,Active Listening,4.C.1.a.2.c,Public Speaking
2,2.A.1.b,Active Listening,4.C.1.a.2.f,Telephone
3,2.A.1.b,Active Listening,4.C.1.a.2.l,Face-to-Face Discussions
4,2.A.1.b,Active Listening,4.C.1.a.4,Contact With Others


Skills to Work Activities Data:


Unnamed: 0,SKILLS_ELEMENT_ID,SKILLS_ELEMENT_NAME,WORK_ACTIVITIES_ELEMENT_ID,WORK_ACTIVITIES_ELEMENT_NAME
0,2.A.1.a,Reading Comprehension,4.A.1.a.1,Getting Information
1,2.A.1.a,Reading Comprehension,4.A.1.a.2,"Monitoring Processes, Materials, or Surroundings"
2,2.A.1.a,Reading Comprehension,4.A.1.b.1,"Identifying Objects, Actions, and Events"
3,2.A.1.a,Reading Comprehension,4.A.2.a.1,"Judging the Qualities of Objects, Services, or..."
4,2.A.1.a,Reading Comprehension,4.A.2.a.2,Processing Information


In [472]:
# Work-activity names that occur in both the abilities and the skills crosswalk.
set(ability_2_wa["WORK_ACTIVITIES_ELEMENT_NAME"]) & set(skill_2_wa["WORK_ACTIVITIES_ELEMENT_NAME"])

{'Analyzing Data or Information',
 'Assisting and Caring for Others',
 'Coaching and Developing Others',
 'Communicating with People Outside the Organization',
 'Communicating with Supervisors, Peers, or Subordinates',
 'Controlling Machines and Processes',
 'Coordinating the Work and Activities of Others',
 'Developing Objectives and Strategies',
 'Developing and Building Teams',
 'Documenting/Recording Information',
 'Drafting, Laying Out, and Specifying Technical Devices, Parts, and Equipment',
 'Establishing and Maintaining Interpersonal Relationships',
 'Estimating the Quantifiable Characteristics of Products, Events, or Information',
 'Evaluating Information to Determine Compliance with Standards',
 'Getting Information',
 'Guiding, Directing, and Motivating Subordinates',
 'Identifying Objects, Actions, and Events',
 'Inspecting Equipment, Structures, or Materials',
 'Interpreting the Meaning of Information for Others',
 'Judging the Qualities of Objects, Services, or People',
 

In [474]:
# For two sample work activities, list the abilities and skills linked to each
# in the crosswalk tables.
print("Two activities example:", end="\n\n")
acts = ["4.A.2.a.4", "4.A.3.b.4"]

# enumerate() numbers the items so the heading reads "Activity 1", "Activity 2"
# instead of labelling every item "Activity 1".
for act_number, act in enumerate(acts, start=1):
    act_data = ability_2_wa.loc[ability_2_wa.WORK_ACTIVITIES_ELEMENT_ID == act]
    skill_data = skill_2_wa.loc[skill_2_wa.WORK_ACTIVITIES_ELEMENT_ID == act]

    print(f"Activity {act_number}: {act_data.WORK_ACTIVITIES_ELEMENT_ID.values[0]} - {act_data.WORK_ACTIVITIES_ELEMENT_NAME.values[0]}")
    act_abilities = [
        f"{ability_id}: {ability_name}"
        for ability_id, ability_name in zip(
            act_data['ABILITIES_ELEMENT_ID'], act_data['ABILITIES_ELEMENT_NAME']
        )
    ]
    act_skills = [
        f"{skill_id}: {skill_name}"
        for skill_id, skill_name in zip(
            skill_data['SKILLS_ELEMENT_ID'], skill_data['SKILLS_ELEMENT_NAME']
        )
    ]

    print("\tAbilities:")
    for a in act_abilities:
        print(f"\t\t{a}")
    print("\tSkills:")
    for s in act_skills:
        print(f"\t\t{s}")

    print("==" * 50, end="\n\n")


Two activities example:

Activity 1: 4.A.2.a.4 - Analyzing Data or Information
	Abilities:
		1.A.1.a.2: Written Comprehension
		1.A.1.b.4: Deductive Reasoning
		1.A.1.b.5: Inductive Reasoning
		1.A.1.b.6: Information Ordering
		1.A.1.b.7: Category Flexibility
		1.A.1.g.1: Selective Attention
	Skills:
		2.A.1.a: Reading Comprehension
		2.A.2.a: Critical Thinking

Activity 1: 4.A.3.b.4 - Repairing and Maintaining Mechanical Equipment
	Abilities:
		1.A.1.b.3: Problem Sensitivity
		1.A.1.b.4: Deductive Reasoning
		1.A.1.b.5: Inductive Reasoning
		1.A.1.b.6: Information Ordering
		1.A.1.d.1: Memorization
		1.A.1.e.3: Perceptual Speed
		1.A.1.f.2: Visualization
		1.A.2.a.1: Arm-Hand Steadiness
		1.A.2.a.2: Manual Dexterity
		1.A.2.a.3: Finger Dexterity
		1.A.2.b.1: Control Precision
		1.A.3.c.1: Extent Flexibility
		1.A.4.a.1: Near Vision
	Skills:
		2.A.2.a: Critical Thinking
		2.B.2.i: Complex Problem Solving
		2.B.3.c: Equipment Selection
		2.B.3.g: Operations Monitoring
		2.B.3.j: Equipme

In [470]:
# For two sample work-context elements, list the abilities and skills linked to
# each in the crosswalk tables.
print("Two context example:", end="\n\n")
contx = ["4.C.1.a.2.f", "4.C.1.a.2.l"]

for context_number, cont in enumerate(contx, start=1):
    act_data = ability_2_wc.loc[ability_2_wc.WORK_CONTEXT_ELEMENT_ID == cont]
    # The mask must come from skill_2_wc itself. A mask built from ability_2_wc
    # is aligned on row labels of a different table, so it selects unrelated
    # skill rows (or raises when the indexes cannot be aligned).
    skill_data = skill_2_wc.loc[skill_2_wc.WORK_CONTEXT_ELEMENT_ID == cont]

    # Numbered "Context N" heading: these are work-context elements, and every
    # item used to be labelled "Activity 1".
    print(f"Context {context_number}: {act_data.WORK_CONTEXT_ELEMENT_ID.values[0]} - {act_data.WORK_CONTEXT_ELEMENT_NAME.values[0]}")
    context_abilities = [
        f"{ability_id}: {ability_name}"
        for ability_id, ability_name in zip(
            act_data['ABILITIES_ELEMENT_ID'], act_data['ABILITIES_ELEMENT_NAME']
        )
    ]
    context_skills = [
        f"{skill_id}: {skill_name}"
        for skill_id, skill_name in zip(
            skill_data['SKILLS_ELEMENT_ID'], skill_data['SKILLS_ELEMENT_NAME']
        )
    ]

    print("\tAbilities:")
    for a in context_abilities:
        print(f"\t\t{a}")
    print("\tSkills:")
    for s in context_skills:
        print(f"\t\t{s}")

    print("==" * 50, end="\n\n")

Two context example:

Activity 1: 4.C.1.a.2.f - Telephone
	Abilities:
		1.A.1.a.1: Oral Comprehension
		1.A.1.a.3: Oral Expression
		1.A.4.b.4: Speech Recognition
		1.A.4.b.5: Speech Clarity
	Skills:
		2.A.1.b: Active Listening
		2.A.1.d: Speaking

Activity 1: 4.C.1.a.2.l - Face-to-Face Discussions
	Abilities:
		1.A.1.a.1: Oral Comprehension
		1.A.1.a.3: Oral Expression
		1.A.1.b.3: Problem Sensitivity
		1.A.1.g.1: Selective Attention
		1.A.4.b.4: Speech Recognition
		1.A.4.b.5: Speech Clarity
	Skills:
		2.A.1.b: Active Listening
		2.A.1.d: Speaking
		2.A.2.a: Critical Thinking
		2.B.1.c: Persuasion



# Tools and Technology

In [479]:
# Technology skills and tools per occupation (measure directory) plus the
# UNSPSC commodity reference (reference directory). Each table is previewed
# right after it is loaded.
tech_data = pd.read_csv(os.path.join(DATA_DIR, 'TECHNOLOGY_SKILLS.CSV'))
print("Technology Skills Data:")
display(tech_data.iloc[:5])

tools_data = pd.read_csv(os.path.join(DATA_DIR, 'TOOLS_USED.CSV'))
print("Tools Used Data:")
display(tools_data.iloc[:5])

unspsc_data = pd.read_csv(os.path.join(DATA_DIR_REFERENCE, 'UNSPSC_REFERENCE.CSV'))
print("UNSPSC Reference Data:")
display(unspsc_data.iloc[:5])

Technology Skills Data:


Unnamed: 0,ONET_SOC_CODE,EXAMPLE,COMMODITY_CODE,COMMODITY_TITLE,HOT_TECHNOLOGY,IN_DEMAND
0,11-1011.00,Adobe Acrobat,43232202,Document management software,Y,N
1,11-1011.00,AdSense Tracker,43232306,Data base user interface and query software,N,N
2,11-1011.00,Atlassian JIRA,43232201,Content workflow software,Y,N
3,11-1011.00,Blackbaud The Raiser's Edge,43232303,Customer relationship management CRM software,N,N
4,11-1011.00,ComputerEase construction accounting software,43231601,Accounting software,N,N


Tools Used Data:


Unnamed: 0,ONET_SOC_CODE,EXAMPLE,COMMODITY_CODE,COMMODITY_TITLE
0,11-1011.00,10-key calculators,44101809,Desktop calculator
1,11-1011.00,Desktop computers,43211507,Desktop computers
2,11-1011.00,Laptop computers,43211503,Notebook computers
3,11-1011.00,Personal computers,43211508,Personal computers
4,11-1011.00,Personal digital assistants PDA,43211504,Personal digital assistant PDAs or organizers


UNSPSC Reference Data:


Unnamed: 0,COMMODITY_CODE,COMMODITY_TITLE,CLASS_CODE,CLASS_TITLE,FAMILY_CODE,FAMILY_TITLE,SEGMENT_CODE,SEGMENT_TITLE
0,10111302,Pet grooming products,10111300,Domestic pet treatments and accessories and eq...,10110000,Domestic pet products,10000000,Live Plant and Animal Material and Accessories...
1,10111306,Domestic pet training kits,10111300,Domestic pet treatments and accessories and eq...,10110000,Domestic pet products,10000000,Live Plant and Animal Material and Accessories...
2,10131601,Cages or its accessories,10131600,Animal containment,10130000,Animal containment and habitats,10000000,Live Plant and Animal Material and Accessories...
3,10131602,Kennels,10131600,Animal containment,10130000,Animal containment and habitats,10000000,Live Plant and Animal Material and Accessories...
4,10131605,Animal transport cage,10131600,Animal containment,10130000,Animal containment and habitats,10000000,Live Plant and Animal Material and Accessories...


In [484]:
# Display the Work Activities table as the cell's output.
work_activities

Unnamed: 0,ONET_SOC_CODE,ELEMENT_ID,ELEMENT_NAME,SCALE_ID,DATA_VALUE,N,STANDARD_ERROR,LOWER_CI_BOUND,UPPER_CI_BOUND,RECOMMEND_SUPPRESS,NOT_RELEVANT,DATE,DOMAIN_SOURCE
0,11-1011.00,4.A.1.a.1,Getting Information,IM,4.56,29.0,0.1559,4.2369,4.8756,N,,08/2023,Incumbent
1,11-1011.00,4.A.1.a.1,Getting Information,LV,4.89,30.0,0.1727,4.5393,5.2458,N,N,08/2023,Incumbent
2,11-1011.00,4.A.1.a.2,"Monitoring Processes, Materials, or Surroundings",IM,4.25,30.0,0.2125,3.8130,4.6823,N,,08/2023,Incumbent
3,11-1011.00,4.A.1.a.2,"Monitoring Processes, Materials, or Surroundings",LV,5.21,30.0,0.3872,4.4133,5.9971,N,N,08/2023,Incumbent
4,11-1011.00,4.A.1.b.1,"Identifying Objects, Actions, and Events",IM,4.23,29.0,0.1544,3.9180,4.5507,N,,08/2023,Incumbent
...,...,...,...,...,...,...,...,...,...,...,...,...,...
72073,53-7121.00,4.A.4.c.1,Performing Administrative Activities,LV,2.27,27.0,0.3184,1.6108,2.9197,N,N,08/2019,Incumbent
72074,53-7121.00,4.A.4.c.2,Staffing Organizational Units,IM,1.93,27.0,0.2132,1.4962,2.3726,N,,08/2019,Incumbent
72075,53-7121.00,4.A.4.c.2,Staffing Organizational Units,LV,1.60,27.0,0.2965,0.9936,2.2125,N,N,08/2019,Incumbent
72076,53-7121.00,4.A.4.c.3,Monitoring and Controlling Resources,IM,2.56,27.0,0.2582,2.0266,3.0881,N,,08/2019,Incumbent


In [495]:
# Occupation x work-activity matrix of importance (IM) ratings.
# Missing occupation/activity combinations are filled with 0.
importance_rows = work_activities.loc[work_activities['SCALE_ID'] == 'IM']
importance_matrix = importance_rows.pivot(
    index='ONET_SOC_CODE',
    columns='ELEMENT_ID',
    values='DATA_VALUE',
)
merged = importance_matrix.reset_index().fillna(0)

In [496]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Feature matrix: the work-activity columns of `merged`, without the occupation
# identifier. Later cells append "Teleworkability_Cluster" and
# "Teleworkability_Score" to `merged` in place; those derived labels are removed
# here as well so that re-running this cell never feeds them back into the PCA.
# errors="ignore" keeps the second drop valid on a fresh run, before they exist.
X = (
    merged
    .drop(columns="ONET_SOC_CODE")
    .drop(columns=["Teleworkability_Cluster", "Teleworkability_Score"], errors="ignore")
)

# Standardise every feature before the decomposition.
X_scaled = StandardScaler().fit_transform(X)

# A float n_components keeps as many components as are needed to retain 95% of
# the variance.
pca = PCA(n_components=0.95)
X_pca = pca.fit_transform(X_scaled)

In [521]:
from sklearn.cluster import KMeans

# Tunables for this cell, named once instead of being repeated as bare literals.
N_CLUSTERS = 3          # number of occupation groups to form
TOP_N_ACTIVITIES = 10   # how many work activities to list per cluster

# Cluster the occupations in the reduced PCA space (fixed seed for repeatability).
kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=42)
clusters = kmeans.fit_predict(X_pca)

# Attach each occupation's cluster label to the wide ratings table.
merged["Teleworkability_Cluster"] = clusters

# Map the centroids from PCA space back to the standardised feature space.
# inverse_transform applies `centers @ components_` and also adds back the mean
# that PCA removed, which the bare matrix product left out.
centroids = pca.inverse_transform(kmeans.cluster_centers_)

# One row per cluster, one column per work activity, labelled "<element id> : <name>".
cluster_features = pd.DataFrame(centroids, columns=X.columns).rename(
    columns=lambda element_id: element_id + " : " + model_element_name_dict[element_id]
)

# For every cluster, list the work activities whose centroid value is largest in
# absolute terms (i.e. furthest from the standardised mean in either direction).
for i in range(N_CLUSTERS):
    top_activities = (
        cluster_features.iloc[i]
        .abs()
        .sort_values(ascending=False)
        .head(TOP_N_ACTIVITIES)
        .index.tolist()
    )
    print(f"Cluster {i} Key Work Activities:")
    for feature in top_activities:
        print(f"\t- {feature}")

Cluster 0 Key Work Activities:
	- 4.A.4.b.4 : Guiding, Directing, and Motivating Subordinates
	- 4.A.4.b.2 : Developing and Building Teams
	- 4.A.4.c.2 : Staffing Organizational Units
	- 4.A.4.b.1 : Coordinating the Work and Activities of Others
	- 4.A.2.b.5 : Scheduling Work and Activities
	- 4.A.4.c.3 : Monitoring and Controlling Resources
	- 4.A.4.b.6 : Providing Consultation and Advice to Others
	- 4.A.2.b.1 : Making Decisions and Solving Problems
	- 4.A.2.b.4 : Developing Objectives and Strategies
	- 4.A.4.b.5 : Coaching and Developing Others
Cluster 1 Key Work Activities:
	- 4.A.3.b.1 : Working with Computers
	- 4.A.2.b.3 : Updating and Using Relevant Knowledge
	- 4.A.4.a.1 : Interpreting the Meaning of Information for Others
	- 4.A.2.b.6 : Organizing, Planning, and Prioritizing Work
	- 4.A.4.a.3 : Communicating with People Outside the Organization
	- 4.A.2.a.4 : Analyzing Data or Information
	- 4.A.4.b.6 : Providing Consultation and Advice to Others
	- 4.A.4.a.4 : Establishing a

In [520]:
# Hand-assigned teleworkability score per K-Means cluster label (0 -> 1, 1 -> 3, 2 -> 5).
# The mapping is a fixed literal; revise it after inspecting the cluster centroids.
cluster_scores = dict(zip((0, 1, 2), (1, 3, 5)))

# Translate each occupation's cluster label into its score and show the table.
merged["Teleworkability_Score"] = merged["Teleworkability_Cluster"].map(cluster_scores)
merged

ELEMENT_ID,ONET_SOC_CODE,4.A.1.a.1,4.A.1.a.2,4.A.1.b.1,4.A.1.b.2,4.A.1.b.3,4.A.2.a.1,4.A.2.a.2,4.A.2.a.3,4.A.2.a.4,...,4.A.4.b.2,4.A.4.b.3,4.A.4.b.4,4.A.4.b.5,4.A.4.b.6,4.A.4.c.1,4.A.4.c.2,4.A.4.c.3,Teleworkability_Cluster,Teleworkability_Score
0,11-1011.00,4.56,4.25,4.23,2.51,3.26,4.18,4.46,4.49,4.38,...,4.63,4.01,4.74,4.71,3.95,4.02,3.78,4.11,0,1
1,11-1011.03,4.78,3.48,3.85,2.11,3.54,3.85,3.96,3.67,4.22,...,4.19,3.63,3.78,3.93,3.93,3.30,3.26,3.88,0,1
2,11-1021.00,4.42,3.91,4.22,2.62,3.23,4.10,4.08,3.59,4.04,...,3.81,3.41,4.20,3.91,3.66,3.40,3.70,3.86,0,1
3,11-2011.00,4.32,2.78,3.60,1.77,2.79,3.37,3.51,2.35,3.26,...,2.96,2.74,2.98,2.62,2.31,3.11,2.29,2.52,2,5
4,11-2021.00,4.33,3.27,4.24,1.45,3.59,3.37,3.36,2.61,3.75,...,4.08,3.14,3.84,3.36,3.18,2.59,2.25,3.18,2,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874,53-7071.00,4.38,4.23,4.34,4.18,3.36,3.64,3.90,4.21,3.68,...,3.01,3.73,3.28,2.98,2.42,2.75,1.84,2.99,1,3
875,53-7072.00,4.23,4.47,4.61,4.90,3.91,3.62,3.91,3.78,3.96,...,2.82,3.15,2.68,2.86,2.78,2.52,2.18,3.02,0,1
876,53-7073.00,3.54,4.11,4.06,4.34,3.69,3.00,3.79,3.67,3.70,...,3.24,3.48,2.88,3.16,2.81,2.53,1.94,3.19,1,3
877,53-7081.00,3.17,3.13,2.52,3.38,1.92,2.28,1.80,3.01,1.61,...,2.38,1.69,1.92,2.10,1.72,1.72,1.00,1.39,1,3


# Tasks

In [376]:
# Preview the work-context table.
# NOTE(review): `work_context` is not defined in this part of the notebook; confirm the
# cell that loads it runs before this one.
work_context

Unnamed: 0,O*NET-SOC_CODE,ELEMENT_ID,ELEMENT_NAME,SCALE_ID,CATEGORY,DATA_VALUE,N,STANDARD_ERROR,LOWER_CI_BOUND,UPPER_CI_BOUND,RECOMMEND_SUPPRESS,NOT_RELEVANT,DATE,DOMAIN_SOURCE
0,11-1011.00,4.C.1.a.2.c,Public Speaking,CX,,3.07,37.0,0.2851,2.4923,3.6486,N,,08/2023,Incumbent
1,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,1.0,0.13,37.0,0.1370,0.0160,1.0770,N,,08/2023,Incumbent
2,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,2.0,39.49,37.0,11.0101,20.4073,62.4299,N,,08/2023,Incumbent
3,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,3.0,33.07,37.0,7.1359,20.4456,48.7245,N,,08/2023,Incumbent
4,11-1011.00,4.C.1.a.2.c,Public Speaking,CXP,4.0,7.79,37.0,4.3613,2.4093,22.4457,N,,08/2023,Incumbent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291196,53-7121.00,4.C.3.d.4,Work Schedules,CTP,3.0,0.89,32.0,0.9750,0.0947,7.8674,N,,08/2019,Incumbent
291197,53-7121.00,4.C.3.d.8,Duration of Typical Work Week,CT,,2.82,32.0,0.0983,2.6238,3.0000,N,,08/2019,Incumbent
291198,53-7121.00,4.C.3.d.8,Duration of Typical Work Week,CTP,1.0,0.00,32.0,0.0000,,,N,,08/2019,Incumbent
291199,53-7121.00,4.C.3.d.8,Duration of Typical Work Week,CTP,2.0,17.56,32.0,9.8350,5.0617,45.9881,N,,08/2019,Incumbent


In [149]:
print("=*" * 50)
# Task-level tables: ratings and statements are read from DATA_DIR, the category
# labels from DATA_DIR_REFERENCE.
task_ratings = pd.read_csv(DATA_DIR + "TASK_RATINGS.csv")
task_statements = pd.read_csv(DATA_DIR + "TASK_STATEMENTS.csv")
task_categories = pd.read_csv(DATA_DIR_REFERENCE + "TASK_CATEGORIES.csv")

# Describe every rating scale that occurs in the task ratings: its name, its value
# range and, when the scale has categories, the description of each category.
scale_ids = task_ratings.SCALE_ID.unique()
for scale in scale_ids:
    # scale_reference had its headers normalised to UPPER_SNAKE_CASE when it was
    # loaded, so the key column is "SCALE_ID"; the raw header "Scale ID" no longer
    # exists and would raise KeyError.
    scale_data = scale_reference[scale_reference["SCALE_ID"] == scale]
    scale_name = scale_data.SCALE_NAME.values[0]
    scale_min = scale_data.MINIMUM.values[0]
    scale_max = scale_data.MAXIMUM.values[0]
    print(f"Scale: {scale}, Name: {scale_name}, Min: {scale_min}, Max: {scale_max}")
    scale_cat = task_categories[task_categories["SCALE_ID"] == scale]
    if len(scale_cat) > 0:
        print("Categories:")
        for _, category_row in scale_cat.iterrows():
            print(f"\t{category_row['CATEGORY']}: {category_row['CATEGORY_DESCRIPTION']}")
    print("=*" * 50, end="\n\n")
    

=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*
Scale: FT, Name: Frequency of Task (Categories 1-7), Min: 0, Max: 100
Categories:
	1: Yearly or less
	2: More than yearly
	3: More than monthly
	4: More than weekly
	5: Daily
	6: Several times daily
	7: Hourly or more
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*

Scale: IM, Name: Importance, Min: 1, Max: 5
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*

Scale: RT, Name: Relevance of Task, Min: 0, Max: 100
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*



In [185]:
# Reference tables that link tasks to DWAs, DWAs to IWAs, and IWAs to GWA elements.
tasks_2_dwas = pd.read_csv(DATA_DIR_REFERENCE + "TASKS_TO_DWAS.csv")
dwas_data = pd.read_csv(DATA_DIR_REFERENCE + "DWA_REFERENCE.csv")
iwas_data = pd.read_csv(DATA_DIR_REFERENCE + "IWA_REFERENCE.csv")

# Element names/descriptions, with headers normalised to UPPER_SNAKE_CASE.
content_model_refrence = pd.read_csv(
    "https://www.onetcenter.org/dl_files/database/db_29_1_text/Content%20Model%20Reference.txt",
    sep='\t',
)
content_model_refrence.rename(columns=lambda x: x.replace(" ", "_").upper(), inplace=True)

# Distinct identifiers at each level, all taken from the DWA reference table.
dwas = dwas_data.DWA_ID.unique()
iwas = dwas_data.IWA_ID.unique()
gwa = dwas_data.ELEMENT_ID.unique()

print("Number of DWAs:", len(dwas))
print("Number of IWAs:", len(iwas))
print("Number of GWAs:", len(gwa))

# For every GWA element print its id, name and description, then its IWAs.
for el in gwa:
    print(el)
    el_reference = content_model_refrence[content_model_refrence['ELEMENT_ID'] == el]
    print(el_reference['ELEMENT_NAME'].values[0])
    print(el_reference['DESCRIPTION'].values[0])
    el_iwas = iwas_data[iwas_data['ELEMENT_ID'] == el]
    print("IWAs:")
    # List each IWA exactly once. The previous nested loop walked all rows again
    # for every unique IWA_ID, so the whole list was printed once per IWA.
    for _, iwa_row in el_iwas.drop_duplicates("IWA_ID").iterrows():
        print(f'\t- {iwa_row["IWA_ID"]}: {iwa_row["IWA_TITLE"]}')

    print('=====================================' * 4)

Number of DWAs: 2087
Number of IWAs: 332
Number of GWAs: 37
4.A.1.a.1
Getting Information
Observing, receiving, and otherwise obtaining information from all relevant sources.
IWAs:
	- 4.A.1.a.1.I01: Study details of artistic productions.
	- 4.A.1.a.1.I02: Read documents or materials to inform work processes.
	- 4.A.1.a.1.I03: Investigate criminal or legal matters.
	- 4.A.1.a.1.I04: Gather information from physical or electronic sources.
	- 4.A.1.a.1.I05: Consult legal materials or public records.
	- 4.A.1.a.1.I06: Gather data about operational or development activities.
	- 4.A.1.a.1.I07: Obtain information about goods or services.
	- 4.A.1.a.1.I08: Research issues related to earth sciences.
	- 4.A.1.a.1.I09: Research organizational behavior, processes, or performance.
	- 4.A.1.a.1.I10: Investigate the environmental impact of industrial or development activities.
	- 4.A.1.a.1.I11: Gather information for news stories.
	- 4.A.1.a.1.I12: Collect information about patients or clients.
	- 4.

In [None]:
# Scratch lookup for one task id `t`: take the first DWA mapped to the task, find
# that DWA's GWA element and IWA ids, then overwrite the two ids with their titles.
# NOTE(review): `t` is not defined in this cell; it must already exist in the kernel.
t_dwa = tasks_2_dwas.loc[tasks_2_dwas["TASK_ID"] == t, "DWA_ID"].values[0]
t_gwa, t_iwa = dwas_data.loc[dwas_data["DWA_ID"] == t_dwa, ["ELEMENT_ID", "IWA_ID"]].values[0]
t_dwa_name = dwas_data.loc[dwas_data["DWA_ID"] == t_dwa, "DWA_TITLE"].values[0]
# From here on t_iwa / t_gwa hold titles rather than ids.
t_iwa = iwas_data.loc[iwas_data["IWA_ID"] == t_iwa, "IWA_TITLE"].values[0]
t_gwa = content_model_refrence.loc[
    content_model_refrence["ELEMENT_ID"] == t_gwa, "ELEMENT_NAME"
].values[0]

In [269]:
# Frequency ("FT") values of the selected occupation, reshaped to one row per task
# and one column per category, with category codes replaced by their descriptions.
# NOTE(review): `occupation_tasks_data` and `t` are not defined in this cell; they
# must already exist in the kernel.
occupation_tasks_data_freq = (
    occupation_tasks_data
    .loc[occupation_tasks_data["SCALE_ID"] == "FT", ["TASK_ID", "CATEGORY", "DATA_VALUE"]]
    .pivot(index="TASK_ID", columns="CATEGORY", values="DATA_VALUE")
    .reset_index()
    .rename(columns=dict(zip(task_categories["CATEGORY"], task_categories["CATEGORY_DESCRIPTION"])))
)

# Frequency distribution of task `t` as {category description: value}; the leading
# TASK_ID column is skipped.
occupation_tasks_data_freq.loc[occupation_tasks_data_freq["TASK_ID"] == t].iloc[0, 1:].to_dict()

{'Yearly or less': 16.67,
 'More than yearly': 33.33,
 'More than monthly': 22.22,
 'More than weekly': 16.67,
 'Daily': 11.11,
 'Several times daily': 0.0,
 'Hourly or more': 0.0}

In [275]:
print("Occupation Task Example:", end="\n\n")

# Select one occupation and gather every task rating recorded for it.
occupation = "15-2051.01"
occupation_name = occ_data.loc[occ_data["ONET_SOC_CODE"] == occupation, "TITLE"].values[0]
print(f"Occupation: {occupation}, {occupation_name}", end="\n\n")
occupation_tasks_data = task_ratings.loc[task_ratings["ONET_SOC_CODE"] == occupation]
occupation_tasks = occupation_tasks_data["TASK_ID"].unique()

# Frequency ("FT") values reshaped to one row per task and one column per category,
# with the category codes replaced by their descriptions.
occupation_tasks_data_freq = (
    occupation_tasks_data
    .loc[occupation_tasks_data["SCALE_ID"] == "FT", ["TASK_ID", "CATEGORY", "DATA_VALUE"]]
    .pivot(index="TASK_ID", columns="CATEGORY", values="DATA_VALUE")
    .reset_index()
    .rename(columns=dict(zip(task_categories["CATEGORY"], task_categories["CATEGORY_DESCRIPTION"])))
)

# Report each task: statement, ratings, frequency distribution and its
# GWA > IWA > DWA classification (first DWA mapped to the task).
for t in occupation_tasks:
    # Statement text and task type.
    t_data = task_statements.loc[task_statements["TASK_ID"] == t]
    t_name = t_data["TASK"].values[0]
    t_type = t_data["TASK_TYPE"].values[0]

    # Importance ("IM") and relevance ("RT") values for this occupation/task pair.
    t_importance = occupation_tasks_data.loc[
        (occupation_tasks_data["TASK_ID"] == t) & (occupation_tasks_data["SCALE_ID"] == "IM"),
        "DATA_VALUE",
    ].values[0]
    t_relevance = occupation_tasks_data.loc[
        (occupation_tasks_data["TASK_ID"] == t) & (occupation_tasks_data["SCALE_ID"] == "RT"),
        "DATA_VALUE",
    ].values[0]

    # Resolve the ids and titles at each level of the work-activity hierarchy.
    t_dwa = tasks_2_dwas.loc[tasks_2_dwas["TASK_ID"] == t, "DWA_ID"].values[0]
    t_gwa, t_iwa = dwas_data.loc[dwas_data["DWA_ID"] == t_dwa, ["ELEMENT_ID", "IWA_ID"]].values[0]
    t_dwa_name = dwas_data.loc[dwas_data["DWA_ID"] == t_dwa, "DWA_TITLE"].values[0]
    t_iwa_name = iwas_data.loc[iwas_data["IWA_ID"] == t_iwa, "IWA_TITLE"].values[0]
    t_gwa_name = content_model_refrence.loc[
        content_model_refrence["ELEMENT_ID"] == t_gwa, "ELEMENT_NAME"
    ].values[0]

    print(f"\t- Task: {t} ({t_type}): {t_name}")
    print(f"\t\tImportance: {t_importance} \tRelevance: {t_relevance}")
    print("\t\tFrequencies:")
    # Row of this task in the frequency table, minus the leading TASK_ID column.
    t_freq = occupation_tasks_data_freq.loc[occupation_tasks_data_freq["TASK_ID"] == t].iloc[0, 1:].to_dict()
    for cat, freq in t_freq.items():
        print(f"\t\t\t{cat}: {freq}")
    print("=====================================" * 4)
    print(f"\t\tGWA: {t_gwa} ({t_gwa_name})")
    print(f"\t\t\tIWA: {t_iwa} ({t_iwa_name})")
    print(f"\t\t\t\tDWA: {t_dwa} ({t_dwa_name})")
    print("=====================================" * 4)
    print("\n")

Occupation Task Example:

Occupation: 15-2051.01, Business Intelligence Analysts

	- Task: 16150 (Core): Generate standard or custom reports summarizing business, financial, or economic data for review by executives, managers, clients, and other stakeholders.
		Importance: 4.64 	Relevance: 100.0
		Frequencies:
			Yearly or less: 0.0
			More than yearly: 4.55
			More than monthly: 13.64
			More than weekly: 31.82
			Daily: 18.18
			Several times daily: 13.64
			Hourly or more: 18.18
		GWA: 4.A.3.b.6 (Documenting/Recording Information)
			IWA: 4.A.3.b.6.I03 (Present research or technical information.)
				DWA: 4.A.3.b.6.I03.D05 (Prepare analytical reports.)


	- Task: 16139 (Core): Maintain or update business intelligence tools, databases, dashboards, systems, or methods.
		Importance: 4.36 	Relevance: 100.0
		Frequencies:
			Yearly or less: 0.0
			More than yearly: 4.55
			More than monthly: 18.18
			More than weekly: 13.64
			Daily: 36.36
			Several times daily: 22.73
			Hourly or more