## Imports, Helpers, Parameters

In [75]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
from textwrap import wrap
import numpy as np
import os

# Colour-blind-safe seaborn palette kept in a notebook-level name.
palette = sns.color_palette(palette="colorblind")

# Global seaborn plotting context. font_scale is the single knob for label size;
# it is left at 1.0 (it used to be 1.2) so dense charts are less crowded.
sns.set_context(context="notebook", font_scale=1.0)


## Set Up Data Frames

### Create New Pct Values In Main Data

In [76]:

# Load the task-level table that every later cell works from.
main_df = pd.read_csv("../data/tasks_final.csv")

# When False, the "Computer and Mathematical Occupations" rows are zeroed out
# and pct_normalized is rescaled so the remaining rows sum to 100 again.
claude_userbase_bias = True

if not claude_userbase_bias:
    mask = main_df["major_occ_category"].eq("Computer and Mathematical Occupations")
    main_df.loc[mask, "pct_normalized"] = 0
    main_df["pct_normalized"] = (main_df["pct_normalized"] / main_df["pct_normalized"].sum()) * 100

# pct_normalized divided by the mean task frequency, keyed by the year suffix
# of the freq_mean_* column that was used.
freq_adj = {
    2015: main_df["pct_normalized"] / main_df["freq_mean_2015"],
    2025: main_df["pct_normalized"] / main_df["freq_mean_2025"],
}

# Frequency-adjusted pct: each ratio rescaled to sum to 100.
for freq_year, ratio in freq_adj.items():
    main_df[f"pct_freq_div_{freq_year}"] = (ratio / ratio.sum()) * 100

# Frequency- and employment-adjusted pct, national ("nat") first, then "ut".
# The 2024 employment totals are paired with the 2025 frequency column.
for region in ("nat", "ut"):
    for emp_year, freq_year in ((2015, 2015), (2024, 2025)):
        ratio = freq_adj[freq_year] / main_df[f"emp_tot_{region}_{emp_year}"]
        main_df[f"pct_freq_{region}_emp_div_{emp_year}"] = (ratio / ratio.sum()) * 100



### Create Comp Pcts In Main Data

In [77]:
# Comparison ("comp") shares computed on main_df itself. Every column is a
# weight divided by the column-wide sum of that weight, times 100.
# The 2024 employment totals are paired with the 2025 frequency column.
for emp_year, freq_year in (("2015", "2015"), ("2024", "2025")):
    freq = main_df[f"freq_mean_{freq_year}"]

    # Comp column for pct_normalized: weight = frequency * employment
    for region in ("nat", "ut"):
        weight = freq * main_df[f"emp_tot_{region}_{emp_year}"]
        main_df[f"pct_claude_freq_{region}_emp_{emp_year}"] = (weight / weight.sum()) * 100

    # Comp column for pct_freq_div: weight = employment only
    for region in ("nat", "ut"):
        emp = main_df[f"emp_tot_{region}_{emp_year}"]
        main_df[f"pct_claude_{region}_emp_{emp_year}"] = (emp / emp.sum()) * 100

# Comp column for pct_freq_*_emp_div: an equal base share per distinct task,
# divided by n_occurrences on each row and rescaled to sum to 100.
n_unique_tasks = main_df["task_normalized"].nunique()
inv_weight = (1 / n_unique_tasks) / main_df["n_occurrences"]
main_df["pct_claude_base_dist"] = (inv_weight / inv_weight.sum()) * 100

main_df

Unnamed: 0,task,task_normalized,soc_code_2010,title,title_normalized,task_type,n_responding,domain_source,n_occurrences,pct_weighted,...,pct_freq_ut_emp_div_2024,pct_claude_freq_nat_emp_2015,pct_claude_freq_ut_emp_2015,pct_claude_nat_emp_2015,pct_claude_ut_emp_2015,pct_claude_freq_nat_emp_2024,pct_claude_freq_ut_emp_2024,pct_claude_nat_emp_2024,pct_claude_ut_emp_2024,pct_claude_base_dist
0,"Interpret and explain policies, rules, regulat...",interpret and explain policies rules regulatio...,11-1011.00,Chief Executives,chief executives,Core,87.0,Incumbent,1.0,0.040205,...,0.001513,0.009769,0.015963,0.033258,0.053309,0.011055,0.018671,0.026072,0.042604,0.028498
1,"Confer with board members, organization offici...",confer with board members organization officia...,11-1011.00,Chief Executives,chief executives,Core,87.0,Incumbent,1.0,0.017762,...,0.000850,0.020607,0.033673,0.033258,0.053309,0.008694,0.014684,0.026072,0.042604,0.028498
2,Review reports submitted by staff members to r...,review reports submitted by staff members to r...,11-1011.00,Chief Executives,chief executives,Core,87.0,Incumbent,1.0,0.001915,...,0.000062,0.009080,0.014838,0.033258,0.053309,0.012840,0.021687,0.026072,0.042604,0.028498
3,"Serve as liaisons between organizations, share...",serve as liaisons between organizations shareh...,11-1011.00,Chief Executives,chief executives,Supplemental,87.0,Incumbent,1.0,0.003084,...,0.000118,0.002934,0.004795,0.033258,0.053309,0.010887,0.018387,0.026072,0.042604,0.028498
4,"Direct, plan, or implement policies, objective...",direct plan or implement policies objectives o...,11-1011.00,Chief Executives,chief executives,Core,87.0,Incumbent,1.0,0.004254,...,0.000122,0.023051,0.037667,0.033258,0.053309,0.014453,0.024411,0.026072,0.042604,0.028498
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4245,Wet concrete surface and rub with stone to smo...,wet concrete surface and rub with stone to smo...,47-2053.00,Terrazzo Workers and Finishers,terrazzo workers and finishers,Supplemental,61.0,Incumbent,2.0,0.002553,...,0.018845,0.000123,0.000121,0.000460,0.000446,0.000048,0.000048,0.000181,0.000174,0.014249
4246,Program electronic equipment.,program electronic equipment,51-6062.00,"Textile Cutting Machine Setters, Operators, an...",textile cutting machine setters operators and ...,Supplemental,109.0,Incumbent,2.0,0.063285,...,0.010920,0.003301,0.003260,0.002078,0.002013,0.002049,0.002035,0.001121,0.001078,0.014249
4247,Program electronic equipment.,program electronic equipment,51-6063.00,"Textile Knitting and Weaving Machine Setters, ...",textile knitting and weaving machine setters o...,Supplemental,85.0,Incumbent,2.0,0.063285,...,0.008398,0.003278,0.001053,0.003194,0.001007,0.002673,0.002647,0.001819,0.001742,0.014249
4248,Plan and formulate flight activities and test ...,plan and formulate flight activities and test ...,53-2011.00,"Airline Pilots, Copilots, and Flight Engineers",airline pilots copilots and flight engineers,Supplemental,52.0,Incumbent,2.0,0.006382,...,0.000326,0.009497,0.014993,0.011516,0.017832,0.004262,0.006868,0.012429,0.019381,0.014249


### Create Comp Pcts & Task Type In Eco Data Frame

In [78]:
def _add_eco_comp_columns(eco_df, year_label):
    """
    Add the comparison ("comp") percentage columns to an economy ratings frame.

    Columns written onto ``eco_df`` (every pct column sums to 100 over the frame):
      * pct_eco_freq_{nat,ut}_emp_<year_label>: freq_mean * tot_emp_{nat,ut}
      * pct_eco_{nat,ut}_emp_<year_label>: tot_emp_{nat,ut} only
      * task_counts: number of rows sharing the row's task_normalized
      * pct_eco_base_dist: equal share per distinct task, divided by task_counts

    Parameters
    ----------
    eco_df : pandas.DataFrame
        Must contain freq_mean, tot_emp_nat, tot_emp_ut and task_normalized.
    year_label : str
        Suffix used in the generated column names (e.g. "2015", "2024").

    Returns
    -------
    pandas.DataFrame
        The same frame object, modified in place.
    """
    # Comp column for pct_normalized: weight = frequency * employment
    for region in ("nat", "ut"):
        weight = eco_df["freq_mean"] * eco_df[f"tot_emp_{region}"]
        eco_df[f"pct_eco_freq_{region}_emp_{year_label}"] = (weight / weight.sum()) * 100

    # Comp column for pct_freq_div: weight = employment only
    for region in ("nat", "ut"):
        emp = eco_df[f"tot_emp_{region}"]
        eco_df[f"pct_eco_{region}_emp_{year_label}"] = (emp / emp.sum()) * 100

    # Comp column for pct_freq_*_emp_div: a base pct per task. Dividing by
    # task_counts means the rows of one task add up to the same share for every
    # task. Scaled by 100 so it is in the same percent units as
    # main_df["pct_claude_base_dist"], which is drawn on the same charts.
    eco_df["task_counts"] = eco_df.groupby("task_normalized")["task_normalized"].transform("count")
    inv_weight = (1 / eco_df["task_normalized"].nunique()) / eco_df["task_counts"]
    eco_df["pct_eco_base_dist"] = (inv_weight / inv_weight.sum()) * 100
    return eco_df


# eco 2015 df
eco_df_2015 = _add_eco_comp_columns(pd.read_csv("../data/ratings_eco_2015.csv"), "2015")
# Earlier versions of this cell stored the count-adjusted share under
# "pct_base_dist" (2015 frame only); keep that name available as an alias.
eco_df_2015["pct_base_dist"] = eco_df_2015["pct_eco_base_dist"]

# eco 2025 df (ratings from the 2025 file; generated columns carry the 2024 suffix)
eco_df_2025 = _add_eco_comp_columns(pd.read_csv("../data/ratings_eco_2025.csv"), "2024")



def classify_task_type(df):
    """
    Label every row of ``df`` as a "Core" or "Supplemental" task.

    A row is "Core" when relevance >= 67 and importance >= 3.0; every other
    row (including rows where either comparison is False because of a missing
    value) is "Supplemental".

    The label is written to a 'task_type' column on the frame that was passed
    in, and that same frame is returned.
    """
    meets_relevance = df["relevance"] >= 67
    meets_importance = df["importance"] >= 3.0

    df["task_type"] = np.where(meets_relevance & meets_importance, "Core", "Supplemental")
    return df

# One row per distinct (task, Claude base share) pair from main_df. It is
# left-joined onto both economy frames so the plotting loops can read the
# Claude baseline from the same frame as the economy baseline.
claude_base_by_task = main_df.loc[:, ["task_normalized", "pct_claude_base_dist"]].drop_duplicates()

# Tag each economy frame with task_type, then attach the Claude base share.
eco_df_2015 = classify_task_type(eco_df_2015).merge(
    claude_base_by_task, how="left", on="task_normalized"
)
eco_df_2025 = classify_task_type(eco_df_2025).merge(
    claude_base_by_task, how="left", on="task_normalized"
)


## Top Tasks With Comp

### Raw Pcts

In [79]:
# Raw-percentage charts. For every usage metric in `pcts` and every task-type
# filter, plot the top `top_n` tasks as horizontal bars and overlay the
# comparison baselines that `econ_map` lists for that metric. One PNG is written
# to `outdir` per (metric, task type, scenario).
outdir = "../charts/top_tasks_with_comp_raw"
os.makedirs(outdir, exist_ok=True)

pcts = ["pct_normalized","pct_freq_div_2015", "pct_freq_div_2025","pct_freq_nat_emp_div_2015", "pct_freq_nat_emp_div_2024",
        "pct_freq_ut_emp_div_2015", "pct_freq_ut_emp_div_2024"]
task_types = ["All", "Core", "Supplemental"]

# metric -> scenario -> list of (comparison frame name, column, legend label).
# All tuples of one scenario are read from the frame named in the first tuple.
econ_map = {
    "pct_normalized": {
        "eco_comp_2015": [
            ("eco_df_2015", "pct_eco_freq_nat_emp_2015", "Nat 2015"),
            ("eco_df_2015", "pct_eco_freq_ut_emp_2015", "UT 2015"),
        ],
        "eco_comp_2024": [
            ("eco_df_2025", "pct_eco_freq_nat_emp_2024", "Nat 2024"),
            ("eco_df_2025", "pct_eco_freq_ut_emp_2024", "UT 2024"),
        ],
        "aei_comp": [
            ("main_df", "pct_claude_freq_nat_emp_2024", "Nat AEI 2024"),
            ("main_df", "pct_claude_freq_ut_emp_2024", "UT AEI 2024"),
        ],
    },
    "pct_freq_div_2015": {
        "eco_comp_2015": [
            ("eco_df_2015", "pct_eco_nat_emp_2015", "Nat 2015"),
            ("eco_df_2015", "pct_eco_ut_emp_2015", "UT 2015"),
        ],
        "aei_comp": [
            ("main_df", "pct_claude_nat_emp_2015", "Nat AEI 2015"),
            ("main_df", "pct_claude_ut_emp_2015", "UT AEI 2015"),
        ],
    },
    "pct_freq_div_2025": {
        "eco_comp_2024": [
            ("eco_df_2025", "pct_eco_nat_emp_2024", "Nat 2024"),
            ("eco_df_2025", "pct_eco_ut_emp_2024", "UT 2024"),
        ],
        "aei_comp": [
            ("main_df", "pct_claude_nat_emp_2024", "Nat AEI 2024"),
            ("main_df", "pct_claude_ut_emp_2024", "UT AEI 2024"),
        ],
    },
    "pct_freq_nat_emp_div_2015": {
        "comp_2015": [
            ("eco_df_2015", "pct_eco_base_dist", "Eco Base 2015"),
            ("eco_df_2015", "pct_claude_base_dist", "Claude Base 2015")
        ],
    },
    "pct_freq_ut_emp_div_2015": {
        "comp_2015": [
            ("eco_df_2015", "pct_eco_base_dist", "Eco Base 2015"),
            ("eco_df_2015", "pct_claude_base_dist", "Claude Base 2015")
        ],
    },
    "pct_freq_nat_emp_div_2024": {
        "comp_2024": [
            ("eco_df_2025", "pct_eco_base_dist", "Eco Base 2024"),
            ("eco_df_2025", "pct_claude_base_dist", "Claude Base 2024")
        ],
    },
    "pct_freq_ut_emp_div_2024": {
        "comp_2024": [
            ("eco_df_2025", "pct_eco_base_dist", "Eco Base 2024"),
            ("eco_df_2025", "pct_claude_base_dist", "Claude Base 2024")
        ],
    },
}


top_n = 15

# Comparison frames addressable by the names used in econ_map (loop-invariant).
df_lookup = {
    "main_df": main_df,
    "eco_df_2015": eco_df_2015,
    "eco_df_2025": eco_df_2025,
}


for pct_col in pcts:
    if pct_col not in main_df.columns:
        continue

    for ttype in task_types:
        if ttype == "All":
            sub = main_df.copy()
        else:
            sub = main_df[main_df["task_type"].str.capitalize() == ttype]

        # aggregate by task (does not depend on the scenario, so done once here)
        agg = (sub.groupby("task_normalized", as_index=False)[pct_col]
                .sum()
                .sort_values(pct_col, ascending=False)
                .head(top_n))

        # --- map occupations to each task ---
        task_to_titles = (
            sub.groupby("task_normalized")["title"]
            .apply(lambda x: sorted(set(x)))
            .to_dict()
        )

        # add a label column: task text plus up to 3 occupation titles,
        # with "..." appended when the task has more than 3
        agg["task_with_occ"] = agg["task_normalized"].apply(
            lambda t: (
                f"{t}  [{', '.join(task_to_titles.get(t, [])[:3])}"
                + ("..." if len(task_to_titles.get(t, [])) > 3 else "")
                + "]"
                if task_to_titles.get(t) else t
            )
        )

        for scenario, baselines in econ_map.get(pct_col, {}).items():

            # comparison frame for this scenario (all tuples in a scenario share it)
            df_to_use_full = df_lookup[baselines[0][0]]

            # filter by Core/Supplemental if needed
            if ttype == "All":
                comp_sub = df_to_use_full.copy()
            else:
                comp_sub = df_to_use_full[df_to_use_full["task_type"].str.capitalize() == ttype]

            plot_df = agg.copy()
            # Baselines: sum the comparison pct to task level, then left-merge
            # so plot_df keeps the bar order of agg.
            for _, col_name, label in baselines:
                eco_task = (
                    comp_sub.groupby("task_normalized", as_index=False)[col_name]
                    .sum()
                    .rename(columns={col_name: f"baseline_{label}"})
                )
                plot_df = plot_df.merge(eco_task, on="task_normalized", how="left")

            # plot
            fig, ax = plt.subplots(figsize=(12, max(6, 0.55 * len(agg))))  # dynamic height
            sns.barplot(data=agg, x=pct_col, y="task_with_occ", color="skyblue", ax=ax)

            # Wrap y labels
            current_labels = [item.get_text() for item in ax.get_yticklabels()]
            wrapped_labels = ['\n'.join(wrap(label, width=70)) for label in current_labels]
            ax.set_yticks(ax.get_yticks())
            ax.set_yticklabels(wrapped_labels, fontsize=7)
            plt.subplots_adjust(left=0.45)  # more room for long task+occ labels

            # Overlay each baseline as a horizontal line from 0 with a vertical
            # end tick. y positions are 0..n-1 in the plotted order.
            baseline_cols = [c for c in plot_df.columns if c.startswith("baseline_")]

            # (vertical offset, tick half-height, colour) per baseline column.
            if len(baseline_cols) == 1:
                # single baseline -> draw centered on the bar
                line_styles = [(0.0, 0.2, "black")]
            elif len(baseline_cols) == 2:
                # two baselines -> first listed is black at -0.15, second is red at +0.15
                line_styles = [(-0.15, 0.1, "black"), (+0.15, 0.1, "red")]
            else:
                # other counts are not drawn
                line_styles = []

            for i, row in plot_df.reset_index(drop=True).iterrows():
                for col, (offset, tick, color) in zip(baseline_cols, line_styles):
                    val = row[col]
                    if pd.notna(val):
                        ax.hlines(y=i+offset, xmin=0, xmax=val, colors=color, linewidth=1.5)
                        ax.vlines(x=val, ymin=i+offset-tick, ymax=i+offset+tick, colors=color, linewidth=1.5)

            # Legend entries use the labels from econ_map, so the colours are
            # described correctly for every scenario (Nat/UT, AEI, Eco/Claude base).
            legend_elements = [
                Line2D([0], [0], color=color, lw=1.5, label=col[len("baseline_"):])
                for col, (offset, tick, color) in zip(baseline_cols, line_styles)
            ]
            if legend_elements:
                ax.legend(handles=legend_elements, loc="lower right", fontsize=8)

            # x-axis formatting: the raw columns are already in percent units
            # (they are built with "* 100"), so ticks are never rescaled. Small
            # charts only get an extra decimal so neighbouring ticks stay distinct.
            decimals = 1 if agg[pct_col].max() > 1.01 else 2
            ax.xaxis.set_major_formatter(
                plt.FuncFormatter(lambda x, _, d=decimals: f"{x:.{d}f}%")
            )
            xlabel = f"{pct_col} (percent)"

            ax.set_title(f"Top {top_n} Tasks | {ttype} | {pct_col} | {scenario} | Raw", fontsize=14)
            ax.set_xlabel(xlabel, fontsize=12)
            ax.set_ylabel("Task [Occupations]", fontsize=12)

            plt.tight_layout()

            if not claude_userbase_bias:
                fname = f"top{top_n}_tasks_unbiased_{pct_col}_{ttype.lower()}_{scenario}_raw.png"
            else:
                fname = f"top{top_n}_tasks_{pct_col}_{ttype.lower()}_{scenario}_raw.png"

            plt.savefig(os.path.join(outdir, fname))
            plt.close(fig)  # release this figure explicitly; many are created in the loop

### Re-Normalized Pcts

In [80]:
# Re-normalized charts. Same layout as the raw charts, but the task-level sums
# of the usage metric and of every baseline are each divided by their own total
# (within the current task-type filter) before plotting, so all plotted values
# are fractions of 1. One PNG is written to `outdir` per
# (metric, task type, scenario).
outdir = "../charts/top_tasks_with_comp_norm"
os.makedirs(outdir, exist_ok=True)

pcts = ["pct_normalized","pct_freq_div_2015", "pct_freq_div_2025","pct_freq_nat_emp_div_2015", "pct_freq_nat_emp_div_2024",
        "pct_freq_ut_emp_div_2015", "pct_freq_ut_emp_div_2024"]
task_types = ["All", "Core", "Supplemental"]

# metric -> scenario -> list of (comparison frame name, column, legend label).
# All tuples of one scenario are read from the frame named in the first tuple.
econ_map = {
    "pct_normalized": {
        "eco_comp_2015": [
            ("eco_df_2015", "pct_eco_freq_nat_emp_2015", "Nat 2015"),
            ("eco_df_2015", "pct_eco_freq_ut_emp_2015", "UT 2015"),
        ],
        "eco_comp_2024": [
            ("eco_df_2025", "pct_eco_freq_nat_emp_2024", "Nat 2024"),
            ("eco_df_2025", "pct_eco_freq_ut_emp_2024", "UT 2024"),
        ],
        "aei_comp": [
            ("main_df", "pct_claude_freq_nat_emp_2024", "Nat AEI 2024"),
            ("main_df", "pct_claude_freq_ut_emp_2024", "UT AEI 2024"),
        ],
    },
    "pct_freq_div_2015": {
        "eco_comp_2015": [
            ("eco_df_2015", "pct_eco_nat_emp_2015", "Nat 2015"),
            ("eco_df_2015", "pct_eco_ut_emp_2015", "UT 2015"),
        ],
        "aei_comp": [
            ("main_df", "pct_claude_nat_emp_2015", "Nat AEI 2015"),
            ("main_df", "pct_claude_ut_emp_2015", "UT AEI 2015"),
        ],
    },
    "pct_freq_div_2025": {
        "eco_comp_2024": [
            ("eco_df_2025", "pct_eco_nat_emp_2024", "Nat 2024"),
            ("eco_df_2025", "pct_eco_ut_emp_2024", "UT 2024"),
        ],
        "aei_comp": [
            ("main_df", "pct_claude_nat_emp_2024", "Nat AEI 2024"),
            ("main_df", "pct_claude_ut_emp_2024", "UT AEI 2024"),
        ],
    },
    "pct_freq_nat_emp_div_2015": {
        "comp_2015": [
            ("eco_df_2015", "pct_eco_base_dist", "Eco Base 2015"),
            ("eco_df_2015", "pct_claude_base_dist", "Claude Base 2015")
        ],
    },
    "pct_freq_ut_emp_div_2015": {
        "comp_2015": [
            ("eco_df_2015", "pct_eco_base_dist", "Eco Base 2015"),
            ("eco_df_2015", "pct_claude_base_dist", "Claude Base 2015")
        ],
    },
    "pct_freq_nat_emp_div_2024": {
        "comp_2024": [
            ("eco_df_2025", "pct_eco_base_dist", "Eco Base 2024"),
            ("eco_df_2025", "pct_claude_base_dist", "Claude Base 2024")
        ],
    },
    "pct_freq_ut_emp_div_2024": {
        "comp_2024": [
            ("eco_df_2025", "pct_eco_base_dist", "Eco Base 2024"),
            ("eco_df_2025", "pct_claude_base_dist", "Claude Base 2024")
        ],
    },
}


top_n = 15

# Comparison frames addressable by the names used in econ_map (loop-invariant).
df_lookup = {
    "main_df": main_df,
    "eco_df_2015": eco_df_2015,
    "eco_df_2025": eco_df_2025,
}


for pct_col in pcts:
    if pct_col not in main_df.columns:
        continue

    for ttype in task_types:
        if ttype == "All":
            sub = main_df.copy()
        else:
            sub = main_df[main_df["task_type"].str.capitalize() == ttype]

        # aggregate by task (does not depend on the scenario, so done once here)
        agg = (sub.groupby("task_normalized", as_index=False)[pct_col]
                .sum())
        # renormalize here: shares of the filtered total, as fractions of 1
        agg[pct_col] = agg[pct_col] / agg[pct_col].sum()

        agg = (agg.sort_values(pct_col, ascending=False)
                .head(top_n))

        # --- map occupations to each task ---
        task_to_titles = (
            sub.groupby("task_normalized")["title"]
            .apply(lambda x: sorted(set(x)))
            .to_dict()
        )

        # add a label column: task text plus up to 3 occupation titles,
        # with "..." appended when the task has more than 3
        agg["task_with_occ"] = agg["task_normalized"].apply(
            lambda t: (
                f"{t}  [{', '.join(task_to_titles.get(t, [])[:3])}"
                + ("..." if len(task_to_titles.get(t, [])) > 3 else "")
                + "]"
                if task_to_titles.get(t) else t
            )
        )

        for scenario, baselines in econ_map.get(pct_col, {}).items():

            # comparison frame for this scenario (all tuples in a scenario share it)
            df_to_use_full = df_lookup[baselines[0][0]]

            # filter by Core/Supplemental if needed
            if ttype == "All":
                comp_sub = df_to_use_full.copy()
            else:
                comp_sub = df_to_use_full[df_to_use_full["task_type"].str.capitalize() == ttype]

            plot_df = agg.copy()
            # Baselines: sum the comparison pct to task level, renormalize, then
            # left-merge so plot_df keeps the bar order of agg.
            for _, col_name, label in baselines:
                eco_task = (comp_sub.groupby("task_normalized", as_index=False)[col_name]
                                .sum())
                # renormalize here
                eco_task[col_name] = eco_task[col_name] / eco_task[col_name].sum()

                eco_task = eco_task.rename(columns={col_name: f"baseline_{label}"})
                plot_df = plot_df.merge(eco_task, on="task_normalized", how="left")

            # plot
            fig, ax = plt.subplots(figsize=(12, max(6, 0.55 * len(agg))))  # dynamic height
            sns.barplot(data=agg, x=pct_col, y="task_with_occ", color="skyblue", ax=ax)

            # Wrap y labels
            current_labels = [item.get_text() for item in ax.get_yticklabels()]
            wrapped_labels = ['\n'.join(wrap(label, width=70)) for label in current_labels]
            ax.set_yticks(ax.get_yticks())
            ax.set_yticklabels(wrapped_labels, fontsize=7)
            plt.subplots_adjust(left=0.45)  # more room for long task+occ labels

            # Overlay each baseline as a horizontal line from 0 with a vertical
            # end tick. y positions are 0..n-1 in the plotted order.
            baseline_cols = [c for c in plot_df.columns if c.startswith("baseline_")]

            # (vertical offset, tick half-height, colour) per baseline column.
            if len(baseline_cols) == 1:
                # single baseline -> draw centered on the bar
                line_styles = [(0.0, 0.2, "black")]
            elif len(baseline_cols) == 2:
                # two baselines -> first listed is black at -0.15, second is red at +0.15
                line_styles = [(-0.15, 0.1, "black"), (+0.15, 0.1, "red")]
            else:
                # other counts are not drawn
                line_styles = []

            for i, row in plot_df.reset_index(drop=True).iterrows():
                for col, (offset, tick, color) in zip(baseline_cols, line_styles):
                    val = row[col]
                    if pd.notna(val):
                        ax.hlines(y=i+offset, xmin=0, xmax=val, colors=color, linewidth=1.5)
                        ax.vlines(x=val, ymin=i+offset-tick, ymax=i+offset+tick, colors=color, linewidth=1.5)

            # Legend entries use the labels from econ_map, so the colours are
            # described correctly for every scenario (Nat/UT, AEI, Eco/Claude base).
            legend_elements = [
                Line2D([0], [0], color=color, lw=1.5, label=col[len("baseline_"):])
                for col, (offset, tick, color) in zip(baseline_cols, line_styles)
            ]
            if legend_elements:
                ax.legend(handles=legend_elements, loc="lower right", fontsize=8)

            # x-axis formatting: after the renormalization above every plotted
            # value is a fraction of 1, so ticks are always shown as fraction * 100.
            ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{x*100:.1f}%"))
            xlabel = f"{pct_col} (percent)"

            ax.set_title(f"Top {top_n} Tasks | {ttype} | {pct_col} | {scenario} | Normalized", fontsize=14)
            ax.set_xlabel(xlabel, fontsize=12)
            ax.set_ylabel("Task [Occupations]", fontsize=12)

            plt.tight_layout()

            if not claude_userbase_bias:
                fname = f"top{top_n}_tasks_unbiased_{pct_col}_{ttype.lower()}_{scenario}_norm.png"
            else:
                fname = f"top{top_n}_tasks_{pct_col}_{ttype.lower()}_{scenario}_norm.png"

            plt.savefig(os.path.join(outdir, fname))
            plt.close(fig)  # release this figure explicitly; many are created in the loop

## Top Occupations With Comp