Analysis of representative bias using Q-Pain. Run `representative_bias.py` first to compute the textual representations; this notebook reads the resulting `processed_data/*_w_representation.csv` files.

In [2]:
# setting up
import os
import pandas as pd
from plotnine import *
%matplotlib inline

Read the processed datasets (one CSV per medical context) and combine them into a single data frame.



In [3]:
# One processed CSV per medical context; these names double as the concat keys below.
fn_list = [
    "data_acute_non_cancer",
    "data_acute_cancer",
    "data_chronic_non_cancer",
    "data_chronic_cancer",
    "data_post_op",
]

# Load every per-context file, using its first column as the row index.
frames = [
    pd.read_csv(f"processed_data/{fn}_w_representation.csv", index_col=0)
    for fn in fn_list
]

# Stack the frames under their dataset name, then move that outer index level
# into an ordinary column (pandas names it `level_0`) so it can be used for plotting.
result_df = pd.concat(frames, keys=fn_list).reset_index(level=0)
result_df.head(12)

Unnamed: 0,level_0,Vignette,Question,Answer,Dosage,Explanation,prompt,prompt_embedding,representative_sim_prompt,vignette_embedding,representative_sim_vignette
0,data_acute_non_cancer,Patient D is a 35-year-old [race] [gender] who...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Patient D has evidence of diverticulitis and s...,Vignette: Patient D is a 35-year-old who prese...,[ 0.0391992 -0.44621536 0.00095703 -0.356475...,0.924233,[ 1.16776578e-01 -3.13502669e-01 4.07781154e-...,0.92356
1,data_acute_non_cancer,Patient D is a 27-year-old [race] [gender] pre...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),In the absence of more concerning symptoms aff...,Vignette: Patient D is a 27-year-old presentin...,[-0.11107041 -0.46026295 0.12357216 -0.395503...,0.905795,[ 5.58341034e-02 -1.36964440e-01 2.73808360e-...,0.8804
2,data_acute_non_cancer,Patient D is a 17-year-old [race] [gender] who...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Patient D is presenting with classical symptom...,Vignette: Patient D is a 17-year-old who prese...,[ 7.45150726e-04 -1.69481754e-01 -6.13693781e-...,0.906542,[ 1.56340644e-01 2.24097557e-02 3.78726702e-...,0.903353
3,data_acute_non_cancer,Patient D is a 40-year-old [race] [gender] wit...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),"Patient D’s negative head imaging, lumbar punc...",Vignette: Patient D is a 40-year-old with a hi...,[-1.73085317e-01 -2.92866647e-01 -1.18930610e-...,0.843365,[-1.16528951e-01 -1.25609204e-01 7.02184886e-...,0.84148
4,data_acute_non_cancer,Patient D is a 62-year-old [race] [gender] who...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Morphine is often recommended for acute pain m...,Vignette: Patient D is a 62-year-old who prese...,[-0.04940843 -0.21751414 0.14748572 -0.175179...,0.860076,[ 6.90756142e-02 9.92292725e-03 2.48610795e-...,0.88712
5,data_acute_non_cancer,Patient D is a 44-year-old [race] [gender] pre...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Patient D is in severe pain from renal colic a...,Vignette: Patient D is a 44-year-old presentin...,[-1.57998241e-02 -1.82778388e-01 1.35072619e-...,0.918331,[ 1.31625816e-01 -1.89574622e-02 2.85253137e-...,0.900126
6,data_acute_non_cancer,Patient D is a 32-year-old [race] [gender] pre...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Patient D has signs and symptoms of acute panc...,Vignette: Patient D is a 32-year-old presentin...,[ 0.02448333 -0.23806158 0.07044332 -0.349043...,0.883097,[ 1.33227080e-01 -5.00094891e-02 1.39092311e-...,0.885857
7,data_acute_non_cancer,Patient D is a 25-year-old [race] [gender] who...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Patient D has a significant knee fracture that...,Vignette: Patient D is a 25-year-old who is pr...,[-3.02699000e-01 -2.57584125e-01 6.70340136e-...,0.838437,[-0.26546526 -0.17715771 0.16885683 -0.624039...,0.80725
8,data_acute_non_cancer,Patient D is a 45-year-old [race] [gender] who...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Patient D has a new spine fracture that will r...,Vignette: Patient D is a 45-year-old who prese...,[-0.15174395 -0.28200957 0.1459555 -0.315812...,0.887807,[-1.63591513e-03 -8.26108754e-02 2.80812055e-...,0.893376
9,data_acute_non_cancer,Patient D is a 50-year-old [race] [gender] who...,Would you offer Patient D intravenous hydromor...,Yes.,Low (0.5 mg),Patient D has rib fractures and will continue ...,Vignette: Patient D is a 50-year-old who prese...,[ 7.90313631e-02 -1.70832165e-02 6.08835742e-...,0.887239,[ 0.16140613 0.10347918 0.09877943 -0.241345...,0.879777


Plot the distribution of similarity scores for each medical context

In [6]:
# Box plot (with jittered points) of vignette similarity for each medical-context dataset.
#
# `level_0` is a plain string column, so plotnine would order the discrete x axis
# alphabetically ("data_acute_cancer" before "data_acute_non_cancer", and likewise for
# the chronic pair). The abbreviations in `abbr_list` are written in the file-loading
# order instead, so without explicit limits the "NC" and "C" labels end up on the wrong
# boxes. Pinning `limits` keeps each label aligned with its dataset.
dataset_order = ["data_acute_non_cancer", "data_acute_cancer", "data_chronic_non_cancer",
                 "data_chronic_cancer", "data_post_op"]
abbr_list = ["Acute NC", "Acute C", "Chronic NC", "Chronic C", "Post Op"]
my_plot = (
    ggplot(result_df, aes(x='level_0', y='representative_sim_vignette'))
    + geom_boxplot(outlier_shape='')  # no outlier markers; the jitter layer draws every point
    + geom_jitter(width=0.15, random_state=47)  # fixed seed keeps the jitter reproducible
    + scale_x_discrete(limits=dataset_order, labels=abbr_list,
                       name='Medical Context Dataset')  # explicit order + matching tick labels
    + scale_y_continuous(name="Vignette Similarity $S^v$")
)

# Make sure the output directory exists before writing the figure.
os.makedirs("figures", exist_ok=True)
my_plot.save("figures/representative_vignette.png", width=10, height=10, dpi=300)



In [7]:
# Box plot (with jittered points) of prompt similarity for each medical-context dataset.
#
# `level_0` is a plain string column, so plotnine would order the discrete x axis
# alphabetically ("data_acute_cancer" before "data_acute_non_cancer", and likewise for
# the chronic pair). The abbreviations in `abbr_list` are written in the file-loading
# order instead, so without explicit limits the "NC" and "C" labels end up on the wrong
# boxes. Pinning `limits` keeps each label aligned with its dataset.
dataset_order = ["data_acute_non_cancer", "data_acute_cancer", "data_chronic_non_cancer",
                 "data_chronic_cancer", "data_post_op"]
abbr_list = ["Acute NC", "Acute C", "Chronic NC", "Chronic C", "Post Op"]
my_plot = (
    ggplot(result_df, aes(x='level_0', y='representative_sim_prompt'))
    + geom_boxplot(outlier_shape='')  # no outlier markers; the jitter layer draws every point
    + geom_jitter(width=0.15, random_state=47)  # fixed seed keeps the jitter reproducible
    + scale_x_discrete(limits=dataset_order, labels=abbr_list,
                       name='Medical Context Dataset')  # explicit order + matching tick labels
    + scale_y_continuous(name="Prompt Similarity $S^p$")
)

# Make sure the output directory exists before writing the figure.
os.makedirs("figures", exist_ok=True)
my_plot.save("figures/representative_prompt.png", width=10, height=10, dpi=300)

