In [25]:
import pandas as pd
pd.set_option('display.max_columns', None)

def expand_and_concat_dicts(df, col):
    """
    Expand a column of dictionaries into separate columns and concatenate with the original dataframe.
    """

    # Expand the column of dictionaries into separate columns
    try:
        expanded_df = pd.DataFrame(df[col].apply(eval).tolist())
    except Exception as e:
        expanded_df = pd.DataFrame(df[col].tolist())
        
    # Rename the columns to avoid duplicates
    expanded_df.columns = [f"{col}_{colname}" for colname in expanded_df.columns]
    
    # Concatenate the expanded dataframe with the original dataframe
    concatenated_df = pd.concat([df, expanded_df], axis=1)
    concatenated_df = concatenated_df.drop(columns=[col])
        
    return concatenated_df

In [47]:
df_prometheus = pd.read_csv('output/samples/en/phase_3/prometheus_evaluation.tsv', sep='\t', dtype={"filenameid": str})
df_pairs = df_prometheus[["filenameid", "clinical_case", "discharge_summary"]]
df_human = pd.read_csv('output/samples/en/phase_3/human_eval.tsv', sep='\t', dtype={"doc_id": str})
df_human = df_human.rename(columns={"doc_id": "filenameid"})
df_human["Key Information CC"] = df_human["Key Information CC"].apply(eval)
df_human["Key Information DS"] = df_human["Key Information DS"].apply(eval)

cols_eval = ["Medical Entities Completeness", "Structure - Headers",	"Structure - Content", "Content Accuracy",	"Made-up Content", "Overall Quality"]
for col in cols_eval:
    df_human[col] = df_human[col].astype(int)
    
df_leti = df_human[df_human["annotator"] == "Leti"]
df_laura = df_human[df_human["annotator"] == "Laura"]


df_human.head()

Unnamed: 0,filenameid,annotator,Key Information CC,Key Information DS,Medical Entities Completeness,Structure - Headers,Structure - Content,Content Accuracy,Made-up Content,Coherence,Overall Quality,Feedback
0,27198691,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,5,5,5,5,False,5,The only thing I wanted to comment on is the d...
1,29399767,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,5,5,4,4,False,4,The final section Discharge instructions has b...
2,29877258,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",4,5,5,3,1,False,3,There are missing data of some clinical proced...
3,29899452_1,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",4,3,3,3,2,False,2,DS has confused the diagnoses with the medical...
4,30170386,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, sex, a...",5,4,5,4,4,False,2,Diagnosis:** Cutaneous Sarcoidosis (CS). This...


In [59]:
df_human_tot = df_laura.merge(df_leti, on="filenameid", suffixes=("_laura", "_leti"))

for col in cols_eval:
    df_human_tot["diff_" + col] = (df_human_tot[f"{col}_laura"] - df_human_tot[f"{col}_leti"] ).abs()
df_human_tot["diff_Key Information DS"] = df_human_tot.apply(lambda row: set(row["Key Information DS_laura"]) - set(row["Key Information DS_leti"]), axis=1)
df_human_tot["diff_len_Key Information DS"] = df_human_tot["diff_Key Information DS"].apply(len)

df_human_tot["max_diff"] = df_human_tot[[f"diff_{col}" for col in cols_eval + ["len_Key Information DS"]]].max(axis=1)
df_human_tot["sum_diff"] = df_human_tot[[f"diff_{col}" for col in cols_eval + ["len_Key Information DS"]]].sum(axis=1)
df_human_tot = df_pairs.merge(df_human_tot, on="filenameid", how="left")
df_human_tot = df_human_tot.sort_values(by=["sum_diff", "max_diff"], ascending=[False, False])
df_human_tot

Unnamed: 0,filenameid,clinical_case,discharge_summary,annotator_laura,Key Information CC_laura,Key Information DS_laura,Medical Entities Completeness_laura,Structure - Headers_laura,Structure - Content_laura,Content Accuracy_laura,Made-up Content_laura,Coherence_laura,Overall Quality_laura,Feedback_laura,annotator_leti,Key Information CC_leti,Key Information DS_leti,Medical Entities Completeness_leti,Structure - Headers_leti,Structure - Content_leti,Content Accuracy_leti,Made-up Content_leti,Coherence_leti,Overall Quality_leti,Feedback_leti,diff_Medical Entities Completeness,diff_Structure - Headers,diff_Structure - Content,diff_Content Accuracy,diff_Made-up Content,diff_Overall Quality,diff_Key Information DS,diff_len_Key Information DS,max_diff,sum_diff
4,31056078,"Initial presentation\nA 55-year-old, Caucasian...",**Discharge Summary**\n\n**Patient Information...,Laura,"[clinical procedures, sex, age, symptoms, sign...","[sex, age, past medical conditions, medication...",2,2,1,3,4,False,2,In the DS is missing some information about fa...,Leti,"[diseases and co-morbidities, medications, cli...","[symptoms, signs and clinical findings, diseas...",4,4,3,4,5,False,3,Demasiado resumido en general.,2,2,2,1,1,1,{},0,2,9
6,29899452_1,The patient was a 29-year-old man with PIK3CA ...,**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",4,3,3,3,2,False,2,DS has confused the diagnoses with the medical...,Leti,"[diseases and co-morbidities, medications, sex...","[diseases and co-morbidities, medications, cli...",4,5,3,3,5,False,4,No menciona la dilatación de VI ni la elevació...,0,2,0,0,3,2,"{symptoms, signs and clinical findings}",1,3,8
15,31379463,Patient medical history\n\nThe described patie...,**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, medica...","[symptoms, signs and clinical findings, medica...",3,5,4,4,4,False,2,DS: The patient was supplemented with 40 mmol ...,Leti,"[diseases and co-morbidities, medications, sex...","[symptoms, signs and clinical findings, diseas...",2,2,2,5,5,False,2,"Demasiado resumido, no bien estructurado",1,3,2,1,1,0,{},0,3,8
16,30170386,A 77-year-old man diagnosed with cutaneous sar...,**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, sex, a...",5,4,5,4,4,False,2,Diagnosis:** Cutaneous Sarcoidosis (CS). This...,Leti,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,5,5,5,5,False,5,Sin pegas,0,1,0,1,1,3,{},0,3,6
3,29399767,A 54-year-old male had had a HeartMate 3™ impl...,**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,5,5,4,4,False,4,The final section Discharge instructions has b...,Leti,"[symptoms, signs and clinical findings, medica...","[symptoms, signs and clinical findings, medica...",4,3,5,5,5,False,4,Diagnosis:** Permanent atrial fibrillation wit...,1,2,0,1,1,0,{diseases and co-morbidities},1,2,6
25,29877258,"A 35-year-old woman (gravida 1, para 1; uneven...",**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",4,5,5,3,1,False,3,There are missing data of some clinical proced...,Leti,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,5,3,4,3,True,3,La paciente acude a revisión del HeartMate no ...,1,0,2,1,2,0,{},0,2,6
1,32864940,"GSA, male, 55 years old, white, forklift opera...",**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",3,5,3,3,4,False,2,DS miss the duration of symptoms. In section m...,Leti,"[diseases and co-morbidities, medications, sex...","[symptoms, signs and clinical findings, diseas...",2,4,2,4,4,False,3,a lot of information is missed,1,1,1,1,0,1,{},0,1,5
22,36055898,We present the case of a 22-year-old female pa...,**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,5,5,4,4,False,4,DS missed information about one procedure ( EC...,Leti,"[diseases and co-morbidities, sex, age, past m...","[symptoms, signs and clinical findings, diseas...",4,5,4,5,5,False,5,well done,1,0,1,1,1,1,{},0,1,5
18,27198691,A 75-year-old man with permanent atrial fibril...,**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,5,5,5,5,False,5,The only thing I wanted to comment on is the d...,Leti,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",5,3,5,5,4,False,4,Podría mejorar los titulos de cada sección. El...,0,2,0,0,1,1,{},0,2,4
9,33782774,"A 20-year-old man (height: 138 cm, weight: 39 ...",**Discharge Summary**\n\n**Patient Information...,Laura,"[symptoms, signs and clinical findings, diseas...","[symptoms, signs and clinical findings, diseas...",2,4,4,4,4,False,3,"The DS is very summarized, with important info...",Leti,"[diseases and co-morbidities, sex, age, past m...","[symptoms, signs and clinical findings, diseas...",3,4,2,4,4,False,3,"very summarized, important information is mnis...",1,0,2,0,0,0,{},0,2,3


In [73]:
def print_example(i):
    example1 = df_human_tot.iloc[i]

    print(f"Example filenameid: {example1['filenameid']}")
    print("\n"*2)
    print(f"Clinical Case:\n{example1['clinical_case']}")
    print("\n"*2)
    print(f"Discharge Summary:\n {example1['discharge_summary']}")
    print("\n"*2)

    # print("Scores from Laura:")
    for col in cols_eval:
        print(f"{col}:\t Laura -- {example1[f'{col}_laura']}\t Leti -- {example1[f'{col}_leti']}")
    
    # print(f"diff_Key Information CC:\t Laura -- {example1[f'diff_len_Key Information CC']}\nLeti -- {example1[f'diff_len_Key Information CC']}")

    
    print("\n"*2)
    print("Feedback Laura:", example1["Feedback_laura"])
    print("Feedback Leti:", example1["Feedback_leti"])
    
print_example(-3)  # Print the last example

Example filenameid: 35414581



Clinical Case:
Patient information
A woman in her 70s diagnosed with HF in 1999, having comorbidities such as chronic obstructive pulmonary disease, asthma, hypertension, diabetes and kidney failure (stage 4). The patient lived with a full-time carer, in one of the most deprived (decile 1 for both the Index of Multiple Deprivation and the Health Deprivation and Disability Domain) areas of England. As per usual practice, the patient was mostly looked after by her general practitioner (GP).

The patient and her son provided informed consent to use her anonymised data for research purposes. The patient’s son consented to the access and use of the patient’s medical records after she had passed away. The data were collected as part of post-market surveillance activities in the UK.

Treatment
Device information
The Heartfelt device (Heartfelt Technologies) and connected weighing scales (Xiaomi Smart Scales, Anhui Huami Information Technologies) were installed 

In [56]:
df_diff = df_human_tot[["filenameid"] + [f"diff_{col}" for col in cols_eval + ["len_Key Information DS"]] + ["sum_diff", "max_diff"]]
print(df_diff.iloc[:3].T.reset_index().to_latex(index=False, escape=False, column_format="l" + "r" * (len(cols_eval) + 2), caption="Differences in evaluation scores between annotators Laura and Leti. The columns 'diff_*' represent the absolute difference in scores for each evaluation criterion, while 'sum_diff' and 'max_diff' summarize the differences across all criteria.", label="tab:human_eval_differences"))

\begin{table}
\caption{Differences in evaluation scores between annotators Laura and Leti. The columns 'diff_*' represent the absolute difference in scores for each evaluation criterion, while 'sum_diff' and 'max_diff' summarize the differences across all criteria.}
\label{tab:human_eval_differences}
\begin{tabular}{lrrrrrrrr}
\toprule
index & 4 & 6 & 15 \\
\midrule
filenameid & 31056078 & 29899452_1 & 31379463 \\
diff_Medical Entities Completeness & 2 & 0 & 1 \\
diff_Structure - Headers & 2 & 2 & 3 \\
diff_Structure - Content & 2 & 0 & 2 \\
diff_Content Accuracy & 1 & 0 & 1 \\
diff_Made-up Content & 1 & 3 & 1 \\
diff_Overall Quality & 1 & 2 & 0 \\
diff_len_Key Information DS & 0 & 1 & 0 \\
sum_diff & 9 & 8 & 8 \\
max_diff & 2 & 3 & 3 \\
\bottomrule
\end{tabular}
\end{table}



In [68]:
df_diff = df_human_tot[["filenameid"] + [f"diff_{col}" for col in cols_eval + ["len_Key Information DS"]] + ["sum_diff", "max_diff"]]
print(df_diff.iloc[-3:-1].T.reset_index().to_latex(index=False, escape=False, column_format="l" + "r" * 2))

\begin{tabular}{lrr}
\toprule
index & 2 & 24 \\
\midrule
filenameid & 35414581 & 34946264 \\
diff_Medical Entities Completeness & 0 & 0 \\
diff_Structure - Headers & 0 & 0 \\
diff_Structure - Content & 0 & 0 \\
diff_Content Accuracy & 0 & 0 \\
diff_Made-up Content & 0 & 0 \\
diff_Overall Quality & 0 & 0 \\
diff_len_Key Information DS & 0 & 0 \\
sum_diff & 0 & 0 \\
max_diff & 0 & 0 \\
\bottomrule
\end{tabular}

