In [7]:
# ------------------------------------------------------------
# Initiate system
# ------------------------------------------------------------
import pandas as pd 

# Read the pickled series-level and study-level tables.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
df_Series = pd.read_pickle("./Data_24-12-05/series_data.pkl")
df_Study = pd.read_pickle("./Data_24-12-05/study_data.pkl")

# Left-join the study columns onto every series row via studyInstanceUID.
# Both indexes are turned into ordinary columns before joining.
data = df_Series.reset_index().merge(
    df_Study.reset_index(),
    how="left",
    on='studyInstanceUID',
)

# Acquisition protocols that are excluded from the analysis.
# One entry per line, grouped by name prefix; the order is unchanged.
seperate_acq_prots = [
    # --- entries without an OL/OT/OW/CP prefix ---
    "Position Skellefteå",
    "-----",
    "---Frigör systemet---",
    # --- ".OL" entries ---
    ".OL Benlängd 20-40kg belastad",
    ".OL Benlängd belastad",
    ".OL Benvinkel DX 20-40kg belastad",
    ".OL Benvinkel DX 20-50kg belastad",
    ".OL Benvinkel DX belastad",
    ".OL Benvinkel DX stående",
    ".OL Benvinkel SIN 20-50kg belastad",
    ".OL Benvinkel SIN belastad",
    ".OL Benvinkel SIN stående",
    ".OL Helrygg frontal AP 0-20kg",
    ".OL Helrygg frontal AP 20-50kg",
    ".OL Helrygg frontal AP vuxen",
    ".OL Helrygg frontal PA 0-20kg",
    ".OL Helrygg frontal PA 20-50kg",
    ".OL Helrygg frontal PA vuxen",
    ".OL Helrygg sida 0-20kg",
    ".OL Helrygg sida 20-50kg",
    ".OL Helrygg sida stag 20-50kg",
    ".OL Helrygg sida vuxen",
    ".OL Underben DX frontal belastad",
    ".OL Underben SIN frontal belastad",
    ".OL Underben SIN sida belastad",
    # --- ".OT" entries ---
    ".OT Helrygg frontal",
    ".OT Helrygg frontal 20-50kg",
    ".OT Helrygg frontal vuxen",
    ".OT Lårben DX frontal",
    ".OT Lårben SIN frontal",
    ".OT Tunntarmspassage",
    ".OT Underben DX frontal",
    ".OT Underben DX sida",
    ".OT Underben SIN frontal",
    ".OT Underben SIN sida",
    # --- "CP_" entries ---
    "CP_Shunt",
    "CP_Vuxen esofagus",
    "CP_antiiso",
    # --- "OL" entries ---
    "OL Benvinkel DX stående",
    "OL Benvinkel SIN stående",
    "OL Helrygg frontal PA vuxen",
    "OL Helrygg sida vuxen",
    # --- "OT" entries ---
    "OT Buköversikt",
    "OT Helrygg frontal vuxen",
    "OT Helrygg sida vuxen",
    "OT Lårben DX frontal",
    "OT Lårben SIN frontal",
    "OT Tunntarm",
    "OT Tunntarmpassage",
    "OT Underben DX frontal",
    "OT Underben DX sida",
    "OT Underben SIN frontal",
    "OT Underben SIN sida",
    # --- "OW" entries ---
    "OW Benlängd belastad",
    "OW Benvinkel DX",
    "OW Benvinkel DX 20-40 kg",
    "OW Benvinkel SIN",
    "OW Helrygg frontal 20 -30 kg",
    "OW Helrygg frontal 30-40 kg",
    "OW Helrygg frontal 40-50 kg",
    "OW Helrygg frontal 50-60 kg",
    "OW Helrygg frontal 60-70 kg",
    "OW Helrygg frontal vuxen",
    "OW Helrygg sida 20-30 kg",
    "OW Helrygg sida 30-40 kg",
    "OW Helrygg sida 40-50 kg",
    "OW Helrygg sida 50-60 kg",
    "OW Helrygg sida 60-70 kg",
    "OW Helrygg sida vuxen",
    "OW Skalle-halsrygg-bröstrygg frontal",
    "OW Skalle-halsrygg-bröstrygg sida",
    # --- remaining entries without a prefix ---
    "Sondläge",
    "____________________",
]

# Keep only the rows whose acquisitionProtocol is NOT in the exclusion list.
is_excluded_protocol = data["acquisitionProtocol"].isin(seperate_acq_prots)
filtered_data = data.loc[~is_excluded_protocol]

In [None]:
# ------------------------------------------------------------
# Match data
# ------------------------------------------------------------

# Per-study retake statistics, one row per studyInstanceUID:
#   duplicates  - number of rows whose acquisitionProtocol repeats an earlier
#                 row of the same study
#   total_count - number of rows of the study with a non-missing
#                 acquisitionProtocol
def _count_repeated_protocols(protocols):
    """Return how many non-missing values in ``protocols`` repeat an earlier value."""
    # Missing protocols are dropped first. Series.duplicated() treats NaN as
    # equal to NaN, while the 'count' aggregation below ignores NaN; without
    # the dropna a study with missing protocols could report more duplicates
    # than counted rows.
    return protocols.dropna().duplicated().sum()


# Named aggregation labels each result column where it is computed, so the
# column names cannot drift out of step with the order of the aggregations.
duplicate_acqprot = (
    filtered_data
    .groupby('studyInstanceUID')['acquisitionProtocol']
    .agg(duplicates=_count_repeated_protocols, total_count='count')
)

# Percentage of a study's counted rows that are duplicates, rounded to a whole
# number. A study with total_count == 0 yields NaN (0 / 0).
duplicate_acqprot["Ratio"] = (
    100 * (duplicate_acqprot['duplicates'] / duplicate_acqprot['total_count'])
).round()

# Attach the per-study statistics to every row of the filtered series table.
joined_data = filtered_data.merge(duplicate_acqprot, on='studyInstanceUID', how="left")

# Keep the first row of each study so that per-study values (duplicates,
# total_count) are not summed once per series in later aggregations.
limited_data = joined_data.drop_duplicates("studyInstanceUID", ignore_index=True)

In [9]:
# ------------------------------------------------------------
# Estimate ratios of interest from dataset
# ------------------------------------------------------------

# Percentage of examinations (rows of duplicate_acqprot) with at least one duplicate.
exams_with_retake = duplicate_acqprot['duplicates'].gt(0).sum()
P_exam = round(100 * exams_with_retake / len(duplicate_acqprot))
print(f"About {P_exam} % of all examinations have one or more retake images")

# Percentage of all counted exposures that are duplicates.
retake_total = duplicate_acqprot['duplicates'].sum()
exposure_total = duplicate_acqprot['total_count'].sum()
P_expo = round(100 * (retake_total / exposure_total))
print(f"About {P_expo} % of all exposures are retakes")

# Duplicates and exposure totals summed per studyDescription, using the
# one-row-per-study table, plus the resulting percentage.
duplicate_studies = (
    limited_data
    .groupby('studyDescription')[["duplicates", "total_count"]]
    .sum()
    .rename(columns={"duplicates": "duplicate_count", "total_count": "study_expo_count"})
)
duplicate_studies["ratio"] = (
    100 * (duplicate_studies["duplicate_count"] / duplicate_studies["study_expo_count"])
).round()


About 33 % of all examinations have one or more retake images
About 16 % of all exposures are retakes
