In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from pathlib import Path

In [2]:
# All inputs of this notebook live under the same "final" data directory.
final_data_dir = "../data/final"

# Tabular inputs.
path_to_recist = f"{final_data_dir}/recist_measurements.csv"
path_to_patients = f"{final_data_dir}/metadata/patients.csv"

# Directories holding the per-image JSON label files of each subset.
path_to_train_labels = f"{final_data_dir}/images/train/labels"
path_to_test_labels = f"{final_data_dir}/images/test/labels"

In [3]:
# Read the RECIST measurements table and the patients table, in that order.
recist_df, patients_df = (
    pd.read_csv(csv_path) for csv_path in (path_to_recist, path_to_patients)
)

In [4]:
def add_lesion_type_labels(recist_df, path_to_train_labels, path_to_test_labels):
    """Attach lesion type and location labels to a RECIST measurements table.

    Every ``*.json`` file located directly inside the two label directories is
    read. Each file is parsed as a mapping whose keys are integer-like label
    values and whose values are comma-separated strings: the first component
    is stored as the lesion type and the second one as the lesion location.
    The file name without its extension is used as the ``uuid`` of its labels.

    Parameters
    ----------
    recist_df : pandas.DataFrame
        Table with at least the ``uuid`` and ``lesion_label_value`` columns,
        which are used as merge keys.
    path_to_train_labels : str or pathlib.Path
        Directory searched for ``*.json`` label files.
    path_to_test_labels : str or pathlib.Path
        Second directory searched for ``*.json`` label files.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``recist_df`` left-merged with the labels, i.e. with the
        ``lesion_label_type`` and ``lesion_label_location`` columns added.
        Rows without a matching label get missing values in both columns.

    Raises
    ------
    ValueError
        If a key of a label file cannot be converted with ``int``.
    """
    key_columns = ["uuid", "lesion_label_value"]
    label_columns = ["lesion_label_type", "lesion_label_location"]

    paths_to_json = [
        *Path(path_to_train_labels).glob('*.json'),
        *Path(path_to_test_labels).glob('*.json'),
    ]

    records = []
    for path in paths_to_json:
        # JSON is UTF-8 by specification; do not rely on the locale encoding.
        with open(path, 'r', encoding='utf-8') as file:
            labels = json.load(file)
        for key, value in labels.items():
            parts = [part.strip() for part in value.split(',')]
            records.append({
                "uuid": path.stem,
                "lesion_label_value": int(key),
                "lesion_label_type": parts[0],
                # A label without a comma has no location: keep it missing
                # instead of failing with an IndexError.
                "lesion_label_location": parts[1] if len(parts) > 1 else None,
            })

    # Explicit columns (and an integer key) keep the merge valid even when no
    # label file was found and ``records`` is empty.
    labels_df = pd.DataFrame(
        records, columns=key_columns + label_columns
    ).astype({"lesion_label_value": "int64"})

    # Drop label columns left over from a previous call so that re-running the
    # notebook cell does not create ``_x`` / ``_y`` suffixed duplicates.
    stale_columns = [name for name in label_columns if name in recist_df.columns]

    return recist_df.drop(columns=stale_columns).merge(
        labels_df,
        on=key_columns,
        how='left'
    )

In [5]:
# Enrich every RECIST measurement with its lesion type and location, read from
# the JSON label files of the train and test subsets.
recist_df = add_lesion_type_labels(
    recist_df=recist_df,
    path_to_train_labels=path_to_train_labels,
    path_to_test_labels=path_to_test_labels,
)
display(recist_df)

Unnamed: 0,patient_id,subset,study_date,study_uuid,uuid,filename,region,final_3d_objects,lesion_label_value,lesion_label_alias,recist_measurement_mm,study_order,lesion_label_type,lesion_label_location
0,1,train,20230426,1.3.51.0.1.1.172.19.3.128.3187796.3187735,1.3.12.2.1107.5.1.4.83504.30000023042612315883...,1.3.12.2.1107.5.1.4.83504.30000023042612315883...,abdomen,1,1,A,40,baseline,m,kidney
1,1,train,20230426,1.3.51.0.1.1.172.19.3.128.3187796.3187735,1.3.12.2.1107.5.1.4.83504.30000023042612315883...,1.3.12.2.1107.5.1.4.83504.30000023042612315883...,thorax,2,2,B,24,baseline,t,lung
2,2,train,20220712,1.3.51.0.1.1.172.19.3.128.3051489.3051428,1.3.12.2.1107.5.1.4.83504.30000022071212080050...,1.3.12.2.1107.5.1.4.83504.30000022071212080050...,abdomen,2,1,A,27,baseline,m,liver
3,3,test,20210615,1.3.51.0.1.1.172.19.3.128.2857496.2857435,1.3.12.2.1107.5.1.4.83504.30000021061509140333...,1.3.12.2.1107.5.1.4.83504.30000021061509140333...,abdomen,3,3,A,61,baseline,n,abdomen
4,3,test,20210615,1.3.51.0.1.1.172.19.3.128.2857496.2857435,1.3.12.2.1107.5.1.4.83504.30000021061509140333...,1.3.12.2.1107.5.1.4.83504.30000021061509140333...,abdomen,3,2,B,23,baseline,n,abdomen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,35,test,20210913,1.3.51.0.1.1.172.19.3.128.2891211.2891150,1.3.12.2.1107.5.1.4.83504.30000021091312134713...,1.3.12.2.1107.5.1.4.83504.30000021091312134713...,abdomen,3,1,B,46,follow-up-1,m,ovary
78,35,test,20210913,1.3.51.0.1.1.172.19.3.128.2891211.2891150,1.3.12.2.1107.5.1.4.83504.30000021091312134713...,1.3.12.2.1107.5.1.4.83504.30000021091312134713...,abdomen,3,4,C,17,follow-up-1,m,abdominal wall
79,37,train,20230221,1.3.51.0.1.1.172.19.3.128.3156833.3156772,1.3.12.2.1107.5.1.4.83885.30000023022112155417...,1.3.12.2.1107.5.1.4.83885.30000023022112155417...,abdomen,3,1,A,25,baseline,m,suprarenal
80,37,train,20230221,1.3.51.0.1.1.172.19.3.128.3156833.3156772,1.3.12.2.1107.5.1.4.83885.30000023022112155417...,1.3.12.2.1107.5.1.4.83885.30000023022112155417...,thorax,4,2,A,10,baseline,m,lung


In [6]:
# Adenopathies vs tumors
# Label type 'n' is reported as "adenopathy"; every other known label type is
# reported as "tumor". Lesions whose label is missing (no match in the left
# merge with the label files) stay missing instead of being counted as tumors.
lesion_label_type = recist_df['lesion_label_type']
recist_df['lesion_type_gross'] = (
    lesion_label_type
    .eq('n')
    .map({True: "adenopathy", False: "tumor"})
    .where(lesion_label_type.notna())
)
# dropna=False keeps unlabeled lesions visible as their own group; the table is
# unchanged when every lesion has a label.
grouped = (
    recist_df
    .groupby('lesion_type_gross', dropna=False)
    .size()
    .reset_index(name="lesions_count")
)
display(grouped)

Unnamed: 0,lesion_type_gross,lesions_count
0,adenopathy,23
1,tumor,59


In [8]:
# Tumors distribution
# Keep only the lesions classified as tumors, then count them per location.
is_tumor = recist_df['lesion_type_gross'] == "tumor"
tumor_lesions = recist_df[is_tumor]
grouped = (
    tumor_lesions
    .groupby('lesion_label_location')
    .size()
    .reset_index(name='lesions_count')
)
display(grouped)

Unnamed: 0,lesion_label_location,lesions_count
0,abdominal wall,2
1,kidney,1
2,liver,29
3,lung,22
4,ovary,4
5,suprarenal,1
