In [13]:
import pandas as pd
# PadChest metadata: a single CSV read with pandas' default settings.
padchest_df = pd.read_csv(
    './metadata/padchest/PADCHEST_chest_x_ray_images_labels_160K_01.02.19.csv'
)

# JSRT metadata is split over two text files. Column names are passed
# explicitly via `names`, so neither file is read as having a header row.
jsrt_df1 = pd.read_csv(
    './metadata/jsrt/CNNDAT_EN.txt',
    sep=' ',
    names=['ImageID', 'Age', 'Sex', 'Type'],
    index_col=False,
)
jsrt_df2 = pd.read_csv(
    './metadata/jsrt/CLNDAT_EN.txt',
    sep='\t',
    names=[
        'ImageID',
        'Nodule_size',
        'Degree_of_subtlety',
        'Age',
        'Sex',
        'X',
        'Y',
        'Type',
        'Location',
        'Diagnosis',
    ],
    index_col=False,
)

## Number of images

In [14]:
def image_count_total(dfs):
    """Return the combined length of every table in ``dfs``.

    The notebook reports this value as a number of images, i.e. it relies on
    each metadata table holding one row per image.

    Args:
        dfs: Iterable of objects supporting ``len()`` (DataFrames in this
            notebook).

    Returns:
        Sum of ``len(table)`` over all tables; ``0`` when ``dfs`` is empty.
    """
    total_rows = 0
    for table in dfs:
        total_rows += len(table)
    return total_rows

In [15]:
# Row count of the PadChest metadata table, reported as its number of images.
padchest_image_total = image_count_total([padchest_df])
print(f'Padchest dataset has {padchest_image_total} images')

Padchest dataset has 160861 images


In [16]:
# JSRT metadata is spread over two tables, so their row counts are added together.
jsrt_image_total = image_count_total([jsrt_df1, jsrt_df2])
print(f'JSRT dataset has {jsrt_image_total} images')

JSRT dataset has 247 images


In [17]:
def unique_examinations_count_ct(dfs, patient_id_column, date_column):
    """Count the unique examinations contained in several metadata tables.

    An examination is identified by a distinct ``(patient, date)`` pair.
    The previous implementation summed the *sizes* of the groups, which is
    simply the number of rows and therefore counted every image rather than
    every examination.

    Args:
        dfs: Iterable of DataFrames that each contain both key columns.
        patient_id_column: Name of the column holding the patient identifier.
        date_column: Name of the column holding the examination date.

    Returns:
        Total number of distinct ``(patient, date)`` pairs, computed per table
        and summed. Pairs are not de-duplicated across tables. Rows with a
        missing value in either key column are ignored, matching the default
        ``groupby`` behaviour of the original code.

    Raises:
        KeyError: If a table lacks one of the key columns.
    """
    count = 0
    for df in dfs:
        # Drop rows with a missing key first, then keep one row per pair.
        exam_keys = df[[patient_id_column, date_column]].dropna()
        count += len(exam_keys.drop_duplicates())
    return count

## Number of unique patients

The JSRT database does not identify patients.

In [18]:
def unique_patients(patient_id_column):
    """Count the distinct patient identifiers in a column.

    Args:
        patient_id_column: Object exposing ``nunique()``; the notebook passes
            a single DataFrame column.

    Returns:
        Whatever ``patient_id_column.nunique()`` returns, i.e. the number of
        distinct values in the column.
    """
    distinct_patient_count = patient_id_column.nunique()
    return distinct_patient_count

In [19]:
# Distinct values in the PatientID column of the PadChest metadata.
padchest_patient_ids = padchest_df['PatientID']
print(f"Padchest dataset has {unique_patients(padchest_patient_ids)} unique patients")

Padchest dataset has 67625 unique patients


## Xray Projections

The JSRT database does not contain information about projections.

In [20]:
def rtg_projections(projection_column):
    """Tabulate how often each projection label occurs.

    Args:
        projection_column: Object exposing ``value_counts()``; the notebook
            passes a single DataFrame column.

    Returns:
        The result of ``projection_column.value_counts()``: one entry per
        distinct label with its number of occurrences.
    """
    projection_frequencies = projection_column.value_counts()
    return projection_frequencies

#### Number of different projections in Padchest dataset

In [21]:
# Kept as the cell's last expression so the frequency table is displayed.
padchest_projection_labels = padchest_df['Projection']
rtg_projections(padchest_projection_labels)

PA               91728
L                49579
AP_horizontal    14346
AP                4559
COSTAL             630
EXCLUDE             11
UNK                  8
Name: Projection, dtype: int64

## Types of images and existence of masks

|             | Images Type | Masks            |
|-------------|:---------:|------------------|
| PadChest    |    RTG    | -                |
| JSRT        |    RTG    | -                |
| VinDr_CXR   | RTG         | -                |
| Covid-CT    | CT        | -                |
| Chest Xray  | RTG       | some images |
| Lung Cancer | CT         | -                |

## DICOM attributes in images

None of our datasets contained images in DICOM format.