In [None]:
from os import listdir
from os.path import isdir, isfile, join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
# Root of the competition data; the labels CSV sits directly under it.
root_path = "../input/rsna-miccai-brain-tumor-radiogenomic-classification"
labels_csv = f"{root_path}/train_labels.csv"
train = pd.read_csv(labels_csv)
train.head()

In [None]:
# Count the files each patient directory holds for every MRI kind.
# os.listdir returns entries in arbitrary order, so sort for a reproducible row order.
p_ids = sorted(listdir("{}/train/".format(root_path)))
mri_kinds = ["FLAIR", "T1w", "T1wCE", "T2w"]
p_counts = []
for p_id in tqdm(p_ids):
    p_dir = "{}/train/{}".format(root_path, p_id)
    # Every kind starts as NaN so that a missing directory shows up as "missing"
    # in the resulting table instead of aborting the whole scan.
    p_count = {"BraTS21ID": p_id, **{kind: np.nan for kind in mri_kinds}}
    for kind in mri_kinds:
        each_kind_dir = "{}/{}".format(p_dir, kind)
        if not isdir(each_kind_dir):
            # listdir would raise on a path that is not a directory; keep the NaN placeholder.
            continue
        # Only regular files are counted; nested directories are ignored.
        p_count[kind] = sum(isfile(join(each_kind_dir, f)) for f in listdir(each_kind_dir))
    p_counts.append(p_count)

In [None]:
# One row per entry of p_counts: the patient id plus one file-count column per MRI kind.
df_count = pd.DataFrame(data=p_counts)
df_count

In [None]:
# Flag the patients whose four MRI kinds all contain the same number of files.
# Column-wise comparisons replace a row-by-row apply: the result is the same
# elementwise (a NaN count never compares equal, so such rows are flagged False).
flair, t1w, t1wce, t2w = (df_count[kind] for kind in ("FLAIR", "T1w", "T1wCE", "T2w"))
df_count["is_length_same"] = (flair == t1w) & (t1w == t1wce) & (t1wce == t2w)
df_count.head(20)

In [None]:
# Summarise how many patients have identical file counts across all MRI kinds.
n_same = df_count["is_length_same"].sum()
n_all = df_count["BraTS21ID"].count()
print(f"same length / all = {n_same} / {n_all}")

In [None]:
# One histogram per MRI kind showing how the per-patient file counts are distributed.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=[12, 4])
for ax, kind in zip((ax1, ax2, ax3, ax4), ("FLAIR", "T1w", "T1wCE", "T2w")):
    ax.hist(df_count[kind])
    ax.set_title(kind)
    ax.set_xlabel("files per patient")
    ax.set_ylabel("patients")
# Keep the axis labels of neighbouring panels from overlapping.
fig.tight_layout()
# Explicit show so the cell does not echo the repr of the last call.
plt.show()