In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

# Load the two competition tables. The sample submission doubles as the list
# of test cases; train_labels.csv lists the training cases.
train_df = pd.read_csv(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv'
)
test_df = pd.read_csv(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv'
)

In [None]:
# Build a 5-character, zero-padded string key from BraTS21ID; it is used
# further down to build the per-case image folder paths.
test_case_ids = test_df['BraTS21ID'].astype(str)
test_df['caseID'] = test_case_ids.str.zfill(5)
test_df

In [None]:
folders = ['T1w', 'T1wCE', 'T2w', 'FLAIR']

# For every test case and every MRI sequence folder, record the filename of
# the PNG slice with the largest number of non-zero pixels.
# best_slices maps folder name -> list of filenames, one entry per case.
best_slices = {folder: [] for folder in folders}

for case_id in test_df.caseID:
    for folder in folders:
        slice_dir = f'../input/rsna-miccai-png/test/{case_id}/{folder}/'

        # Take only the files directly inside slice_dir. A missing folder
        # makes os.walk yield nothing, so fall back to an empty file list
        # instead of silently skipping the append (which would leave the
        # lists shorter than the DataFrame and break the assignment below).
        _, _, filenames = next(os.walk(slice_dir), (slice_dir, [], []))

        max_nonblack = 0
        best_filename = ''  # stays '' when the folder is missing or every slice is black
        # os.walk returns files in arbitrary order; sorting makes tie-breaking
        # between equally filled slices reproducible across runs.
        for filename in sorted(filenames):
            img = plt.imread(os.path.join(slice_dir, filename))
            nonblack = cv2.countNonZero(img)  # computed once per slice
            if nonblack > max_nonblack:
                max_nonblack = nonblack
                best_filename = filename

        # Exactly one entry per (case, folder) keeps every list aligned with
        # the rows of test_df.
        best_slices[folder].append(best_filename)

# Add one column per sequence (T1w, T1wCE, T2w, FLAIR) holding the chosen filename.
test_df = test_df.assign(**best_slices)

test_df

In [None]:
# Preview the selected slice of each MRI sequence for one test case.
idx = 1  # positional row index of the case to preview
case_id = test_df.caseID.iloc[idx]
modalities = ['T1w', 'T1wCE', 'T2w', 'FLAIR']

# One panel per sequence, drawn through explicit Axes objects so the cell does
# not depend on (or change) pyplot's global state such as the default colormap.
fig, axes = plt.subplots(1, len(modalities), figsize=(26, 6))
for ax, modality in zip(axes, modalities):
    filename = test_df[modality].iloc[idx]
    img = plt.imread(f'../input/rsna-miccai-png/test/{case_id}/{modality}/{filename}')
    ax.imshow(img, cmap='gray', aspect='auto')
    ax.set_title(f'{modality}: {filename}')
fig.suptitle(f'Test case {case_id}')
plt.show()

In [None]:
# Build a 5-character, zero-padded string key from BraTS21ID; it is used
# further down to filter cases and to build the per-case image folder paths.
train_case_ids = train_df['BraTS21ID'].astype(str)
train_df['caseID'] = train_case_ids.str.zfill(5)
train_df

According to the competition host ([discussion thread](https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/262046)), three case IDs (`00109`, `00123`, `00709`) in the training set should be excluded because their data contains unexpected errors (e.g. missing images).

Drop the rows with the following `caseID` values:
* 00109
* 00123
* 00709

In [None]:
# Training cases to leave out of the analysis.
excluded_case_ids = ['00109', '00123', '00709']

# Keep every row whose caseID is not in the exclusion list. The explicit
# .copy() makes the result an independent DataFrame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
train_df = train_df[~train_df['caseID'].isin(excluded_case_ids)].copy()
train_df

In [None]:
folders = ['T1w', 'T1wCE', 'T2w', 'FLAIR']

# For every training case and every MRI sequence folder, record the filename
# of the PNG slice with the largest number of non-zero pixels.
# best_slices maps folder name -> list of filenames, one entry per case.
best_slices = {folder: [] for folder in folders}

for case_id in train_df.caseID:
    for folder in folders:
        slice_dir = f'../input/rsna-miccai-png/train/{case_id}/{folder}/'

        # Take only the files directly inside slice_dir. A missing folder
        # makes os.walk yield nothing, so fall back to an empty file list
        # instead of silently skipping the append (which would leave the
        # lists shorter than the DataFrame and break the assignment below).
        _, _, filenames = next(os.walk(slice_dir), (slice_dir, [], []))

        max_nonblack = 0
        best_filename = ''  # stays '' when the folder is missing or every slice is black
        # os.walk returns files in arbitrary order; sorting makes tie-breaking
        # between equally filled slices reproducible across runs.
        for filename in sorted(filenames):
            img = plt.imread(os.path.join(slice_dir, filename))
            nonblack = cv2.countNonZero(img)  # computed once per slice
            if nonblack > max_nonblack:
                max_nonblack = nonblack
                best_filename = filename

        # Exactly one entry per (case, folder) keeps every list aligned with
        # the rows of train_df.
        best_slices[folder].append(best_filename)

# assign() returns a new DataFrame with one column per sequence added. Since
# train_df was produced by boolean filtering, building a new frame here avoids
# the SettingWithCopyWarning that in-place column assignment would raise.
train_df = train_df.assign(**best_slices)

train_df

In [None]:
# Preview the selected slice of each MRI sequence for one training case.
idx = 2  # positional row index of the case to preview
case_id = train_df.caseID.iloc[idx]
modalities = ['T1w', 'T1wCE', 'T2w', 'FLAIR']

# One panel per sequence, drawn through explicit Axes objects so the cell does
# not depend on (or change) pyplot's global state such as the default colormap.
fig, axes = plt.subplots(1, len(modalities), figsize=(26, 6))
for ax, modality in zip(axes, modalities):
    filename = train_df[modality].iloc[idx]
    img = plt.imread(f'../input/rsna-miccai-png/train/{case_id}/{modality}/{filename}')
    ax.imshow(img, cmap='gray', aspect='auto')
    ax.set_title(f'{modality}: {filename}')
fig.suptitle(f'Train case {case_id}')
plt.show()

In [None]:
# Write both tables to CSV files in the working directory, without the index column.
for csv_name, frame in (('test_df.csv', test_df), ('train_df.csv', train_df)):
    frame.to_csv(csv_name, index=False)