In [1]:
import pandas as pd
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tqdm import tqdm
import os
import glob
import numpy as np
import random
import math
import time
import cv2
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
import matplotlib.pyplot as plt

In [2]:
class DataLoader(keras.utils.Sequence):
    """Batch loader over patient directories that contain per-modality DICOM series.

    Each item is a dict keyed by modality name. Every value is a dict holding two
    parallel lists: ``'images'`` (one list of decoded slices per patient) and
    ``'ids'`` (the base name of the patient directory). A patient whose series
    yields no readable slice is left out of both lists for that modality.
    """

    def __init__(self,base_dir='/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/test',\
                mods=['FLAIR']):
        """Index the patient folders found directly under ``base_dir``.

        Args:
            base_dir: directory whose direct children are treated as patient folders.
            mods: names of the modality sub-folders to read under each patient folder.
        """
        super().__init__()
        self.batch_size = 1
        self.base_dir = base_dir
        self.pat_ids = sorted(glob.glob(os.path.join(base_dir, '*')))
        # Copy so neither the shared default list nor the caller's list is aliased.
        self.modalities = list(mods)
        print('PAT IDS:',len(self.pat_ids),' | Modalities:',self.modalities)

    def load_dicom_image(self,path):
        """Read one DICOM file and return it as an 8-bit, 3-channel image.

        The VOI LUT is applied, MONOCHROME1 images are inverted, and the result is
        min-max scaled to 0..255 before the single channel is repeated three times.
        Assumes ``pixel_array`` is 2-D (one slice per file) — TODO confirm.

        Args:
            path: path of the ``.dcm`` file to decode.

        Returns:
            ``np.uint8`` array with a trailing channel axis of length 3.
        """
        # dcmread is the supported name; read_file is its deprecated alias.
        dicom = pydicom.dcmread(path)
        data = apply_voi_lut(dicom.pixel_array, dicom)
        if dicom.PhotometricInterpretation == "MONOCHROME1":
            data = np.amax(data) - data
        data = data - np.min(data)
        peak = np.max(data)
        # A constant (e.g. blank) slice has peak == 0; skip the division so it
        # becomes an all-zero image instead of NaNs cast to uint8.
        if peak > 0:
            data = data / peak
        data = (data * 255).astype(np.uint8)[:,:,np.newaxis]
        data = np.repeat(data,3,2)
        return data

    def __getitem__(self,index):
        """Load every configured modality for the patients of batch ``index``.

        Args:
            index: zero-based batch number, ``0 <= index < len(self)``.

        Returns:
            dict mapping modality name to ``{'images': [...], 'ids': [...]}``.

        Raises:
            IndexError: if ``index`` is outside the valid batch range.
        """
        # Raising here also lets plain ``__getitem__``-based iteration terminate.
        if not 0 <= index < len(self):
            raise IndexError(f'batch index {index} out of range for {len(self)} batches')

        # Batches are consecutive, non-overlapping windows over the patient list.
        start = index * self.batch_size
        batch_patids = self.pat_ids[start:start+self.batch_size]

        all_images = {K: {'images':[],'ids':[]} for K in self.modalities}
        for patid in batch_patids:
            for MOD in all_images.keys():
                dicom_pngs = []
                # NOTE(review): glob order is not guaranteed to follow slice order;
                # confirm whether downstream stages rely on spatial ordering.
                dicom_files = glob.glob(os.path.join(patid, MOD, '*.dcm'))
                dset = tqdm(dicom_files, total=len(dicom_files),position=0, leave=True)
                dset.set_description(f'{patid}|{MOD}')
                skipped = 0
                for dicom_path in dset:
                    try:
                        dicom_pngs.append(self.load_dicom_image(dicom_path))
                    except Exception:
                        # Best effort: an undecodable file is skipped, but
                        # KeyboardInterrupt/SystemExit are no longer swallowed.
                        skipped += 1
                if skipped:
                    print(f'WARNING: skipped {skipped} unreadable DICOM file(s) in {patid}|{MOD}')
                if len(dicom_pngs)!=0:
                    all_images[MOD]['images'].append(dicom_pngs)
                    all_images[MOD]['ids'].append(patid.replace('\\','/').split('/')[-1])

        return all_images

    def __len__(self):
        """Number of batches, counting a trailing partial batch."""
        return math.ceil(len(self.pat_ids)/self.batch_size)

In [3]:
class StageOne:
    """Slice filter that keeps the slices the stage-one model assigns to class 1.

    The model is loaded from ``modelpath`` and is fed grayscale slices resized to
    ``(height, width)`` and scaled to 0..1. What class 1 represents is defined by
    the model file — presumably "slice worth keeping"; verify against training.
    """

    def __init__(self,modelpath='',height=256,width=256,score_min_thresh=0.5):
        """
        Args:
            modelpath: path handed to ``load_model``.
            height: target height of the model input.
            width: target width of the model input.
            score_min_thresh: minimum class-1 score for a slice to be kept.
        """
        self.model = load_model(modelpath)
        self.score_min_thresh = score_min_thresh
        self.height = height
        self.width = width
        # Fraction of slices trimmed from each end by the fallback in infer().
        self.offset_perc = 0.1

    def _central_slices(self,imagesbatch):
        """Fallback selection: drop ``offset_perc`` of the slices from each end.

        Returns the whole series when trimming would leave nothing (very short
        series), so the caller never receives an empty selection for a
        non-empty series.
        """
        total = len(imagesbatch)
        offset = math.ceil(total*self.offset_perc)
        # An explicit end index avoids the ``[0:-0]`` pitfall when offset is 0.
        central = imagesbatch[offset:total-offset]
        return central if len(central)!=0 else list(imagesbatch)

    def infer(self,image_batch,filter_batchsize=16):
        """Filter every patient's slices, for every modality in ``image_batch``.

        Args:
            image_batch: dict mapping modality name to
                ``{'images': [slices_per_patient, ...], 'ids': [patient_id, ...]}``.
            filter_batchsize: number of slices sent to the model per predict call.

        Returns:
            dict with the same layout as ``image_batch`` in which each patient's
            slice list contains only the kept slices (original, un-resized
            images). If the model keeps none, the central slices are used instead.
        """
        filtered_images = {}
        for K in image_batch.keys():
            filtered_images[K] = {'images':[],'ids':[]}
            for imagesbatch, patid in zip(image_batch[K]['images'],image_batch[K]['ids']):
                n_chunks = math.ceil(len(imagesbatch)/filter_batchsize)
                dset = tqdm(range(0,len(imagesbatch),filter_batchsize),total=n_chunks,position=0, leave=True)
                dset.set_description(f'{patid}|Filtering')
                filtered_batch_images = []
                for i in dset:
                    org_batchimgs = imagesbatch[i:i+filter_batchsize]
                    batchimgs = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in org_batchimgs]
                    batchimgs = np.array([cv2.resize(img,(self.width,self.height))/255. for img in batchimgs])
                    out = self.model.predict(batchimgs)
                    maxindexes = np.argmax(out,axis=1)
                    # Keep the original image when class 1 wins with enough confidence.
                    for img, best_class, scores in zip(org_batchimgs, maxindexes, out):
                        if best_class == 1 and scores[1] >= self.score_min_thresh:
                            filtered_batch_images.append(img)

                if len(filtered_batch_images)==0:
                    filtered_batch_images = self._central_slices(imagesbatch)

                filtered_images[K]['images'].append(filtered_batch_images)
                filtered_images[K]['ids'].append(patid)

        # Return only after ALL modalities were processed (the return used to sit
        # inside the loop, which dropped every modality after the first one).
        return filtered_images
              

In [4]:
class StageTwo:
    """Wrapper around the second-stage Keras model loaded from ``modelpath``."""

    def __init__(self,modelpath='',height=224,width=224):
        """Remember the target image size and load the model from ``modelpath``."""
        self.height, self.width = height, width
        self.model = load_model(modelpath)

    def preprocess(self,image):
        """Return ``image`` resized to ``(width, height)`` with ``cv2.resize``."""
        target_size = (self.width, self.height)
        return cv2.resize(image, target_size)

    def infer(self,imagebatch):
        """Return the model's ``predict`` output for ``imagebatch``, unchanged."""
        return self.model.predict(imagebatch)

In [5]:
# Two-stage inference over the training set: stage one filters the slices of each
# series, stage two classifies the resulting volume, and the per-patient votes
# are reduced to one label that is written to a CSV file.
mods = ['FLAIR']
# Pass ``mods`` explicitly so editing the list above actually changes what is loaded.
generator = DataLoader(base_dir='/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train',
                       mods=mods)
stage_one = StageOne(modelpath='/kaggle/input/models/FINAL_MODELALL_acc0.9825_ep26.h5')
stage_two = StageTwo(modelpath='/kaggle/input/models/3d_image_classification.hdf5')
dset = tqdm(enumerate(generator),total=len(generator),position=0, leave=True)
dset.set_description('Loading_test')

# patient id -> list of predicted class indices (one vote per modality).
all_results = {}
for i,sample in dset:
    filtered_images = stage_one.infer(sample)
    for K in filtered_images.keys():
        for batchimg,patid in zip(filtered_images[K]['images'],filtered_images[K]['ids']):
            if len(batchimg) == 0:
                # Nothing to stack into a volume; np.transpose would raise below.
                print(f'WARNING: no slices left for {patid}|{K}, skipping')
                continue
            batchimg = [stage_two.preprocess(img) for img in batchimg]
            # (slices, H, W, C) -> (1, H, W, slices, C): a single-volume batch.
            batchimg = np.transpose(batchimg, (1,2,0,3))[None,:,:,:,:]

            out = stage_two.infer(batchimg)
            maxindexes = int(np.argmax(out,axis=1)[0])

            patidkey = str(patid).zfill(5)
            all_results.setdefault(patidkey, []).append(maxindexes)

# Majority vote per patient; a tie resolves to class 0.
allpatids = []
allpreds = []
for K, votes in all_results.items():
    one_count = votes.count(1)
    zero_count = votes.count(0)
    allpatids.append(K)
    allpreds.append(1 if one_count > zero_count else 0)

outcsv = 'submission_train.csv'
df = pd.DataFrame({'BraTS21ID':allpatids,'MGMT_value':allpreds})
df.to_csv(outcsv,index=False)

PAT IDS: 585  | Modalities: ['FLAIR']


  app.launch_new_instance()
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000|FLAIR: 100%|██████████| 400/400 [00:06<00:00, 62.64it/s]
00000|Filtering: 100%|██████████| 25/25 [00:08<00:00,  2.80it/s]
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00002|FLAIR: 100%|██████████| 129/129 [00:01<00:00, 69.54it/s]
00002|Filtering: 100%|██████████| 9/9 [00:00<00:00, 11.14it/s]
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00003|FLAIR: 100%|██████████| 129/129 [00:01<00:00, 68.51it/s]
00003|Filtering: 100%|██████████| 9/9 [00:00<00:00, 14.07it/s]
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00005|FLAIR: 100%|██████████| 400/400 [00:05<00:00, 67.44it/s]
00005|Filtering: 100%|██████████| 25/25 [00:01<00:00, 13.02it/s]
/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00006|FLAIR: 100%|██████████| 129/129 [00:01<00:00, 68.40it/s]
00006|Filtering: 100%|██████████| 9/9 [00

In [6]:
df

Unnamed: 0,BraTS21ID,MGMT_value
0,00000,0
1,00002,1
2,00003,0
3,00005,0
4,00006,1
...,...,...
580,01005,0
581,01007,1
582,01008,0
583,01009,0
