In [None]:
import os
import json
import glob
import random
import collections

import numpy as np
import pandas as pd
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import random
from tqdm.notebook import tqdm

import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Conv2D, MaxPooling2D, AveragePooling2D, concatenate, \
    Activation, ZeroPadding2D
from tensorflow.keras.layers import add, Flatten
from tensorflow.keras.utils import plot_model
from tensorflow.keras.metrics import top_k_categorical_accuracy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from sklearn import model_selection as sk_model_selection
import os



In [None]:

# MRI sequence sub-folder names accepted by get_all_image_paths.
TYPES = ["FLAIR", "T1w", "T2w", "T1wCE"]
#WHITE_THRESHOLD = 10 # out of 255
# BraTS21ID values removed from the labelled set before it is split.
EXCLUDE = [109, 123, 709]

labels_df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
test_df = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')

# Drop the excluded cases first so they can land in neither split.
is_excluded = labels_df.BraTS21ID.isin(EXCLUDE)
labels_df = labels_df[~is_excluded]

# Hold out 10% of the label rows for validation, keeping the MGMT_value
# class balance the same in both parts; the seed is fixed for repeatability.
train_df, val_df = sk_model_selection.train_test_split(
    labels_df,
    test_size=0.1,
    random_state=404,
    stratify=labels_df["MGMT_value"],
)

def load_dicom(path, size = 224):
    '''
    Read one DICOM file and return it as a ``size`` x ``size`` uint8 image.

    The pixel array is divided by its maximum value, multiplied by 255 and
    cast to uint8, then resized with ``cv2.resize``. An all-zero slice skips
    the division (it would divide by zero) and therefore stays all zero.

    Not super sure if this kind of scaling is appropriate, but everyone seems to do it.
    NOTE(review): only the maximum is used for scaling, so this presumably
    expects non-negative pixel values - confirm for the data at hand.

    Parameters
    ----------
    path : path of the DICOM file to read.
    size : edge length, in pixels, of the square output image.

    Returns
    -------
    The resized uint8 array produced by ``cv2.resize``.
    '''
    # dcmread is the supported reader; read_file is a deprecated alias of it
    # that newer pydicom releases no longer provide.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array

    peak = np.max(data)
    if peak != 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return cv2.resize(data, (size, size))



In [None]:
# Sanity check: number of label rows that ended up in the validation split.
len(val_df)

In [None]:
def get_all_image_paths(brats21id, image_type, folder='train'): 
    '''
    Returns an array with the selected image paths of one type for one patient ID.

    Files are ordered by the integer that follows the last "-" in the path
    (after dropping the final four characters, presumably a ".dcm" suffix).
    Only a central window of that ordered list is returned:

    * up to 20 files  -> all of them
    * 21 to 40 files  -> the slice from 15% to 85% of the list
    * more than 40    -> the slice from 30% to 70% of the list

    brats21id is zero-padded to five digits to form the patient folder name;
    image_type must be one of TYPES; folder is the dataset sub-folder.
    '''
    assert image_type in TYPES

    pattern = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/%s/" % folder,
        str(brats21id).zfill(5),
        image_type,
        "*",
    )

    def slice_number(file_path):
        # Numeric sort key, so that e.g. "...-10" sorts after "...-2".
        return int(file_path[:-4].rsplit("-", 1)[-1])

    paths = glob.glob(pattern)
    paths.sort(key=slice_number)

    num_images = len(paths)
    if num_images <= 20:
        lower_frac, upper_frac = 0, 1
    elif num_images <= 40:
        lower_frac, upper_frac = 0.15, 0.85
    else:
        lower_frac, upper_frac = 0.3, 0.7

    start = int(num_images * lower_frac)
    end = int(num_images * upper_frac)

    return np.array(paths[start:end])

def get_all_images(brats21id, image_type, folder='train', size=225):
    '''
    Load every image selected by get_all_image_paths for one patient and type.

    Each path is read with load_dicom at the requested size; the result is a
    plain list in the same order as the paths.

    NOTE(review): the default size here is 225 while load_dicom and IMAGE_SIZE
    use 224 - looks like a typo; the callers in this notebook pass the size explicitly.
    '''
    slices = []
    for dicom_path in get_all_image_paths(brats21id, image_type, folder):
        slices.append(load_dicom(dicom_path, size))
    return slices


# Edge length (pixels) used when loading slices for the model.
IMAGE_SIZE = 224




In [None]:
# Row labels of the validation split (kept from the original label frame).
val_df.index

In [None]:
# Row labels of the training split (kept from the original label frame).
train_df.index

In [None]:
def _collect_slices(labels_df, image_type, folder='train'):
    '''
    Load the selected slices for every row of labels_df.

    For each row, the images of image_type are loaded for the row's BraTS21ID,
    and the row's MGMT_value and ID are repeated once per loaded image so the
    three outputs stay aligned element by element.

    Returns a tuple (images, labels, ids) of numpy arrays.
    '''
    images, labels, ids = [], [], []

    for i in tqdm(labels_df.index):
        row = labels_df.loc[i]
        patient_id = int(row['BraTS21ID'])
        patient_images = get_all_images(patient_id, image_type, folder, IMAGE_SIZE)

        images += patient_images
        labels += [row['MGMT_value']] * len(patient_images)
        ids += [patient_id] * len(patient_images)

    return np.array(images), np.array(labels), np.array(ids)


def get_all_data_for_train(image_type):
    '''
    Build the slice-level training arrays for one image type.

    Reads the module-level train_df and returns (X_train, y_train, train_ids).
    '''
    return _collect_slices(train_df, image_type)


def get_all_data_for_val(image_type):
    '''
    Build the slice-level validation arrays for one image type.

    Reads the module-level val_df and returns (X_val, y_val, val_ids). The
    images come from the 'train' folder because val_df is split off from the
    training labels.
    '''
    return _collect_slices(val_df, image_type)



# def get_all_data_for_test(image_type):
#     global test_df
    
#     X_test = []
#     test_ids = []

#     for i in tqdm(test_df.index):
#         x = test_df.loc[i]
#         images = get_all_images(int(x['BraTS21ID']), image_type, 'test', IMAGE_SIZE)
#         X_test += images
#         test_ids += [int(x['BraTS21ID'])] * len(images)

#     return np.array(X_test), np.array(test_ids)
# Build the slice-level arrays for the one MRI sequence used in this notebook.
MRI_TYPE = 'T1wCE'
X_train, y_train, trainidt = get_all_data_for_train(MRI_TYPE)
X_val, y_val, validt = get_all_data_for_val(MRI_TYPE)
# X_test, testidt = get_all_data_for_test('T1wCE')
#X.shape, y.shape, trainidt.shape

In [None]:
#X_train, X_valid, y_train, y_valid, trainidt_train, trainidt_valid = train_test_split(X, y, trainidt, test_size=0.2, random_state=40)

#split = int(X.shape[0] * 0.8)

# X_train = X[:split]
# X_valid = X[split:]

# y_train = y[:split]
# y_valid = y[split:]

# trainidt_train = trainidt[:split]
# trainidt_valid = trainidt[split:]

# add one dim to fit the model
# The rank checks make this cell safe to re-run: without them a second run
# would append another axis to arrays that already have the channel axis.
if len(X_train.shape) == 3:
    X_train = tf.expand_dims(X_train, axis=-1)
if len(X_val.shape) == 3:
    X_val = tf.expand_dims(X_val, axis=-1)

# One-hot encode the labels for the 2-unit softmax output. num_classes is
# fixed so the width does not depend on which labels happen to be present,
# and the rank check stops a re-run from encoding the one-hot rows again.
if len(y_train.shape) == 1:
    y_train = to_categorical(y_train, num_classes=2)
if len(y_val.shape) == 1:
    y_val = to_categorical(y_val, num_classes=2)

X_train.shape, y_train.shape, X_val.shape, y_val.shape, trainidt.shape, validt.shape

In [None]:
# Sanity check: number of validation images (first dimension of X_val).
len(X_val)

In [None]:
# Global average pooling layer instance, available for a pooled classifier head.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

# Augmentation pipeline: a random horizontal flip followed by a random rotation.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'))
data_augmentation.add(tf.keras.layers.experimental.preprocessing.RandomRotation(0.2))

In [None]:
def Conv2d_BN(x, nb_filter, kernel_size, strides=(1, 1), padding='same', name=None):
    """
    Apply a ReLU-activated Conv2D followed by BatchNormalization (axis 3) to x.

    nb_filter, kernel_size, strides and padding are forwarded to Conv2D.
    When name is given, the layers are named name + '_conv' and name + '_bn';
    otherwise both names are left as None.

    Returns the output of the batch-normalization layer.
    """
    conv_name = None if name is None else name + '_conv'
    bn_name = None if name is None else name + '_bn'

    features = Conv2D(nb_filter, kernel_size, padding=padding, strides=strides, activation='relu', name=conv_name)(x)
    return BatchNormalization(axis=3, name=bn_name)(features)



def bottleneck_Block(inpt,nb_filters,strides=(1,1),with_conv_shortcut=False):
    """
    Build one bottleneck residual block on top of inpt.

    nb_filters holds the three filter counts for the 1x1, 3x3 and 1x1
    Conv2d_BN layers of the main branch; strides is applied to the first of
    them. The branch output is added to a shortcut, which is either inpt
    itself or, when with_conv_shortcut is True, a 1x1 Conv2d_BN of inpt with
    the last filter count and the same strides.

    Returns the output of the add layer.
    """
    reduce_filters, mid_filters, out_filters = nb_filters

    branch = Conv2d_BN(inpt, nb_filter=reduce_filters, kernel_size=1, strides=strides, padding='same')
    branch = Conv2d_BN(branch, nb_filter=mid_filters, kernel_size=3, padding='same')
    branch = Conv2d_BN(branch, nb_filter=out_filters, kernel_size=1, padding='same')

    if not with_conv_shortcut:
        return add([branch, inpt])

    # Projection shortcut, created after the main branch as in the original order.
    shortcut = Conv2d_BN(inpt, nb_filter=out_filters, strides=strides, kernel_size=1)
    return add([branch, shortcut])


In [None]:
# np.random.seed(0)
# random.seed(12)
# tf.random.set_seed(12)
IM_WIDTH=224 # image width
IM_HEIGHT=224 # image height
batch_size=32 # batch size

inpt = keras.Input(shape=(IM_HEIGHT, IM_WIDTH, 1))

# Scale the 0-255 pixel values to 0-1 before they enter the network.
x = keras.layers.experimental.preprocessing.Rescaling(1./255)(inpt)
# x = data_augmentation(x)
x = ZeroPadding2D((3, 3))(x)

# Stem: strided convolution followed by max pooling.
x = Conv2d_BN(x,nb_filter=64, kernel_size=(4, 4), strides=(2, 2), padding='valid')
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

# Residual stages conv2_x .. conv5_x as
# (bottleneck filter counts, number of blocks, strides of the first block).
RESNET_STAGES = [
    ([64, 64, 256], 3, (1, 1)),
    ([128, 128, 512], 4, (2, 2)),
    ([256, 256, 1024], 6, (2, 2)),
    ([512, 512, 2048], 3, (2, 2)),
]
for stage_filters, num_blocks, first_strides in RESNET_STAGES:
    # The first block of a stage changes the channel count (and, from
    # conv3_x on, the resolution), so it needs the convolutional shortcut.
    x = bottleneck_Block(x, nb_filters=stage_filters, strides=first_strides, with_conv_shortcut=True)
    for _ in range(num_blocks - 1):
        x = bottleneck_Block(x, nb_filters=stage_filters)

x = AveragePooling2D(pool_size=(4, 4))(x)

# Classification head. It is built on the backbone output x so that the
# residual stack above is actually part of the model being trained.
h = tf.keras.layers.BatchNormalization(axis=-1)(x)
h = keras.layers.Dropout(0.3)(h)

h = keras.layers.Flatten()(h)
# h = global_average_layer(h)
h = keras.layers.Dense(128, activation='relu')(h)

output = keras.layers.Dense(2, activation="softmax")(h)

model = keras.Model(inpt, output)

from tensorflow.keras.optimizers import SGD
# opt = SGD(lr=0.1)

# Keep only the best full model (not just weights), judged by validation AUC
# at the end of each epoch.
checkpoint_filepath = 'best_model.h5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_auc',
    mode='max',
    save_best_only=True,
    save_freq='epoch')

# The metric is named explicitly: an unnamed AUC gets an auto-numbered name
# (auc_1, auc_2, ...) when this cell is re-run, and the checkpoint's
# 'val_auc' monitor would then no longer match any logged metric.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
              metrics=[tf.keras.metrics.AUC(name='auc')])

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=100,
    callbacks=[model_checkpoint_callback],
    validation_data=(X_val, y_val),
)



In [None]:
# Display the object returned by model.fit.
history