In [None]:
# Imports
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import time
from tqdm import tqdm_notebook as tqdm

In [None]:
# Load the training labels and shorten the column names for simplicity
excluded_ids = [109, 123, 709]  # problematic/corrupted samples to leave out
train_labels = pd.read_csv(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv'
).rename(columns={'BraTS21ID': 'id', 'MGMT_value': 'mgmt'})
# Keep only rows whose id is not excluded; the remaining rows keep their original index labels
train_labels = train_labels[~train_labels['id'].isin(excluded_ids)]
train_labels.head()

In [None]:
import os, math, glob, re
import numpy as np
import pandas as pd
import cv2

import matplotlib.pyplot as plt
import pydicom

from kaggle_datasets import KaggleDatasets

from sklearn.model_selection import train_test_split


from random import shuffle
import tensorflow as tf

# Every labelled subject is used for training. To hold out a stratified
# validation split instead, replace the assignment below with:
# x_train, x_val, y_train, y_val = train_test_split(train_labels.id,
#                                                   train_labels.mgmt,
#                                                   test_size=0.2,
#                                                   random_state=42,
#                                                   stratify=train_labels.mgmt)

x_train, y_train = train_labels['id'], train_labels['mgmt']

# Report the size of each series
for _series in (x_train, y_train):
    print(_series.shape)
# print(x_val.shape)
# print(y_val.shape)

In [None]:
# Display the label table as a plain NumPy array
train_labels.to_numpy()

In [None]:
def _bytes_feature(value):
    """Wrap a single string / bytes value in a tf.train.Feature holding a bytes_list.

    A value whose type matches that of ``tf.constant(0)`` (an eager tensor) is
    converted with ``.numpy()`` before being wrapped.
    """
    eager_tensor_type = type(tf.constant(0))
    payload = value.numpy() if isinstance(value, eager_tensor_type) else value
    bytes_list = tf.train.BytesList(value=[payload])
    return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
    """Wrap a single float / double in a tf.train.Feature holding a float_list."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    """Wrap a single bool / enum / int / uint in a tf.train.Feature holding an int64_list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def serialize_example(subject_id, image, label):
    """Serialize one labelled sample into a tf.train.Example byte string.

    Args:
        subject_id: stored as an int64 feature under the key 'subject_id'.
        image: object exposing ``.tobytes()``; its raw bytes are stored under
            the key 'image'. Only the bytes are written here, so the reader
            has to know the shape and dtype to decode them.
        label: stored as a float feature under the key 'mgmt'.

    Returns:
        The serialized tf.train.Example.
    """
    features = tf.train.Features(feature={
        'subject_id': _int64_feature(subject_id),
        'image': _bytes_feature(image.tobytes()),
        'mgmt': _float_feature(label),
    })
    return tf.train.Example(features=features).SerializeToString()

def serialize_test_example(subject_id, image):
    """Serialize one unlabelled sample into a tf.train.Example byte string.

    Args:
        subject_id: stored as an int64 feature under the key 'subject_id'.
        image: object exposing ``.tobytes()``; its raw bytes are stored under
            the key 'image'.

    Returns:
        The serialized tf.train.Example (it carries no 'mgmt' feature).
    """
    features = tf.train.Features(feature={
        'subject_id': _int64_feature(subject_id),
        'image': _bytes_feature(image.tobytes()),
    })
    return tf.train.Example(features=features).SerializeToString()

In [None]:
def load_3D_images(subject_id, dataset_path = '../input/rsna-processed-voxels-64x256x256-clahe/voxels/', contrasts = ['FLAIR', 'T1w', 'T1wCE', 'T2w']):
    """Load one saved array per contrast for a subject and stack them on a new last axis.

    Args:
        subject_id: subject identifier; it is converted to a string and
            zero-padded to five characters to form the '<id>.npy' file name.
        dataset_path: directory containing one sub-directory per contrast.
        contrasts: contrast sub-directory names, in the order they are stacked.

    Returns:
        A tensor produced by ``tf.stack(..., axis=-1)`` over the loaded arrays
        (presumably 3D volumes of equal shape -- TODO confirm against the dataset).
    """
    file_name = str(subject_id).zfill(5) + '.npy'
    per_contrast = [
        np.load(os.path.join(dataset_path, contrast, file_name))
        for contrast in contrasts
    ]
    return tf.stack(per_contrast, axis=-1)

In [None]:
def _write_tfrecord(record_path, serialized_examples, total=None):
    """Write serialized examples to a GZIP-compressed TFRecord file.

    Args:
        record_path: destination file; its parent directory is created if missing.
        serialized_examples: iterable of serialized tf.train.Example byte strings.
        total: optional number of examples, used only to size the progress bar.
    """
    # Pure-Python replacement for the `! mkdir -p` shell escape
    os.makedirs(os.path.dirname(record_path), exist_ok=True)
    gzip_options = tf.io.TFRecordOptions(compression_type="GZIP")
    with tf.io.TFRecordWriter(record_path, options=gzip_options) as writer:
        for serialized in tqdm(serialized_examples, total=total):
            writer.write(serialized)


# Training records: one example per (subject id, label) pair
_write_tfrecord(
    os.path.join("./tfrecords/train", 'data_train.tfrec'),
    (serialize_example(int(x), load_3D_images(x).numpy(), y) for x, y in zip(x_train, y_train)),
    total=len(x_train),
)

# Validation records (enable once x_val / y_val are defined by a train/validation split):
# _write_tfrecord(
#     os.path.join("./tfrecords/valid", 'data_val.tfrec'),
#     (serialize_example(int(x), load_3D_images(x).numpy(), y) for x, y in zip(x_val, y_val)),
#     total=len(x_val),
# )

# Get test subject IDs from the file names in the FLAIR directory.
# Only '.npy' files are considered, so a stray file cannot break int(x) below,
# and the IDs are sorted because os.listdir returns entries in arbitrary order.
test_path = '../input/rsna-processed-test-voxels-clahe/voxels'
flair_dir = os.path.join(test_path, 'FLAIR')
test_ids = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir(flair_dir)
    if file.endswith('.npy') and os.path.isfile(os.path.join(flair_dir, file))
)

# Test records: same layout as the training records, without a label
_write_tfrecord(
    os.path.join("./tfrecords/test", 'data_test.tfrec'),
    (serialize_test_example(int(x), load_3D_images(x, dataset_path=test_path).numpy()) for x in test_ids),
    total=len(test_ids),
)