In [1]:
#  Import libraries and define variables
import os
import shutil
import glob
from tqdm import tqdm
from PIL import Image
import nibabel as nib
import numpy as np
import pickle
import cv2
import pandas as pd

import sys
sys.path.append("..")  # Adds the parent directory to sys.path
import config 

# Modalities and classification names used throughout the notebook
modalities = [
    't1',
    't1ce',
    't2',
    'flair',
]
classifications = [
    'MGMT_positive',
    'MGMT_negative',
]

# Patch size (height, width) and stride
block_h = block_w = 32
stride = 2

# Interpolated image dimensions
inter_dim = 110, 90

# Paths to the BraTS dataset folders, rooted at config.MAIN_DIR
path = config.MAIN_DIR

PATH = path + 'Data/'
Org_Dir = PATH + 'Original_Data_Backup/'
Work_Dir = PATH + 'Working_Data/'




In [2]:
# Function definition --> read the images and append them to a list

def read_image(data):
    """Load every pooled image under ``Work_Dir`` into ``data``.

    Every sub-directory of ``Work_Dir`` whose name is not listed in
    ``classifications`` is treated as a pool of images. Pools are visited in
    sorted name order: images from the first pool get label ``0`` and images
    from every later pool get label ``1``.

    Parameters
    ----------
    data : list
        Output list, extended in place with ``[img_array, class_num]`` pairs,
        where ``img_array`` is what ``cv2.imread`` returns for the file when
        read with ``cv2.IMREAD_GRAYSCALE``.

    Returns
    -------
    None
        Results are delivered through ``data``.

    Notes
    -----
    * Entries of ``Work_Dir`` that are not directories are skipped. This
      covers ``.DS_Store`` and the ``X.pickle`` / ``y.pickle`` files that this
      notebook itself writes into ``Work_Dir``; without the check a second run
      fails with ``NotADirectoryError``.
    * ``cv2.imread`` reports an unreadable file by returning ``None`` instead
      of raising, so that case is checked explicitly and the file is skipped.
    """
    print('Reading Images')

    # Sorted so the pool -> label assignment does not depend on the arbitrary
    # order in which os.listdir returns directory entries.
    workdir = sorted(os.listdir(Work_Dir))
    if '.DS_Store' in workdir:
        workdir.remove('.DS_Store')
        print('Removed .DS_Store')

    pools = [
        entry for entry in workdir
        if entry not in classifications
        and os.path.isdir(os.path.join(Work_Dir, entry))
    ]

    for pool_index, pool in enumerate(pools):
        # First pool -> 0, every following pool -> 1.
        # NOTE(review): with sorted names a pool containing 'MGMT_negative'
        # precedes one containing 'MGMT_positive' -- confirm the pool names.
        class_num = 0 if pool_index == 0 else 1

        pool_dir = Work_Dir + pool + '/'
        pool_dir_list = os.listdir(pool_dir)
        if '.DS_Store' in pool_dir_list:
            pool_dir_list.remove('.DS_Store')
            print('Removed .DS_Store')

        for img in tqdm(pool_dir_list):
            img_path = os.path.join(pool_dir, img)
            try:
                img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            except Exception as e:
                print(e)
                continue

            if img_array is None:
                # Missing, unreadable or non-image file: do not store None.
                print('Could not read image: ' + img_path)
                continue

            # Saving images in the list
            data.append([img_array, class_num])
        


In [3]:
# Function definition --> collect the features and labels of the processed images into the lists X and Y

def Initializing_feature_labels(data, X, Y):
    """Split ``data`` into parallel feature and label lists.

    Parameters
    ----------
    data : iterable
        Iterable of two-item entries ``(features, label)``.
    X : list
        Receives the first item of every entry, appended in order.
    Y : list
        Receives the second item of every entry, appended in order.

    Returns
    -------
    None
        ``X`` and ``Y`` are extended in place.
    """
    print('Initializing Features & Labels')
    add_feature = X.append
    add_label = Y.append
    for sample, target in data:
        add_feature(sample)
        add_label(target)
    print('List Size: ', len(X), len(Y))

In [4]:
# Function definition --> convert the lists to NumPy arrays and reshape the features

def Converting(block_h, block_w, X, Y):
    """Convert the feature and label lists to NumPy arrays.

    The features are reshaped to ``(-1, block_h, block_w, 1)``; the labels are
    converted with ``np.array`` and left in that shape.

    Parameters
    ----------
    block_h, block_w : int
        Height and width used for the reshape of the features.
    X : list
        Features; ``np.array(X)`` must hold a multiple of
        ``block_h * block_w`` elements or the reshape raises ``ValueError``.
    Y : list
        Labels.

    Returns
    -------
    tuple
        ``(x, y)``, the reshaped feature array and the label array. The same
        two arrays are also stored in the module-level names ``x`` and ``y``,
        which later cells read.
    """
    print('Converting to Array')
    global x, y

    # -1 lets NumPy infer the number of samples from the total element count.
    x = np.array(X).reshape((-1, block_h, block_w, 1))
    y = np.array(Y)

    # Report the arrays themselves, not the input list.
    print('Array Size with Reshape: ', len(x), len(y))
    print('Array Shape with Reshape: ', x.shape, y.shape)

    return x, y


In [5]:
# Main cell to execute all the functions

# Creating list for storing processed data
data = []

# Reading Images
read_image(data)

# Printing the length of the data
print('Size of the data: ', len(data))

# Lists that receive the features and labels of the processed images
X = []
Y = []

# Initializing the features and labels
Initializing_feature_labels(data, X, Y)

# Converting the lists into numpy arrays; Converting publishes them as the
# module-level names x (features) and y (labels).
Converting(block_h, block_w, X, Y)

# Storing the numpy arrays in pickle files.
# The reshaped array x is stored, not the Python list X, so the file holds the
# features in the shape printed by Converting. The `with` blocks close the
# files even if pickle.dump raises.
with open(Work_Dir + 'X.pickle', 'wb') as Storing_Preprocessed_Data:
    pickle.dump(x, Storing_Preprocessed_Data)

with open(Work_Dir + 'y.pickle', 'wb') as Storing_Preprocessed_Data:
    pickle.dump(y, Storing_Preprocessed_Data)


Reading Images
Removed .DS_Store


100%|██████████| 1015200/1015200 [03:13<00:00, 5242.02it/s]
100%|██████████| 1123200/1123200 [03:34<00:00, 5228.42it/s]


Size of the data:  2138400
Initializing Features & Labels
List Size:  2138400 2138400
Converting to Array
Array Size with Reshape:  2138400 2138400
Array Shape with Reshape:  (2138400, 32, 32, 1) (2138400,)


In [None]:
def create_dataframe():
    """Write a CSV listing every pooled image path with its class label.

    Every sub-directory of ``Work_Dir`` whose name is not listed in
    ``classifications`` is treated as a pool. Each file in a pool becomes one
    row: ``images`` holds the file path and ``labels`` holds ``'0'`` when the
    pool name contains ``'MGMT_negative'`` and ``'1'`` otherwise. Both columns
    are stored as strings.

    The table is written to ``step2_file_paths.csv`` (relative path, so it
    lands in the current working directory).

    Returns
    -------
    pandas.DataFrame
        The table that was written.

    Notes
    -----
    Entries of ``Work_Dir`` that are not directories (``.DS_Store``, the
    ``X.pickle`` / ``y.pickle`` files this notebook writes there) are skipped;
    listing them as directories raises ``NotADirectoryError``. ``.DS_Store``
    files inside a pool are skipped as well so they are not recorded as images.
    """
    image = []
    label = []

    pools = [
        entry for entry in sorted(os.listdir(Work_Dir))
        if entry not in classifications
        and os.path.isdir(os.path.join(Work_Dir, entry))
    ]

    for pool_modality in pools:
        pool_dir = Work_Dir + pool_modality + '/'
        pool_label = 0 if 'MGMT_negative' in pool_modality else 1
        for img in tqdm(os.listdir(pool_dir)):
            if img == '.DS_Store':
                continue
            image.append(pool_dir + img)
            label.append(pool_label)

    df = pd.DataFrame({
        'images': [str(p) for p in image],
        'labels': [str(v) for v in label],
    })
    # df = df.sample(frac=1, random_state=1).reset_index(drop=True)
    df.to_csv('step2_file_paths.csv')

    # Returned so the calling cell can display or reuse the table; a bare
    # df.head() inside a function shows nothing.
    return df

In [None]:
# Build the image-path/label table and write it to step2_file_paths.csv
# in the current working directory.
create_dataframe()