# f-MRI Tumour Data Handling

In [1]:
import os
import shutil

import cv2 as cv
from sklearn.model_selection import train_test_split

## Gather images

We will be using a small dataset of f-MRI images of brain slices, with and without tumours, from [Kaggle](https://www.kaggle.com/navoneel/brain-mri-images-for-brain-tumor-detection).

In [39]:
def _read_grayscale_images(directory):
    """Read every decodable image in `directory` as a grayscale array.

    Files are visited in sorted name order so the resulting list is the same on
    every run (os.listdir makes no ordering guarantee). Entries that OpenCV
    cannot decode are skipped: cv.imread signals failure by returning None
    rather than raising, and a None left in the list would only fail later when
    the image is written back to disk.
    """
    images = []
    for filename in sorted(os.listdir(directory)):
        img = cv.imread(os.path.join(directory, filename), cv.IMREAD_GRAYSCALE)
        if img is not None:
            images.append(img)
    return images


# The raw dataset lives one level above the notebook's working directory,
# with one sub-folder per class: "no" (negative) and "yes" (positive).
raw_data_dir = os.path.join(os.path.join(os.getcwd(), os.pardir), "resources/raw_data/fmri_tumour")
neg_dir = os.path.join(raw_data_dir, "no")
pos_dir = os.path.join(raw_data_dir, "yes")
neg_imgs = _read_grayscale_images(neg_dir)
pos_imgs = _read_grayscale_images(pos_dir)

Now that we have loaded the images, we create a label for each one, combine the two classes, and use `train_test_split` to create random training, validation and test sets.

In [42]:
# Label convention: 0 for images from the "no" folder, 1 for images from "yes".
neg_labels = [0] * len(neg_imgs)
pos_labels = [1] * len(pos_imgs)
X = neg_imgs + pos_imgs
y = neg_labels + pos_labels

# Fixed seed so that Restart & Run All reproduces exactly the same splits.
SPLIT_SEED = 42

# Hold out 10% of all images for testing; stratify so both classes keep their
# overall proportions in every split of this small dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SPLIT_SEED, stratify=y
)
# Then hold out 10% of the remaining training images for validation.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=SPLIT_SEED, stratify=y_train
)

Now we can organise the images in the data directory for the data generator.

First we will need to make a directory for each class.

In [47]:
def getdirs(classes):
    """Build the output directory paths for every split/class combination.

    Parameters
    ----------
    classes : iterable
        Class identifiers; each one is formatted into the path as a folder name.

    Returns
    -------
    list of list of str
        One inner list per split, ordered "train", "val", "test". Each inner
        list holds one path per class, in the order of `classes`, and every
        path ends with a trailing "/". Nothing is created on disk.
    """
    root = os.path.join(os.path.join(os.getcwd(), os.pardir), "resources/data/fmri_tumour")
    return [
        [os.path.join(root, "{0}/{1}/".format(split, cls)) for cls in classes]
        for split in ["train", "val", "test"]
    ]

def mkdirs(classes):
    """Ensure an empty directory exists for every split/class combination.

    For each path returned by getdirs(classes), the directory is created
    (including missing parents) if it does not exist; otherwise everything
    already inside it is deleted so that images from a previous run cannot
    leak into the new dataset.

    Parameters
    ----------
    classes : iterable
        Class identifiers, passed straight through to getdirs.
    """
    for split_dirs in getdirs(classes):
        for class_dir in split_dirs:
            # exist_ok avoids the check-then-create race of a separate isdir test;
            # a freshly created directory is empty, so the loop below is a no-op.
            os.makedirs(class_dir, exist_ok=True)
            for entry in os.listdir(class_dir):
                entry_path = os.path.join(class_dir, entry)
                if os.path.isdir(entry_path) and not os.path.islink(entry_path):
                    # os.remove cannot delete directories (for example a stray
                    # .ipynb_checkpoints folder), so remove those recursively.
                    shutil.rmtree(entry_path)
                else:
                    os.remove(entry_path)
 
# Two classes, identified by the integer labels 0 and 1.
NUM_CLASSES = 2
classes = range(NUM_CLASSES)
# Create (or empty) the train/val/test folders for each class.
mkdirs(classes=classes)

Now we can organise the images into the directories.

In [48]:
def write_imgs(classes, X_train, y_train, X_val, y_val, X_test, y_test):
    """Write every image into the directory of its split and class as a JPEG.

    Files are named "class_<label>_img_<n>.jpg", where <n> counts up from 0
    separately for each class within each split.

    Parameters
    ----------
    classes : sequence
        Class identifiers, passed to getdirs; its length sizes the per-class
        counters.
    X_train, X_val, X_test : iterable
        Images of each split, in a form cv.imwrite accepts.
    y_train, y_val, y_test : iterable of int
        Label of each image. Labels are used as indices into both the
        per-split directory list and the counters, so they must be integers
        in range(len(classes)).

    Raises
    ------
    OSError
        If OpenCV reports that an image could not be written. cv.imwrite
        signals this by returning False (for example when the target
        directory does not exist), which would otherwise drop the image
        silently.
    """
    dir_sets = getdirs(classes)
    sets = [(X_train, y_train), (X_val, y_val), (X_test, y_test)]
    for dirs, (X, y) in zip(dir_sets, sets):
        counter = [0] * len(classes)
        for img, label in zip(X, y):
            # dirs[label] already ends with "/", so the file name is appended directly.
            filename = "{0}class_{1}_img_{2}.jpg".format(dirs[label], label, counter[label])
            if not cv.imwrite(filename, img):
                raise OSError("Failed to write image to {0}".format(filename))
            counter[label] += 1

# Persist all three splits to disk, one JPEG file per image.
write_imgs(
    classes=classes,
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    X_test=X_test, y_test=y_test,
)