In [1]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive/... paths used
# by the cells below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
pip install hdf5storage

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hdf5storage
  Downloading hdf5storage-0.1.18-py2.py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.2 MB/s 
Installing collected packages: hdf5storage
Successfully installed hdf5storage-0.1.18


In [4]:
pip install mat73

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mat73
  Downloading mat73-0.59-py3-none-any.whl (19 kB)
Installing collected packages: mat73
Successfully installed mat73-0.59


In [5]:
# 라이브러리 import
import urllib.request
import os
import re
from tqdm import tqdm
import zipfile
import hdf5storage
import numpy as np
import scipy 
import sklearn
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
from mat73 import loadmat
from skimage.transform import resize
from PIL import Image
import cv2
import pandas as pd
from scipy.ndimage.interpolation import map_coordinates
from scipy.ndimage.filters import gaussian_filter

In [6]:
# Elastic deformation
def elastic_transform(image, alpha, sigma, alpha_affine, random_state=None):
    """Apply a random affine warp followed by a smooth elastic distortion.

    Args:
        image: 2-D ``(H, W)`` or 3-D ``(H, W, C)`` array. Stack an image and
            its mask along the last axis to warp both in a single call.
        alpha: Scale factor applied to the smoothed random displacement fields.
        sigma: Standard deviation of the Gaussian filter that smooths the
            random displacement fields.
        alpha_affine: Each coordinate of the three affine anchor points is
            shifted by a value drawn uniformly from
            ``[-alpha_affine, alpha_affine]``.
        random_state: Optional ``np.random.RandomState`` used for every random
            draw; a fresh, unseeded one is created when ``None``.

    Returns:
        The distorted array, with the same shape as ``image``.
    """
    if random_state is None:
        random_state = np.random.RandomState(None)

    original_shape = image.shape
    # Work on an explicit (H, W, C) shape so grayscale (2-D) input is handled
    # by the same code path as multi-channel input.
    shape = original_shape if image.ndim >= 3 else original_shape + (1,)
    shape_size = shape[:2]

    # Random affine: jitter three anchor points placed around the image centre.
    center_square = np.float32(shape_size) // 2
    square_size = min(shape_size) // 3
    pts1 = np.float32([center_square + square_size,
                       [center_square[0] + square_size, center_square[1] - square_size],
                       center_square - square_size])
    pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape).astype(np.float32)
    M = cv2.getAffineTransform(pts1, pts2)
    # cv2 expects the output size as (width, height), hence the reversal.
    image = cv2.warpAffine(image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101)
    # cv2 hands single-channel results back as 2-D arrays; restore the channel
    # axis so the 3-D coordinate lookup below always applies.
    image = image.reshape(shape)

    # Random displacement fields in [-1, 1), smoothed and scaled. The noise is
    # drawn with the full (H, W, C) shape and smoothed along all three axes.
    dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha
    dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha

    # Displace row/column coordinates only; the channel coordinate is unchanged.
    x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
    indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1))

    return map_coordinates(image, indices, order=1, mode='reflect').reshape(original_shape)

In [7]:
# Data download
class DownloadProgressBar(tqdm):
    """tqdm progress bar whose ``update_to`` can serve as a ``urlretrieve`` reporthook."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Advance the bar to ``b * bsize`` units.

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of one block.
            tsize: Total size; when given it becomes the bar's ``total``.
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # tqdm.update() takes an increment, so subtract what is already shown.
        self.update(transferred - self.n)

def download_url(url, target_folder, filename):
    """Download ``url`` to ``target_folder/filename`` unless the folder already has content.

    The target folder (including any missing parent folders) is created when
    it does not exist. If the folder already contains at least one entry the
    download is skipped.

    Args:
        url: Address of the file to fetch.
        target_folder: Directory that receives the file; a trailing path
            separator is optional.
        filename: Name to store the downloaded file under.
    """
    # check if data exists
    print("Check if data exists on disk")
    if not os.path.isdir(target_folder):
        print("Creating target folder")
        # makedirs also creates missing parents, which os.mkdir would reject.
        os.makedirs(target_folder, exist_ok=True)
    if not os.listdir(target_folder):
        print("Cannot find files on disk")
        print("Downloading files")
        # Join instead of concatenating so a folder without a trailing slash
        # does not end up glued to the file name.
        destination = os.path.join(target_folder, filename)
        with DownloadProgressBar(unit='B', unit_scale=True,
                                 miniters=1, desc=url.split('/')[-1]) as t:
            urllib.request.urlretrieve(url, filename=destination, reporthook=t.update_to)
    print("Download completed!")


In [8]:
# Open every zip file and pull its contents out into the target folder
def unzip_all_files(target_folder):
    """Extract every ``.zip`` in ``target_folder`` in place, including nested archives.

    Each archive is extracted into ``target_folder`` and then deleted. The
    folder is re-scanned until no ``.zip`` entry is left, so archives that
    were themselves packed inside another archive get unpacked as well.

    Args:
        target_folder: Directory to process; a trailing path separator is
            optional.
    """
    print("Unzip files")
    while True:
        archives = [name for name in os.listdir(target_folder) if name.endswith('.zip')]
        if not archives:
            break
        for name in archives:
            archive_path = os.path.join(target_folder, name)
            with zipfile.ZipFile(archive_path, "r") as zip_ref:
                zip_ref.extractall(target_folder)
            # Delete right after extraction so an archive unpacked later under
            # the same name is not removed before it has been processed.
            os.remove(archive_path)
    print("Unzip completed!")

In [9]:
def get_data_if_needed(data_path='./data/', url="https://ndownloader.figshare.com/articles/1512427/versions/5"):
    """Make sure the dataset is downloaded, unpacked and arranged under ``data_path``.

    All three steps are always invoked because each one guards itself:
    ``download_url`` only downloads into a folder without any entries,
    ``unzip_all_files`` only acts while ``.zip`` files are present, and
    ``_arrange_brain_tumor_data`` only processes the numeric ``.mat`` files
    that are still there. A folder left empty or half-processed by an
    interrupted run is therefore completed instead of being taken as ready.

    Args:
        data_path: Folder that holds the dataset.
        url: Download location of the dataset archive.
    """
    filename = "all_data.zip"
    download_url(url, data_path, filename)
    unzip_all_files(data_path)
    _arrange_brain_tumor_data(data_path)

In [29]:
def _scale_to_uint8(array):
    """Rescale ``array`` so that its maximum maps to 255 and cast it to uint8.

    An array whose maximum is zero is returned as all zeros rather than
    dividing by zero.
    """
    peak = np.max(array)
    if peak == 0:
        return np.zeros(np.shape(array), dtype='uint8')
    return (array * 255 / peak).astype('uint8')


def _arrange_brain_tumor_data(root):
    """Convert the numeric ``.mat`` files in ``root`` into per-class image/mask files.

    For every ``<number>.mat`` file the image and tumour mask are resized to
    224x224, rescaled to 8-bit and written to::

        <root>/<class>/image/<number>_<class>.jpg
        <root>/<class>/mask/<number>_<class>.png

    One elastically deformed copy of each pair is written to the ``aug/``
    sub-folder of the same directories as ``<number>_aug_<class>.jpg`` /
    ``.png``. The ``.mat`` file is deleted once its outputs are written.
    Files whose label is not 1, 2 or 3 are reported and left in place.

    Args:
        root: Folder containing the ``.mat`` files; also the output root.
    """
    # Label value stored in the .mat file -> output folder / file-name suffix.
    class_names = {1: 'meningioma', 2: 'glioma', 3: 'pituitary'}

    # Only purely numeric .mat names are treated as samples.
    items = [item for item in os.listdir(root) if re.search(r"^[0-9]+\.mat$", item)]

    # Create <class>/{image,mask}/aug/ for every class. makedirs(exist_ok=True)
    # creates whatever is missing, so one pre-existing folder can no longer
    # prevent the remaining ones from being created.
    for class_name in class_names.values():
        for kind in ('image', 'mask'):
            os.makedirs(os.path.join(root, class_name, kind, 'aug'), exist_ok=True)

    for item in items:
        mat_path = os.path.join(root, item)
        matfile = loadmat(mat_path)
        cjdata = matfile['cjdata']

        label = int(cjdata['label'])
        class_name = class_names.get(label)
        if class_name is None:
            # Keep the source file: nothing was exported for it.
            print("Skipping " + item + ": unexpected label " + str(label))
            continue

        image = resize(cjdata['image'], (224, 224))
        mask = resize(cjdata['tumorMask'], (224, 224))

        stem = item.split('.')[0]
        image_dir = os.path.join(root, class_name, 'image')
        mask_dir = os.path.join(root, class_name, 'mask')
        image_path = os.path.join(image_dir, stem + '_' + class_name + '.jpg')
        mask_path = os.path.join(mask_dir, stem + '_' + class_name + '.png')

        Image.fromarray(_scale_to_uint8(image)).save(image_path)
        Image.fromarray(_scale_to_uint8(mask)).save(mask_path)

        # Augment from the files just written (read back unchanged, flag -1),
        # so the augmented copy is derived from exactly what is stored on disk.
        im = cv2.imread(image_path, -1)
        im_mask = cv2.imread(mask_path, -1)

        # Stack image and mask as two channels so both go through a single
        # elastic_transform call.
        im_merge = np.concatenate((im[..., None], im_mask[..., None]), axis=2)
        width = im_merge.shape[1]
        im_merge_t = elastic_transform(im_merge, width * 2, width * 0.08, width * 0.08)

        im_t = Image.fromarray(np.array(im_merge_t[..., 0]))
        im_mask_t = Image.fromarray(np.array(im_merge_t[..., 1]))
        im_t.save(os.path.join(image_dir, 'aug', stem + '_aug_' + class_name + '.jpg'))
        im_mask_t.save(os.path.join(mask_dir, 'aug', stem + '_aug_' + class_name + '.png'))

        # Delete the source .mat file now that its outputs exist.
        os.remove(mat_path)

In [28]:
# NOTE(review): permanently deletes an entire Drive folder. 'data2' is not
# referenced by any other cell visible here, and rmtree raises if the folder
# is missing — confirm this clean-up is still wanted before re-running.
import shutil
shutil.rmtree('/content/drive/MyDrive/data2')

In [30]:
if __name__ == "__main__":
    # Download (if necessary), unpack and arrange the dataset under this
    # Google Drive folder.
    get_data_if_needed('/content/drive/MyDrive/data224/')

Check if data exists on disk
Creating target folder
Cannot find files on disk
Downloading files


5: 880MB [02:31, 5.82MB/s]                           


Download completed!
Unzip files
Unzip completed!


In [35]:
import pathlib

# Count the files in a folder
def file_count(root):
    """Return how many regular files sit directly inside ``root``.

    Sub-directories are neither counted nor descended into.
    """
    return sum(1 for entry in pathlib.Path(root).iterdir() if entry.is_file())

if __name__ == "__main__":
    # Check how many files were written: augmented meningioma images versus
    # meningioma masks.
    print(file_count("/content/drive/MyDrive/data224/meningioma/image/aug"))
    print(file_count("/content/drive/MyDrive/data224/meningioma/mask"))

708
708


In [33]:
# Sanity check: load one glioma sample (image, augmented image, mask,
# augmented mask) unchanged (flag -1) and compare their shapes.
# cv2.imread returns None instead of raising when a file cannot be read, in
# which case the .shape access below fails.
i = cv2.imread('/content/drive/MyDrive/data224/glioma/image/1841_glioma.jpg', -1)
ia = cv2.imread('/content/drive/MyDrive/data224/glioma/image/aug/1841_aug_glioma.jpg', -1)
m = cv2.imread('/content/drive/MyDrive/data224/glioma/mask/1841_glioma.png', -1)
ma = cv2.imread('/content/drive/MyDrive/data224/glioma/mask/aug/1841_aug_glioma.png', -1)

print(i.shape, ia.shape, m.shape, ma.shape)

(224, 224) (224, 224) (224, 224) (224, 224)


In [None]:
# Delete the purely numeric "<number>.png" files from the mask folder.
# The pattern is a raw string so that "\." is a regex escape rather than an
# invalid Python string escape.
items = [item for item in filter(lambda item: re.search(r"^[0-9]+\.png$", item), os.listdir("/content/drive/MyDrive/data/mask/"))]
for item in items:
    os.remove("/content/drive/MyDrive/data/mask/" + item)

In [None]:
# Delete the purely numeric "<number>.jpg" files from the image folder.
# The pattern is a raw string so that "\." is a regex escape rather than an
# invalid Python string escape.
items = [item for item in filter(lambda item: re.search(r"^[0-9]+\.jpg$", item), os.listdir("/content/drive/MyDrive/data/image/"))]
for item in items:
    os.remove("/content/drive/MyDrive/data/image/" + item)


In [None]:
# Resize every augmented glioma image to 512x512 and overwrite it in place.
# The dot before the extension is escaped so it only matches a literal ".".
# NOTE(review): the mask cell that follows resizes to 96x96, not 512x512 —
# confirm the two sizes are meant to differ.
items = [item for item in filter(lambda item: re.search(r"^[0-9]+_aug_glioma\.jpg$", item), os.listdir("/content/drive/MyDrive/data/image/"))]
for item in items:
    img = plt.imread("/content/drive/MyDrive/data/image/" + item)
    img = resize(img, (512, 512))
    # Rescale so the brightest pixel becomes 255 before casting to 8-bit.
    image_formatted = (img * 255 / np.max(img)).astype('uint8')
    image_jpg = Image.fromarray(image_formatted)
    image_jpg.save(os.path.join("/content/drive/MyDrive/data/image/" + item))


In [None]:
# Resize every augmented glioma mask to 96x96 and overwrite it in place.
# The dot before the extension is escaped so it only matches a literal ".".
# NOTE(review): the image cell above resizes to 512x512, not 96x96 — confirm
# the two sizes are meant to differ.
items = [item for item in filter(lambda item: re.search(r"^[0-9]+_aug_glioma\.png$", item), os.listdir("/content/drive/MyDrive/data/mask/"))]
for item in items:
    mask = plt.imread("/content/drive/MyDrive/data/mask/" + item)
    mask = resize(mask, (96, 96))
    # Rescale so the largest mask value becomes 255 before casting to 8-bit.
    mask_formatted = (mask * 255 / np.max(mask)).astype('uint8')
    mask_png = Image.fromarray(mask_formatted)
    mask_png.save(os.path.join("/content/drive/MyDrive/data/mask/" + item))

In [None]:
# Spot-check: print the (width, height) of the first augmented glioma mask.
# The dot before the extension is escaped so it only matches a literal ".".
from PIL import Image
items = [item for item in filter(lambda item: re.search(r"^[0-9]+_aug_glioma\.png$", item), os.listdir("/content/drive/MyDrive/data/mask/"))]
img = Image.open("/content/drive/MyDrive/data/mask/" + items[0])
img_size = img.size
print(img_size)

(96, 96)
