<a href="https://colab.research.google.com/github/samilarinc/engineerdocs/blob/main/Preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download Data

In [None]:
# Interactive Google Cloud sign-in; presumably required before the gsutil
# download in the next cell can read the bucket -- TODO confirm bucket ACLs.
# `gcloud auth login` authorizes the gcloud command-line tools with user credentials.
!gcloud auth login
# `gcloud auth application-default login` stores Application Default Credentials
# for client libraries.
!gcloud auth application-default login

In [None]:
# Copy the `sakso` folder from the `engineerdocs` bucket into the current
# working directory. Flags: -m parallel transfers, -q quiet output,
# -r recursive copy.
!gsutil -m -q cp -r gs://engineerdocs/sakso .

# Import Libraries

In [372]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pydicom as dicom
from tqdm import tqdm

from skimage import filters
from time import perf_counter

# Preprocess

In [265]:
# Apply Otsu's thresholding to segment the breast region

def img_crop(gray_img, kernel_size=(100, 100), max_value=4095):
    """Crop an image to the bounding box of its largest segmented contour.

    The image is inverted as ``max_value - gray_img`` and thresholded with
    Otsu's method. A morphological gradient with a rectangular kernel is
    applied to the resulting mask, and the external contour with the largest
    area defines the crop rectangle.

    Parameters
    ----------
    gray_img : numpy.ndarray
        2-D grayscale image. Presumably 12-bit mammogram data stored in an
        unsigned 16-bit array -- TODO confirm against the DICOM metadata.
    kernel_size : tuple of int, optional
        Size of the rectangular structuring element used for the
        morphological gradient.
    max_value : int, optional
        Value the image is subtracted from to invert it. The default, 4095,
        reproduces the previously hard-coded constant.

    Returns
    -------
    numpy.ndarray
        The cropped region of the inverted image after ``cv2.bitwise_not``.

    Raises
    ------
    ValueError
        If no contour is found in the mask (e.g. a blank/constant image).

    Notes
    -----
    NOTE(review): ``cv2.bitwise_not`` flips every bit of the stored dtype,
    which is not the inverse of ``max_value - x`` unless ``max_value`` is the
    dtype's maximum. For uint16 input with values <= 4095 the returned pixels
    would equal ``original + 61440``. The behavior is kept unchanged here so
    existing outputs stay reproducible -- confirm whether it is intended.
    """
    inverted = max_value - gray_img
    mask = inverted > filters.threshold_otsu(inverted)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    edges = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_GRADIENT, kernel)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Fail with a clear message instead of max()'s "empty sequence" error.
    if len(contours) == 0:
        raise ValueError(
            "img_crop: no contours found after thresholding; "
            "the image may be blank or constant"
        )

    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)

    return cv2.bitwise_not(inverted[y:y+h, x:x+w])

def img_normalize(img):
    """Min-max scale an array so its values span [0, 1].

    Parameters
    ----------
    img : numpy.ndarray
        Array to rescale.

    Returns
    -------
    numpy.ndarray
        ``(img - img.min()) / (img.max() - img.min())``. When every element
        of ``img`` is equal (zero range), an all-zero array is returned
        instead of the NaNs that a 0/0 division would produce.
    """
    lowest = img.min()
    value_range = img.max() - lowest
    if value_range == 0:
        # Constant image: img - lowest is all zeros, so dividing by 1 yields
        # zeros with the same result dtype as the regular division.
        value_range = 1
    return (img - lowest) / value_range

def img_clahe(img, clip=2.0, tile=(8, 8)):
    """Apply OpenCV's CLAHE operator to an image.

    Parameters
    ----------
    img : numpy.ndarray
        Image handed to ``CLAHE.apply``. Presumably a single-channel 8- or
        16-bit array -- TODO confirm against the caller.
    clip : float, optional
        Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
    tile : tuple of int, optional
        Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns
    -------
    The array produced by ``CLAHE.apply``.
    """
    equalizer = cv2.createCLAHE(clipLimit=clip, tileGridSize=tile)
    return equalizer.apply(img)

def preprocess(img):
    """Run the preprocessing pipeline on one image: crop, then CLAHE.

    ``img`` is passed to ``img_crop`` and the cropped result to ``img_clahe``,
    both with their default settings. Min-max normalization
    (``img_normalize``) is not applied here.
    """
    cropped = img_crop(img)
    enhanced = img_clahe(cropped)
    return enhanced

In [None]:
# Preprocess every expected DICOM file in every folder under 'sakso' and save
# each result as '<folder>_<view>.npy' inside 'processed'.
folder_names = os.listdir('sakso')
# File stems expected in each folder; presumably the four standard mammography
# views (left/right CC and MLO) -- TODO confirm.
files = ('LCC', 'LMLO', 'RCC', 'RMLO')
# Create the output directory up front so np.save does not fail on a fresh
# runtime; exist_ok keeps the cell safe to re-run.
os.makedirs('processed', exist_ok=True)
for folder in tqdm(folder_names):
    for view in files:
        img_path = os.path.join('sakso', folder, view + '.dcm')
        ds = dicom.dcmread(img_path)
        img = preprocess(ds.pixel_array)
        np.save(os.path.join('processed', f'{folder}_{view}.npy'), img)

In [None]:
# downsampled_img = cv2.resize(img, (1024, 1024), interpolation = cv2.INTER_LANCZOS4)