In [168]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tifffile import tifffile
from PIL import Image
import PIL
from skimage import color, data
from skimage.color import rgb2gray
from itertools import product
import os
from datasets import Dataset
import torch
from torch.utils.data import Dataset
from tqdm import tqdm

In [150]:
# tilepath = './data/doq_data_224/'
dir_tif = "data/doq_data/"
dir_rgb = "./data/doq_data_256/"
dir_gray = "./data/doq_data_256_gray/"

In [151]:
# tif files:
file1 = "C3311604.SES.100201791.tif"
file2 = "C4712134.NES.100228298.tif"
file3 = "O3712206.SWS.53377.tif"
file4 = "O3712208.SWS.53164.tif"
file5 = "O3712223.SES.53274.tif"
file6 = "O3712232.NWS.53174.tif"
file7 = "O3910434.NES.1137827.tif"

In [263]:
def split_images_to_tiles(filename, dir_in, dir_out, d):
    name, ext = os.path.splitext(filename)
    img = Image.open(os.path.join(dir_in, filename))
    w, h = img.size
    
    grid = product(range(0, h-h%d, d), range(0, w-w%d, d))
    for i, j in grid:
        box = (j, i, j+d, i+d)
        out = os.path.join(dir_out, f'{name}_{i}_{j}{ext}')
        img.crop(box).save(out)

    return None

## creates an array of filenames i dir_in 
def create_filename_array(dir_in):
    directory_files = os.listdir(dir_in)
    filename_array = []
    for file in directory_files:
        d = str(dir_in + "/" + file)
        filename_array.append(d)
    return filename_array

## Read RGB files from dir_in directory filenames, converts them to gray-scale, and saves as png images.
def convert_tiles_to_grayscale(filename_array, dir_in, dir_out):
    for file in filename_array:
        dir_in, fileextension =  os.path.splitext(file)    ## split directory + filename + extension
        filename = os.path.basename(dir_in)                ## read filename only 
        tile_rgb = tifffile.imread(file)                     ## read image as tifffile
        tile_gray = rgb2gray(tile_rgb)
        #tile_gray = color.rgb2gray(tile_rgb, channel_axis=-1)
        img = Image.fromarray(tile_gray*255).convert('L').save(dir_out + filename + ".png")
        
    return None

def convert_tif_tiles_to_png(filename_array, dir_out):
    for file in filename_array:
        dir_in, fileextension =  os.path.splitext(file)    ## split directory + filename + extension
        filename = os.path.basename(dir_in)                ## read filename only 
        tile_tif = tifffile.imread(file)                   ## read image as tifffile                  
        img = Image.fromarray(tile_tif).convert('L').save(dir_out + filename + ".png")
    return None

## creates an array of filenames i dir_in 
def create_gray_image_tile_array(dir_out): 
    gray_directory_files = os.listdir(dir_out)
    gray_file_names = []
    for file in gray_directory_files:
        d = str(dir_out + file)
        gray_file_names.append(d)

    ## Read gray files from dir_in directory filenames and save them as an numpy array.
    directory_array = []

    for file in gray_file_names:
        img = plt.imread(file)  ## numpy.ndarray
        directory_array.append(img)

    # torch expects type    
    # datasets.arrow_dataset.Dataset  
    return directory_array

In [264]:
# 1. splitting rgb images to RGB tiles:
# split_images_to_tiles(filename= file2, dir_in= "data/doq_data/", dir_out="data/doq_data_256/", d=256)
# split_images_to_tiles(filename= file7, dir_in= "data/doq_data/gray/", dir_out = "data/doq_data/gray_256_tif/", d=256)

In [265]:
# 2. creating filename array:
# filename_array_rgb = create_filename_array(dir_in = "data/doq_data_256")
# print(len(filename_array_rgb)) # 4785 from 7 tiffiles

# filename_array_gray = create_filename_array("data/doq_data/gray_256_tif")
print(filename_array_gray)

['data/doq_data/gray_256_tif/O3910434.NES.1137827_256_4608.tif', 'data/doq_data/gray_256_tif/O3712232.NWS.53174_6656_2816.tif', 'data/doq_data/gray_256_tif/O3712232.NWS.53174_4352_3840.tif', 'data/doq_data/gray_256_tif/O3712208.SWS.53164_4608_1024.tif', 'data/doq_data/gray_256_tif/O3712208.SWS.53164_7168_4608.tif', 'data/doq_data/gray_256_tif/O3910434.NES.1137827_6144_5376.tif', 'data/doq_data/gray_256_tif/O3712223.SES.53274_2560_4608.tif', 'data/doq_data/gray_256_tif/O3712223.SES.53274_4096_2816.tif', 'data/doq_data/gray_256_tif/O3910434.NES.1137827_1792_4096.tif', 'data/doq_data/gray_256_tif/O3712232.NWS.53174_0_1024.tif', 'data/doq_data/gray_256_tif/O3712206.SWS.53377_1024_256.tif', 'data/doq_data/gray_256_tif/O3712208.SWS.53164_6912_5376.tif', 'data/doq_data/gray_256_tif/O3712232.NWS.53174_3840_1024.tif', 'data/doq_data/gray_256_tif/O3712232.NWS.53174_1024_4352.tif', 'data/doq_data/gray_256_tif/O3712208.SWS.53164_5632_2304.tif', 'data/doq_data/gray_256_tif/O3910434.NES.1137827_1536

In [266]:

# 3. converting all images to Grayscale and saving them to a separate folder:
# convert_tiles_to_grayscale(filename_array, dir_in=dir_rgb, dir_out="./data/doq_data_gray_256/")

# 3. save gray tif files to png:
# convert_tif_tiles_to_png(filename_array_gray, dir_out="./data/doq_data_gray_256/")


In [267]:
file_array = create_gray_image_tile_array(dir_out="./data/doq_data_gray_256/")

In [268]:
np.save("gray_image_tile_array1", file_array)

In [273]:
len(file_array)

4785

In [276]:
np.max(file_array)

1.0

In [278]:
np.min(file_array)

0.0