In [1]:
from PIL import Image
from multiprocessing import Pool
import os

In [2]:
def split_into_subarrays(array, num_subarrays):
    # Calculate the length of each subarray
    subarray_length = len(array) // num_subarrays

    # Use list comprehension to create subarrays
    subarrays = [array[i * subarray_length: (i + 1) * subarray_length] for i in range(num_subarrays)]

    return subarrays

In [3]:
imgs_dir = "/mnt/d/uczelnia/magister/TCGA_breast_patch"
dirs = os.listdir(imgs_dir)
len(dirs)

3318

In [4]:
def get_white_pixels_ratio(image_path):
    # Open the image
    img = Image.open(image_path)

    # Convert the image to a mode that supports transparency (e.g., RGBA)
    img = img.convert("RGBA")

    # Get the pixel data
    pixel_data = img.getdata()

    # Count the white pixels
    white_pixel_count = sum(1 for pixel in pixel_data if pixel == (255, 255, 255, 255))
    
    return [white_pixel_count/len(pixel_data), white_pixel_count, len(pixel_data)]

In [5]:
def process_single_dir(dirname: str):
    dir_path = f"{imgs_dir}/{dirname}"
    file_names = os.listdir(dir_path)
    return (dirname, [(filename, get_white_pixels_ratio(f"{dir_path}/{filename}")) for filename in file_names])

def process_dirs_batch(batch: list):
    return [process_single_dir(d) for d in batch]

In [6]:
processes=20
with Pool(processes=processes) as pool:
    res = pool.map(process_dirs_batch, split_into_subarrays(dirs, processes))

In [7]:
res

KeyboardInterrupt: 

In [8]:
csv_res = []
for batch in res:
    for dirname, dir_files in batch:
        for filename, data in dir_files:
            csv_res.append((f"{imgs_dir}/{dirname}/{filename}", data[0], data[1], data[2]))

In [10]:
import numpy as np
import pandas as pd

In [11]:
pd.DataFrame(csv_res).to_csv("white_stats.csv")

In [None]:
os.listdir(imgs_dir + "/" + dirs[0])

['patch_10_1.png',
 'patch_10_10.png',
 'patch_10_11.png',
 'patch_10_12.png',
 'patch_10_13.png',
 'patch_10_14.png',
 'patch_10_15.png',
 'patch_10_16.png',
 'patch_10_17.png',
 'patch_10_18.png',
 'patch_10_19.png',
 'patch_10_2.png',
 'patch_10_20.png',
 'patch_10_21.png',
 'patch_10_22.png',
 'patch_10_23.png',
 'patch_10_24.png',
 'patch_10_25.png',
 'patch_10_26.png',
 'patch_10_27.png',
 'patch_10_28.png',
 'patch_10_29.png',
 'patch_10_3.png',
 'patch_10_30.png',
 'patch_10_31.png',
 'patch_10_32.png',
 'patch_10_33.png',
 'patch_10_4.png',
 'patch_10_5.png',
 'patch_10_6.png',
 'patch_10_7.png',
 'patch_10_8.png',
 'patch_10_9.png',
 'patch_11_10.png',
 'patch_11_11.png',
 'patch_11_12.png',
 'patch_11_13.png',
 'patch_11_14.png',
 'patch_11_15.png',
 'patch_11_16.png',
 'patch_11_17.png',
 'patch_11_18.png',
 'patch_11_19.png',
 'patch_11_20.png',
 'patch_11_21.png',
 'patch_11_22.png',
 'patch_11_23.png',
 'patch_11_24.png',
 'patch_11_25.png',
 'patch_11_26.png',
 'patch_1