In [11]:
from pathlib import Path
import os
import pandas as pd
import numpy as np
import openslide
from tqdm import tqdm
import cv2 as cv
from natsort import natsorted

# Absolute current working directory; the data folders used below are
# resolved relative to its parent.
thispath = Path.cwd().resolve()


def available_magnifications(mpp, level_downsamples):
    """Return the magnification available at each pyramid level of a slide.

    Args:
        mpp: Resolution of the slide as read from its properties (a number
            or a numeric string); it is converted to ``float`` here.
        level_downsamples: Iterable with the downsample factor of each level.

    Returns:
        A list with one entry per downsample factor: the base magnification
        divided by that factor. The base magnification is 40 when ``mpp`` is
        below 0.26 and 20 otherwise.
    """
    base_magnification = 40 if float(mpp) < 0.26 else 20
    return [base_magnification / factor for factor in level_downsamples]

# Location of the raw whole-slide images and of the directory that holds,
# for every slide, a "<stem>_tiles" folder with its extracted patches.
datadir = Path("/mnt/nas4/datasets/ToReadme/ExaMode_Dataset1/AOEC")
maskdir = thispath.parent / "data" / "Mask_PyHIST"

# Only slides whose path mentions "LungAOEC" are considered.
svs_files = natsorted([i for i in datadir.rglob("*.svs") if "LungAOEC" in str(i)], key=str)

labels = pd.read_csv(thispath.parent / "data" / "lung_data" / "he_images.csv")
names = natsorted(labels["file_name"].values, key=str)

# Keep the slides whose stem occurs inside one of the listed file names.
# The stems are computed once instead of once per (name, file) pair; the
# substring test and the resulting order are the same as a plain nested loop.
stem_file_pairs = [(file.stem, file) for file in svs_files]
he_svs_files = []
for name in names:
    he_svs_files.extend(file for stem, file in stem_file_pairs if stem in name)

print(len(he_svs_files))

header = ["level_dimensions", "level_downsamples", "magnifications", "mpp", "number_patches_PyHist"]

metadata = pd.DataFrame(columns=header)

for svs_file in tqdm(he_svs_files, desc="Metadata .csv file in progress"):

    # Number of entries in the slide's tiles folder.
    tiles_dir = maskdir / svs_file.parent.stem / svs_file.stem / f"{svs_file.stem}_tiles"
    number_patches = len(os.listdir(tiles_dir))

    # Open the slide in a context manager so its handle is released right
    # after the metadata is read, instead of leaking one handle per slide.
    with openslide.OpenSlide(str(svs_file)) as slide:
        level_dimensions = slide.level_dimensions
        mpp = slide.properties['openslide.mpp-x']
        level_downsamples = slide.level_downsamples

    mags = available_magnifications(mpp, level_downsamples)
    # One row per slide, indexed by the slide stem.
    metadata.loc[svs_file.stem] = [level_dimensions, level_downsamples, mags, mpp, number_patches]

metadata.to_csv(f"{maskdir.parent}/metadata_slides.csv")


1323


Metadata .csv file in progress:   0%|          | 3/1323 [00:00<00:58, 22.65it/s]

000029269500258388
000029389200263815
000029488200270022
000029488200270027
000029488200270028
000029488200270029


Metadata .csv file in progress:   1%|          | 9/1323 [00:00<00:58, 22.38it/s]

000029488200270030
000029496800270407
000029496800270408
000029496800270409


Metadata .csv file in progress:   1%|          | 15/1323 [00:00<00:58, 22.35it/s]

000029496800270410
000029496800270411
000029496800270412
000029496800270413
000029496800270414
000029496800270415


Metadata .csv file in progress:   2%|▏         | 21/1323 [00:00<00:55, 23.49it/s]

000029496800270416
000029496800270417
000029496800270418
000029536100271824
000029709100281888


Metadata .csv file in progress:   2%|▏         | 24/1323 [00:01<00:55, 23.46it/s]

000029832000288268
000029919100292999
000030069800299917
000030075400300796


Metadata .csv file in progress:   2%|▏         | 30/1323 [00:01<00:56, 22.76it/s]

000030274100310453
000030303300314205
000030303300314206
000030303300314207
000030303300314209


Metadata .csv file in progress:   2%|▏         | 33/1323 [00:01<01:02, 20.67it/s]

000030303300314210
000030397000318685
000030397000318686
000030397000318688


Metadata .csv file in progress:   3%|▎         | 36/1323 [00:01<01:01, 20.96it/s]

000030397000318689
000030399700317760
000030399700317761
000030399700317762
000030399700317763


Metadata .csv file in progress:   3%|▎         | 42/1323 [00:01<01:07, 18.96it/s]

000030412700319317
000030412700319318
000030412700319319
000030412700319320
000030412700319321


Metadata .csv file in progress:   4%|▎         | 48/1323 [00:02<01:04, 19.64it/s]

000030412700319322
000030412700319323
000030438500320102
000030438500320103


Metadata .csv file in progress:   4%|▍         | 51/1323 [00:02<01:04, 19.71it/s]

000030438500320108
000030438500320109
000030438500320110
000030438500320111
000030438500320112


Metadata .csv file in progress:   4%|▍         | 57/1323 [00:02<01:00, 21.10it/s]

000030443200320380
000030466600322332
000030467500322347
000030473100323233
000030487200323363


Metadata .csv file in progress:   5%|▍         | 60/1323 [00:02<01:05, 19.41it/s]

000030487200323364
000030487200323365
000030487200323369
000030487200323370


Metadata .csv file in progress:   5%|▍         | 65/1323 [00:03<01:06, 18.96it/s]

000030487200323371
000030487200323372
000030487200323373
000030487200323374


Metadata .csv file in progress:   5%|▌         | 68/1323 [00:03<01:03, 19.80it/s]

000030490000322446
000030490000322447
000030490000322448


Metadata .csv file in progress:   5%|▌         | 72/1323 [00:03<01:01, 20.50it/s]

000030490100322318
000030490100322319
000030494900323684





KeyboardInterrupt: 