In [27]:
import difflib
import os
from typing import Dict

def findWSI(
	path_to_wsi: str, 
	thresh: int = 0, 
	iden: str = "".join([str(i) for i in range(10)]) + "_",
) -> Dict:
	"""
	Find all wsi at the base folder found at path_to_wsi
	Match the diagnoses from their respective paths
	Optionally exclude all diagnoses with fewer wsi than thresh
	
	Input: 
		path_to_wsi basefolder to wsi
		thresh minimum number of wsi per diagnose
		iden internal parameter to extract diagnose from path
	
	Output: {"slides": [wsi], "targets": [diagnoses]}
	"""

	assert os.path.exists(path_to_wsi), f"Path not found by function findWSI: {path_to_wsi=}"
	assert thresh >= 0, "Got negative threshold number for function findWSI"
	
	data = {"slides": [], "targets": []}
	
	for path, dirs, files in os.walk(path_to_wsi):
	
		print(f"{path=} {dirs=} {files=}")
	
		wsi = list(filter(lambda x: os.path.splitext(x)[1] == ".isyntax", files))
	
		if len(wsi):
			
			s = difflib.SequenceMatcher(None, path, wsi[0])
			match = next(filter(lambda x: x.b == 0, s.get_matching_blocks()))
			diagnose = path[match[0]:] + " "
			stop = min([i for i, e in enumerate(diagnose[match[2]:]) if e not in iden], default=0)
			diagnose = diagnose[:match[2] + stop]
	
			data["slides"].extend(list(map(lambda x: os.path.join(path, x), wsi)))
			data["targets"].extend([diagnose] * len(wsi)) 

	data["slides"] = [s for i, s in enumerate(data["slides"]) if data["targets"].count(data["targets"][i]) >= thresh]
	data["targets"] = [s for i, s in enumerate(data["targets"]) if data["targets"].count(data["targets"][i]) >= thresh]

	return data


In [26]:

path = '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/'
file = 'blau1_batch0_14oct2022_case039_sample1.isyntax'
s = difflib.SequenceMatcher(None, path, file)
match = next(filter(lambda x: x.b == 0, s.get_matching_blocks()))
diagnose = path[match[0]:] + " "
stop = min([i for i, e in enumerate(diagnose[match[2]:]) if e not in "".join([str(i) for i in range(10)]) + "_"], default=0)
diagnose = diagnose[:match[2] + stop]
diagnose

'blau1_0_'

In [24]:
match.b

0

In [28]:
findWSI("/mnt/hd12tb/Laaff_slides/floriansauter/")


path='/mnt/hd12tb/Laaff_slides/floriansauter/' dirs=['blau1_0_exported14oct2022', 'angkap_0_exported16aug2022', 'angmus_0_exported16sep2022', 'angkav1_0_exported01sep2022', 'blau_0_exported14oct2022', 'phillips_UFS', 'documents', 'requires_tiling', 'angkap1_0_exported10aug2022', 'angkav_0_exported30aug2022'] files=['Philips_IMS_software_manual.pdf']
path='/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022' dirs=['11', '10', '09', '01', '06', '08', '03', '05', '02', '07', '04'] files=['blau1_batch0_14oct2022_case039_sample1.isyntax', 'blau1_batch0_14oct2022_case038_sample1.isyntax', 'blau1_batch0_14oct2022_case042_sample1.isyntax', 'blau1_batch0_14oct2022_case069_sample1.isyntax', 'blau1_batch0_14oct2022_case029_sample1.isyntax', 'blau1_batch0_14oct2022_case051_sample1.isyntax', 'blau1_batch0_14oct2022_case025_sample1.isyntax', 'blau1_batch0_14oct2022_case070_sample1.isyntax', 'blau1_batch0_14oct2022_case022_sample1.isyntax', 'blau1_batch0_14oct2022_case098_sample1.isyntax

{'slides': ['/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case039_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case038_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case042_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case069_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case029_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case051_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case025_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case070_sample1.isyntax',
  '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_expo

In [4]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
d = torch.load("/mnt/hd12tb/Laaff_slides/saved_files/floriansauter2022/mil_dict_torch.pkl")
d["targets"]

[3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,


In [10]:
len(set(d["targets"]))

7

In [11]:
d["slides"]

['/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case039_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case038_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case042_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case069_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case029_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case051_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case025_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1_batch0_14oct2022_case070_sample1.isyntax',
 '/mnt/hd12tb/Laaff_slides/floriansauter/blau1_0_exported14oct2022/blau1